diff --git a/dqn_letter_gssr.ipynb b/dqn_letter_gssr.ipynb index 1039266..2b8a960 100644 --- a/dqn_letter_gssr.ipynb +++ b/dqn_letter_gssr.ipynb @@ -27,7 +27,7 @@ "metadata": {}, "outputs": [], "source": [ - "from stable_baselines3 import PPO # Or any other suitable RL algorithm\n", + "from stable_baselines3 import PPO, DQN # Or any other suitable RL algorithm\n", "from stable_baselines3.common.env_checker import check_env\n", "from letter_guess import LetterGuessingEnv\n", "from tqdm import tqdm" @@ -62,13 +62,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mltcptgeneral\u001b[0m (\u001b[33mfulltime\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, @@ -87,7 +81,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/art/cse151b-final-project/wandb/run-20240319_162920-ot2i0b8h" + "Run data is saved locally in /home/art/cse151b-final-project/wandb/run-20240319_211220-cyh5nscz" ], "text/plain": [ "" @@ -99,7 +93,7 @@ { "data": { "text/html": [ - "Syncing run confused-meadow-3 to Weights & Biases (docs)
" + "Syncing run distinctive-flower-20 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -123,7 +117,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/fulltime/wordle/runs/ot2i0b8h" + " View run at https://wandb.ai/fulltime/wordle/runs/cyh5nscz" ], "text/plain": [ "" @@ -134,6 +128,7 @@ } ], "source": [ + "model_save_path = \"wordle_ppo_model\"\n", "config = {\n", " \"policy_type\": \"MlpPolicy\",\n", " \"total_timesteps\": 200_000\n", @@ -157,13 +152,13 @@ "Using cuda device\n", "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", - "Logging to runs/ot2i0b8h/PPO_1\n" + "Logging to runs/cyh5nscz/PPO_1\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "585e7545478a485aa91c487b8630840f", + "model_id": "ca60c274a90b4dddaf275fe164012f16", "version_major": 2, "version_minor": 0 }, @@ -180,94 +175,40 @@ "text": [ "---------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 2.48 |\n", - "| ep_rew_mean | -3.7 |\n", + "| ep_len_mean | 2.54 |\n", + "| ep_rew_mean | -3.66 |\n", "| time/ | |\n", - "| fps | 465 |\n", + "| fps | 721 |\n", "| iterations | 1 |\n", - "| time_elapsed | 4 |\n", + "| time_elapsed | 2 |\n", "| total_timesteps | 2048 |\n", "---------------------------------\n" ] }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 2.49 |\n", - "| ep_rew_mean | -3.65 |\n", - "| time/ | |\n", - "| fps | 395 |\n", - "| iterations | 2 |\n", - "| time_elapsed | 10 |\n", - "| total_timesteps | 4096 |\n", - "| train/ | |\n", - "| approx_kl | 0.04501068 |\n", - "| clip_fraction | 0.427 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -3.23 |\n", - "| explained_variance | 0.189 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.205 |\n", - "| n_updates | 10 |\n", - "| policy_gradient_loss | -0.0667 |\n", - "| value_loss | 0.997 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 2.84 |\n", - "| ep_rew_mean | -3.4 |\n", - "| time/ | |\n", - "| fps | 381 |\n", - "| iterations | 3 |\n", - "| time_elapsed | 16 |\n", - "| total_timesteps | 6144 |\n", - "| train/ | |\n", - "| approx_kl | 0.01765968 |\n", - "| clip_fraction | 0.319 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -3.17 |\n", - "| explained_variance | 0.481 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.123 |\n", - "| n_updates | 20 |\n", - "| policy_gradient_loss | -0.0525 |\n", - "| value_loss | 0.383 |\n", - "----------------------------------------\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 2.98 |\n", - "| ep_rew_mean | -3.28 |\n", + "| ep_len_mean | 2.53 |\n", + "| ep_rew_mean | -3.61 |\n", "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 4 |\n", - "| time_elapsed | 21 |\n", - "| total_timesteps | 8192 |\n", + "| fps | 718 |\n", + "| iterations | 2 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 4096 |\n", "| train/ | |\n", - "| approx_kl | 0.018652592 |\n", - "| clip_fraction | 0.368 |\n", + "| approx_kl | 0.011673957 |\n", + "| clip_fraction | 0.0292 |\n", "| clip_range | 0.2 |\n", - "| entropy_loss | -3.11 |\n", - "| explained_variance | 0.428 |\n", + "| entropy_loss | -3.25 |\n", + "| explained_variance | -0.126 |\n", "| learning_rate | 0.0003 |\n", - "| loss | 0.181 |\n", - "| n_updates | 30 |\n", - "| policy_gradient_loss | -0.0572 |\n", - "| value_loss | 0.51 |\n", + "| loss | 0.576 |\n", + "| n_updates | 10 |\n", + "| policy_gradient_loss | -0.0197 |\n", + "| value_loss | 3.58 |\n", "-----------------------------------------\n" ] }, @@ -277,348 +218,24 @@ "text": [ "-----------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 3.1 |\n", - "| ep_rew_mean | -3.24 |\n", + "| ep_len_mean | 2.7 |\n", + "| ep_rew_mean | -3.56 |\n", "| time/ | |\n", - "| fps | 369 |\n", - "| iterations | 5 |\n", - "| time_elapsed | 27 |\n", - "| total_timesteps | 10240 |\n", + "| fps | 698 |\n", + "| iterations | 3 |\n", + "| time_elapsed | 8 |\n", + "| total_timesteps | 6144 |\n", "| train/ | |\n", - "| approx_kl | 0.023806999 |\n", - "| clip_fraction | 0.365 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -3.04 |\n", - "| explained_variance | 0.46 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.118 |\n", - "| n_updates | 40 |\n", - "| policy_gradient_loss | -0.0609 |\n", - "| value_loss | 0.499 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 3.15 |\n", - "| ep_rew_mean | -3.09 |\n", - "| time/ | |\n", - "| fps | 366 |\n", - "| iterations | 6 |\n", - "| time_elapsed | 33 |\n", - "| total_timesteps | 12288 |\n", - "| train/ | |\n", - "| approx_kl | 0.024716537 |\n", - "| clip_fraction | 0.372 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -2.94 |\n", - "| explained_variance | 0.495 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.266 |\n", - "| n_updates | 50 |\n", - "| policy_gradient_loss | -0.0578 |\n", - "| value_loss | 0.503 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 3.46 |\n", - "| ep_rew_mean | -2.8 |\n", - "| time/ | |\n", - "| fps | 365 |\n", - "| iterations | 7 |\n", - "| time_elapsed | 39 |\n", - "| total_timesteps | 14336 |\n", - "| train/ | |\n", - "| approx_kl | 0.023435738 |\n", - "| clip_fraction | 0.357 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -2.82 |\n", - "| explained_variance | 0.556 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.105 |\n", - "| n_updates | 60 |\n", - "| policy_gradient_loss | -0.0537 |\n", - "| value_loss | 0.491 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 3.54 |\n", - "| ep_rew_mean | -2.74 |\n", - "| time/ | |\n", - "| fps | 363 |\n", - "| iterations | 8 |\n", - "| time_elapsed | 45 |\n", - "| total_timesteps | 16384 |\n", - "| train/ | |\n", - "| approx_kl | 0.02574392 |\n", - "| clip_fraction | 0.29 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -2.71 |\n", - "| explained_variance | 0.608 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.125 |\n", - "| n_updates | 70 |\n", - "| policy_gradient_loss | -0.0445 |\n", - "| value_loss | 0.464 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 3.71 |\n", - "| ep_rew_mean | -2.63 |\n", - "| time/ | |\n", - "| fps | 362 |\n", - "| iterations | 9 |\n", - "| time_elapsed | 50 |\n", - "| total_timesteps | 18432 |\n", - "| train/ | |\n", - "| approx_kl | 0.021754535 |\n", - "| clip_fraction | 0.251 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -2.56 |\n", - "| explained_variance | 0.673 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.152 |\n", - "| n_updates | 80 |\n", - "| policy_gradient_loss | -0.0385 |\n", - "| value_loss | 0.4 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 3.8 |\n", - "| ep_rew_mean | -2.5 |\n", - "| time/ | |\n", - "| fps | 362 |\n", - "| iterations | 10 |\n", - "| time_elapsed | 56 |\n", - "| total_timesteps | 20480 |\n", - "| train/ | |\n", - "| approx_kl | 0.018548178 |\n", - "| clip_fraction | 0.239 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -2.46 |\n", - "| explained_variance | 0.702 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.218 |\n", - "| n_updates | 90 |\n", - "| policy_gradient_loss | -0.0361 |\n", - "| value_loss | 0.396 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 3.8 |\n", - "| ep_rew_mean | -2.34 |\n", - "| time/ | |\n", - "| fps | 362 |\n", - "| iterations | 11 |\n", - "| time_elapsed | 62 |\n", - "| total_timesteps | 22528 |\n", - "| train/ | |\n", - "| approx_kl | 0.016667131 |\n", - "| clip_fraction | 0.24 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -2.36 |\n", - "| explained_variance | 0.698 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.226 |\n", - "| n_updates | 100 |\n", - "| policy_gradient_loss | -0.037 |\n", - "| value_loss | 0.411 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 3.95 |\n", - "| ep_rew_mean | -2.31 |\n", - "| time/ | |\n", - "| fps | 361 |\n", - "| iterations | 12 |\n", - "| time_elapsed | 67 |\n", - "| total_timesteps | 24576 |\n", - "| train/ | |\n", - "| approx_kl | 0.020023255 |\n", - "| clip_fraction | 0.257 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -2.24 |\n", - "| explained_variance | 0.712 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0958 |\n", - "| n_updates | 110 |\n", - "| policy_gradient_loss | -0.0381 |\n", - "| value_loss | 0.406 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.19 |\n", - "| ep_rew_mean | -2.03 |\n", - "| time/ | |\n", - "| fps | 360 |\n", - "| iterations | 13 |\n", - "| time_elapsed | 73 |\n", - "| total_timesteps | 26624 |\n", - "| train/ | |\n", - "| approx_kl | 0.019943349 |\n", - "| clip_fraction | 0.266 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -2.13 |\n", - "| explained_variance | 0.712 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0974 |\n", - "| n_updates | 120 |\n", - "| policy_gradient_loss | -0.0387 |\n", - "| value_loss | 0.444 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.24 |\n", - "| ep_rew_mean | -1.96 |\n", - "| time/ | |\n", - "| fps | 360 |\n", - "| iterations | 14 |\n", - "| time_elapsed | 79 |\n", - "| total_timesteps | 28672 |\n", - "| train/ | |\n", - "| approx_kl | 0.022638176 |\n", - "| clip_fraction | 0.298 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.98 |\n", - "| explained_variance | 0.697 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.145 |\n", - "| n_updates | 130 |\n", - "| policy_gradient_loss | -0.0433 |\n", - "| value_loss | 0.486 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.45 |\n", - "| ep_rew_mean | -1.89 |\n", - "| time/ | |\n", - "| fps | 361 |\n", - "| iterations | 15 |\n", - "| time_elapsed | 84 |\n", - "| total_timesteps | 30720 |\n", - "| train/ | |\n", - "| approx_kl | 0.02262218 |\n", - "| clip_fraction | 0.354 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.77 |\n", - "| explained_variance | 0.727 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0807 |\n", - "| n_updates | 140 |\n", - "| policy_gradient_loss | -0.0439 |\n", - "| value_loss | 0.454 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.91 |\n", - "| ep_rew_mean | -1.65 |\n", - "| time/ | |\n", - "| fps | 362 |\n", - "| iterations | 16 |\n", - "| time_elapsed | 90 |\n", - "| total_timesteps | 32768 |\n", - "| train/ | |\n", - "| approx_kl | 0.023807548 |\n", - "| clip_fraction | 0.251 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.62 |\n", - "| explained_variance | 0.654 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.171 |\n", - "| n_updates | 150 |\n", - "| policy_gradient_loss | -0.0355 |\n", - "| value_loss | 0.686 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.86 |\n", - "| ep_rew_mean | -1.42 |\n", - "| time/ | |\n", - "| fps | 361 |\n", - "| iterations | 17 |\n", - "| time_elapsed | 96 |\n", - "| total_timesteps | 34816 |\n", - "| train/ | |\n", - "| approx_kl | 0.018085614 |\n", + "| approx_kl | 0.019258872 |\n", "| clip_fraction | 0.198 |\n", "| clip_range | 0.2 |\n", - "| entropy_loss | -1.49 |\n", - "| explained_variance | 0.699 |\n", + "| entropy_loss | -3.22 |\n", + "| explained_variance | -0.211 |\n", "| learning_rate | 0.0003 |\n", - "| loss | 0.339 |\n", - "| n_updates | 160 |\n", - "| policy_gradient_loss | -0.0297 |\n", - "| value_loss | 0.674 |\n", + "| loss | 0.187 |\n", + "| n_updates | 20 |\n", + "| policy_gradient_loss | -0.0215 |\n", + "| value_loss | 0.637 |\n", "-----------------------------------------\n" ] }, @@ -628,1887 +245,24 @@ "text": [ "-----------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 5.4 |\n", - "| ep_rew_mean | -1.88 |\n", + "| ep_len_mean | 2.73 |\n", + "| ep_rew_mean | -3.43 |\n", "| time/ | |\n", - "| fps | 361 |\n", - "| iterations | 18 |\n", - "| time_elapsed | 102 |\n", - "| total_timesteps | 36864 |\n", + "| fps | 681 |\n", + "| iterations | 4 |\n", + "| time_elapsed | 12 |\n", + "| total_timesteps | 8192 |\n", "| train/ | |\n", - "| approx_kl | 0.015559142 |\n", - "| clip_fraction | 0.222 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.35 |\n", - "| explained_variance | 0.719 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.354 |\n", - "| n_updates | 170 |\n", - "| policy_gradient_loss | -0.0349 |\n", - "| value_loss | 0.629 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 5.54 |\n", - "| ep_rew_mean | -1.22 |\n", - "| time/ | |\n", - "| fps | 360 |\n", - "| iterations | 19 |\n", - "| time_elapsed | 108 |\n", - "| total_timesteps | 38912 |\n", - "| train/ | |\n", - "| approx_kl | 0.014995611 |\n", - "| clip_fraction | 0.17 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.32 |\n", - "| explained_variance | 0.436 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.33 |\n", - "| n_updates | 180 |\n", - "| policy_gradient_loss | -0.031 |\n", - "| value_loss | 2.79 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 5.28 |\n", - "| ep_rew_mean | -1.2 |\n", - "| time/ | |\n", - "| fps | 360 |\n", - "| iterations | 20 |\n", - "| time_elapsed | 113 |\n", - "| total_timesteps | 40960 |\n", - "| train/ | |\n", - "| approx_kl | 0.018023107 |\n", - "| clip_fraction | 0.169 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.23 |\n", - "| explained_variance | 0.559 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.743 |\n", - "| n_updates | 190 |\n", - "| policy_gradient_loss | -0.0327 |\n", - "| value_loss | 1.24 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 6.22 |\n", - "| ep_rew_mean | -0.38 |\n", - "| time/ | |\n", - "| fps | 361 |\n", - "| iterations | 21 |\n", - "| time_elapsed | 119 |\n", - "| total_timesteps | 43008 |\n", - "| train/ | |\n", - "| approx_kl | 0.023376558 |\n", - "| clip_fraction | 0.29 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.07 |\n", - "| explained_variance | 0.628 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.502 |\n", - "| n_updates | 200 |\n", - "| policy_gradient_loss | -0.0451 |\n", - "| value_loss | 1.11 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 7.06 |\n", - "| ep_rew_mean | 0.26 |\n", - "| time/ | |\n", - "| fps | 362 |\n", - "| iterations | 22 |\n", - "| time_elapsed | 124 |\n", - "| total_timesteps | 45056 |\n", - "| train/ | |\n", - "| approx_kl | 0.023838695 |\n", - "| clip_fraction | 0.269 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.01 |\n", - "| explained_variance | 0.566 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.925 |\n", - "| n_updates | 210 |\n", - "| policy_gradient_loss | -0.0463 |\n", - "| value_loss | 1.71 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 7.93 |\n", - "| ep_rew_mean | 1.19 |\n", - "| time/ | |\n", - "| fps | 362 |\n", - "| iterations | 23 |\n", - "| time_elapsed | 129 |\n", - "| total_timesteps | 47104 |\n", - "| train/ | |\n", - "| approx_kl | 0.021363221 |\n", - "| clip_fraction | 0.229 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.906 |\n", - "| explained_variance | 0.594 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.819 |\n", - "| n_updates | 220 |\n", - "| policy_gradient_loss | -0.0426 |\n", - "| value_loss | 1.94 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 8.52 |\n", - "| ep_rew_mean | 1.98 |\n", - "| time/ | |\n", - "| fps | 363 |\n", - "| iterations | 24 |\n", - "| time_elapsed | 135 |\n", - "| total_timesteps | 49152 |\n", - "| train/ | |\n", - "| approx_kl | 0.022241611 |\n", - "| clip_fraction | 0.167 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.882 |\n", - "| explained_variance | 0.667 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.838 |\n", - "| n_updates | 230 |\n", - "| policy_gradient_loss | -0.0343 |\n", - "| value_loss | 1.73 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 8.9 |\n", - "| ep_rew_mean | 1.8 |\n", - "| time/ | |\n", - "| fps | 364 |\n", - "| iterations | 25 |\n", - "| time_elapsed | 140 |\n", - "| total_timesteps | 51200 |\n", - "| train/ | |\n", - "| approx_kl | 0.011297604 |\n", - "| clip_fraction | 0.111 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.859 |\n", - "| explained_variance | 0.763 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.5 |\n", - "| n_updates | 240 |\n", - "| policy_gradient_loss | -0.024 |\n", - "| value_loss | 1.35 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.01 |\n", - "| ep_rew_mean | 2.23 |\n", - "| time/ | |\n", - "| fps | 364 |\n", - "| iterations | 26 |\n", - "| time_elapsed | 145 |\n", - "| total_timesteps | 53248 |\n", - "| train/ | |\n", - "| approx_kl | 0.010706454 |\n", - "| clip_fraction | 0.0958 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.84 |\n", - "| explained_variance | 0.486 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.872 |\n", - "| n_updates | 250 |\n", - "| policy_gradient_loss | -0.0237 |\n", - "| value_loss | 2.77 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.75 |\n", - "| ep_rew_mean | 1.81 |\n", - "| time/ | |\n", - "| fps | 365 |\n", - "| iterations | 27 |\n", - "| time_elapsed | 151 |\n", - "| total_timesteps | 55296 |\n", - "| train/ | |\n", - "| approx_kl | 0.011905432 |\n", - "| clip_fraction | 0.12 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.785 |\n", - "| explained_variance | 0.838 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.121 |\n", - "| n_updates | 260 |\n", - "| policy_gradient_loss | -0.0206 |\n", - "| value_loss | 0.851 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.07 |\n", - "| ep_rew_mean | 2.13 |\n", - "| time/ | |\n", - "| fps | 365 |\n", - "| iterations | 28 |\n", - "| time_elapsed | 156 |\n", - "| total_timesteps | 57344 |\n", - "| train/ | |\n", - "| approx_kl | 0.009603689 |\n", - "| clip_fraction | 0.0931 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.808 |\n", - "| explained_variance | 0.684 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.391 |\n", - "| n_updates | 270 |\n", - "| policy_gradient_loss | -0.0184 |\n", - "| value_loss | 1.63 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 8.87 |\n", - "| ep_rew_mean | 2.51 |\n", - "| time/ | |\n", - "| fps | 366 |\n", - "| iterations | 29 |\n", - "| time_elapsed | 162 |\n", - "| total_timesteps | 59392 |\n", - "| train/ | |\n", - "| approx_kl | 0.0071417904 |\n", - "| clip_fraction | 0.0738 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.736 |\n", - "| explained_variance | 0.826 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.83 |\n", - "| n_updates | 280 |\n", - "| policy_gradient_loss | -0.0131 |\n", - "| value_loss | 1.72 |\n", - "------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.02 |\n", - "| ep_rew_mean | 2.74 |\n", - "| time/ | |\n", - "| fps | 366 |\n", - "| iterations | 30 |\n", - "| time_elapsed | 167 |\n", - "| total_timesteps | 61440 |\n", - "| train/ | |\n", - "| approx_kl | 0.009269893 |\n", - "| clip_fraction | 0.0918 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.686 |\n", - "| explained_variance | 0.912 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.163 |\n", - "| n_updates | 290 |\n", - "| policy_gradient_loss | -0.0166 |\n", - "| value_loss | 0.537 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.51 |\n", - "| ep_rew_mean | 2.29 |\n", - "| time/ | |\n", - "| fps | 367 |\n", - "| iterations | 31 |\n", - "| time_elapsed | 172 |\n", - "| total_timesteps | 63488 |\n", - "| train/ | |\n", - "| approx_kl | 0.010982089 |\n", - "| clip_fraction | 0.0863 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.657 |\n", - "| explained_variance | 0.946 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.339 |\n", - "| n_updates | 300 |\n", - "| policy_gradient_loss | -0.0149 |\n", - "| value_loss | 0.341 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 8.94 |\n", - "| ep_rew_mean | 2.7 |\n", - "| time/ | |\n", - "| fps | 368 |\n", - "| iterations | 32 |\n", - "| time_elapsed | 177 |\n", - "| total_timesteps | 65536 |\n", - "| train/ | |\n", - "| approx_kl | 0.01880536 |\n", - "| clip_fraction | 0.0622 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.667 |\n", - "| explained_variance | 0.778 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.13 |\n", - "| n_updates | 310 |\n", - "| policy_gradient_loss | -0.0107 |\n", - "| value_loss | 2.3 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 8.82 |\n", - "| ep_rew_mean | 2.6 |\n", - "| time/ | |\n", - "| fps | 369 |\n", - "| iterations | 33 |\n", - "| time_elapsed | 182 |\n", - "| total_timesteps | 67584 |\n", - "| train/ | |\n", - "| approx_kl | 0.013803964 |\n", - "| clip_fraction | 0.1 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.644 |\n", - "| explained_variance | 0.952 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0438 |\n", - "| n_updates | 320 |\n", - "| policy_gradient_loss | -0.0186 |\n", - "| value_loss | 0.31 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9 |\n", - "| ep_rew_mean | 2.9 |\n", - "| time/ | |\n", - "| fps | 370 |\n", - "| iterations | 34 |\n", - "| time_elapsed | 188 |\n", - "| total_timesteps | 69632 |\n", - "| train/ | |\n", - "| approx_kl | 0.011061303 |\n", - "| clip_fraction | 0.0942 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.597 |\n", - "| explained_variance | 0.905 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.293 |\n", - "| n_updates | 330 |\n", - "| policy_gradient_loss | -0.0158 |\n", - "| value_loss | 0.61 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.96 |\n", - "| ep_rew_mean | 2.6 |\n", - "| time/ | |\n", - "| fps | 370 |\n", - "| iterations | 35 |\n", - "| time_elapsed | 193 |\n", - "| total_timesteps | 71680 |\n", - "| train/ | |\n", - "| approx_kl | 0.016911192 |\n", - "| clip_fraction | 0.165 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.551 |\n", - "| explained_variance | 0.926 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.229 |\n", - "| n_updates | 340 |\n", - "| policy_gradient_loss | -0.0229 |\n", - "| value_loss | 0.519 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.5 |\n", - "| ep_rew_mean | 3.32 |\n", - "| time/ | |\n", - "| fps | 370 |\n", - "| iterations | 36 |\n", - "| time_elapsed | 198 |\n", - "| total_timesteps | 73728 |\n", - "| train/ | |\n", - "| approx_kl | 0.012311206 |\n", - "| clip_fraction | 0.119 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.595 |\n", - "| explained_variance | 0.709 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.68 |\n", - "| n_updates | 350 |\n", - "| policy_gradient_loss | -0.0275 |\n", - "| value_loss | 1.95 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.6 |\n", - "| ep_rew_mean | 3.3 |\n", - "| time/ | |\n", - "| fps | 370 |\n", - "| iterations | 37 |\n", - "| time_elapsed | 204 |\n", - "| total_timesteps | 75776 |\n", - "| train/ | |\n", - "| approx_kl | 0.059752032 |\n", - "| clip_fraction | 0.154 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.534 |\n", - "| explained_variance | 0.785 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.835 |\n", - "| n_updates | 360 |\n", - "| policy_gradient_loss | -0.0234 |\n", - "| value_loss | 1.46 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.8 |\n", - "| ep_rew_mean | 2.7 |\n", - "| time/ | |\n", - "| fps | 370 |\n", - "| iterations | 38 |\n", - "| time_elapsed | 209 |\n", - "| total_timesteps | 77824 |\n", - "| train/ | |\n", - "| approx_kl | 0.01475055 |\n", - "| clip_fraction | 0.0968 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.455 |\n", - "| explained_variance | 0.917 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.153 |\n", - "| n_updates | 370 |\n", - "| policy_gradient_loss | -0.019 |\n", - "| value_loss | 0.428 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.2 |\n", - "| ep_rew_mean | 3.01 |\n", - "| time/ | |\n", - "| fps | 370 |\n", - "| iterations | 39 |\n", - "| time_elapsed | 215 |\n", - "| total_timesteps | 79872 |\n", - "| train/ | |\n", - "| approx_kl | 0.012021113 |\n", - "| clip_fraction | 0.0851 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.558 |\n", - "| explained_variance | 0.702 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.8 |\n", - "| n_updates | 380 |\n", - "| policy_gradient_loss | -0.0284 |\n", - "| value_loss | 4.45 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.83 |\n", - "| ep_rew_mean | 3.57 |\n", - "| time/ | |\n", - "| fps | 370 |\n", - "| iterations | 40 |\n", - "| time_elapsed | 220 |\n", - "| total_timesteps | 81920 |\n", - "| train/ | |\n", - "| approx_kl | 0.010166377 |\n", - "| clip_fraction | 0.0623 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.454 |\n", - "| explained_variance | 0.728 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.43 |\n", - "| n_updates | 390 |\n", - "| policy_gradient_loss | -0.0201 |\n", - "| value_loss | 2.21 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.3 |\n", - "| ep_rew_mean | 3.14 |\n", - "| time/ | |\n", - "| fps | 371 |\n", - "| iterations | 41 |\n", - "| time_elapsed | 226 |\n", - "| total_timesteps | 83968 |\n", - "| train/ | |\n", - "| approx_kl | 0.017603599 |\n", - "| clip_fraction | 0.0748 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.384 |\n", - "| explained_variance | 0.957 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.152 |\n", - "| n_updates | 400 |\n", - "| policy_gradient_loss | -0.0122 |\n", - "| value_loss | 0.286 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.95 |\n", - "| ep_rew_mean | 3.81 |\n", - "| time/ | |\n", - "| fps | 371 |\n", - "| iterations | 42 |\n", - "| time_elapsed | 231 |\n", - "| total_timesteps | 86016 |\n", - "| train/ | |\n", - "| approx_kl | 0.028185518 |\n", - "| clip_fraction | 0.0954 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.383 |\n", - "| explained_variance | 0.811 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.412 |\n", - "| n_updates | 410 |\n", - "| policy_gradient_loss | -0.0224 |\n", - "| value_loss | 2.05 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.5 |\n", - "| ep_rew_mean | 3.57 |\n", - "| time/ | |\n", - "| fps | 371 |\n", - "| iterations | 43 |\n", - "| time_elapsed | 236 |\n", - "| total_timesteps | 88064 |\n", - "| train/ | |\n", - "| approx_kl | 0.02107103 |\n", - "| clip_fraction | 0.0702 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.375 |\n", - "| explained_variance | 0.732 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.156 |\n", - "| n_updates | 420 |\n", - "| policy_gradient_loss | -0.0342 |\n", - "| value_loss | 1.54 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.76 |\n", - "| ep_rew_mean | 3.7 |\n", - "| time/ | |\n", - "| fps | 371 |\n", - "| iterations | 44 |\n", - "| time_elapsed | 242 |\n", - "| total_timesteps | 90112 |\n", - "| train/ | |\n", - "| approx_kl | 0.007913441 |\n", - "| clip_fraction | 0.0349 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.346 |\n", - "| explained_variance | 0.827 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 2.92 |\n", - "| n_updates | 430 |\n", - "| policy_gradient_loss | -0.00355 |\n", - "| value_loss | 2.2 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 25.8 |\n", - "| ep_rew_mean | -12.4 |\n", - "| time/ | |\n", - "| fps | 371 |\n", - "| iterations | 45 |\n", - "| time_elapsed | 247 |\n", - "| total_timesteps | 92160 |\n", - "| train/ | |\n", - "| approx_kl | 0.5432366 |\n", - "| clip_fraction | 0.109 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.316 |\n", - "| explained_variance | 0.958 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.231 |\n", - "| n_updates | 440 |\n", - "| policy_gradient_loss | -0.0068 |\n", - "| value_loss | 0.313 |\n", - "---------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 45.9 |\n", - "| ep_rew_mean | -32.4 |\n", - "| time/ | |\n", - "| fps | 372 |\n", - "| iterations | 46 |\n", - "| time_elapsed | 252 |\n", - "| total_timesteps | 94208 |\n", - "| train/ | |\n", - "| approx_kl | 0.008981178 |\n", - "| clip_fraction | 0.0144 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.11 |\n", - "| explained_variance | -0.0603 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.91 |\n", - "| n_updates | 450 |\n", - "| policy_gradient_loss | -0.00156 |\n", - "| value_loss | 17.2 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 62.7 |\n", - "| ep_rew_mean | -49.3 |\n", - "| time/ | |\n", - "| fps | 373 |\n", - "| iterations | 47 |\n", - "| time_elapsed | 257 |\n", - "| total_timesteps | 96256 |\n", - "| train/ | |\n", - "| approx_kl | 0.0025778997 |\n", - "| clip_fraction | 0.0184 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.165 |\n", - "| explained_variance | 0.396 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 19.1 |\n", - "| n_updates | 460 |\n", - "| policy_gradient_loss | -0.00601 |\n", - "| value_loss | 45.6 |\n", - "------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 49.3 |\n", - "| ep_rew_mean | -35.6 |\n", - "| time/ | |\n", - "| fps | 373 |\n", - "| iterations | 48 |\n", - "| time_elapsed | 262 |\n", - "| total_timesteps | 98304 |\n", - "| train/ | |\n", - "| approx_kl | 0.014914533 |\n", - "| clip_fraction | 0.0816 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.272 |\n", - "| explained_variance | 0.394 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 33.5 |\n", - "| n_updates | 470 |\n", - "| policy_gradient_loss | -0.0164 |\n", - "| value_loss | 74.1 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 39.9 |\n", - "| ep_rew_mean | -26.3 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 49 |\n", - "| time_elapsed | 268 |\n", - "| total_timesteps | 100352 |\n", - "| train/ | |\n", - "| approx_kl | 0.0012997694 |\n", - "| clip_fraction | 0.026 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.24 |\n", - "| explained_variance | 0.465 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 44.5 |\n", - "| n_updates | 480 |\n", - "| policy_gradient_loss | -0.00881 |\n", - "| value_loss | 76.6 |\n", - "------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 27 |\n", - "| ep_rew_mean | -13.3 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 50 |\n", - "| time_elapsed | 273 |\n", - "| total_timesteps | 102400 |\n", - "| train/ | |\n", - "| approx_kl | 0.0014951692 |\n", - "| clip_fraction | 0.037 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.25 |\n", - "| explained_variance | 0.594 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 30.2 |\n", - "| n_updates | 490 |\n", - "| policy_gradient_loss | -0.00958 |\n", - "| value_loss | 77.4 |\n", - "------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 17.4 |\n", - "| ep_rew_mean | -3.57 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 51 |\n", - "| time_elapsed | 279 |\n", - "| total_timesteps | 104448 |\n", - "| train/ | |\n", - "| approx_kl | 0.0443189 |\n", - "| clip_fraction | 0.176 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.457 |\n", - "| explained_variance | 0.338 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 44.4 |\n", - "| n_updates | 500 |\n", - "| policy_gradient_loss | -0.0247 |\n", - "| value_loss | 70.8 |\n", - "---------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 13.3 |\n", - "| ep_rew_mean | 0.45 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 52 |\n", - "| time_elapsed | 284 |\n", - "| total_timesteps | 106496 |\n", - "| train/ | |\n", - "| approx_kl | 0.005663138 |\n", - "| clip_fraction | 0.069 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.463 |\n", - "| explained_variance | 0.558 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 14.7 |\n", - "| n_updates | 510 |\n", - "| policy_gradient_loss | -0.0117 |\n", - "| value_loss | 36.8 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 19.8 |\n", - "| ep_rew_mean | -5.92 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 53 |\n", - "| time_elapsed | 289 |\n", - "| total_timesteps | 108544 |\n", - "| train/ | |\n", - "| approx_kl | 0.020181399 |\n", - "| clip_fraction | 0.176 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.499 |\n", - "| explained_variance | 0.182 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 3.45 |\n", - "| n_updates | 520 |\n", - "| policy_gradient_loss | -0.0254 |\n", - "| value_loss | 9.2 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 11.4 |\n", - "| ep_rew_mean | 2.26 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 54 |\n", - "| time_elapsed | 295 |\n", - "| total_timesteps | 110592 |\n", - "| train/ | |\n", - "| approx_kl | 0.009738399 |\n", - "| clip_fraction | 0.108 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.538 |\n", - "| explained_variance | 0.891 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.88 |\n", - "| n_updates | 530 |\n", - "| policy_gradient_loss | -0.00573 |\n", - "| value_loss | 22 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.4 |\n", - "| ep_rew_mean | 3.06 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 55 |\n", - "| time_elapsed | 300 |\n", - "| total_timesteps | 112640 |\n", - "| train/ | |\n", - "| approx_kl | 0.01536967 |\n", - "| clip_fraction | 0.191 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.49 |\n", - "| explained_variance | 0.315 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.971 |\n", - "| n_updates | 540 |\n", - "| policy_gradient_loss | -0.0219 |\n", - "| value_loss | 6.46 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.8 |\n", - "| ep_rew_mean | 1.63 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 56 |\n", - "| time_elapsed | 305 |\n", - "| total_timesteps | 114688 |\n", - "| train/ | |\n", - "| approx_kl | 0.0688808 |\n", - "| clip_fraction | 0.264 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.496 |\n", - "| explained_variance | 0.838 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.205 |\n", - "| n_updates | 550 |\n", - "| policy_gradient_loss | -0.0406 |\n", - "| value_loss | 0.836 |\n", - "---------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.3 |\n", - "| ep_rew_mean | 2.98 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 57 |\n", - "| time_elapsed | 311 |\n", - "| total_timesteps | 116736 |\n", - "| train/ | |\n", - "| approx_kl | 0.04210388 |\n", - "| clip_fraction | 0.326 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.534 |\n", - "| explained_variance | 0.734 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.907 |\n", - "| n_updates | 560 |\n", - "| policy_gradient_loss | -0.0421 |\n", - "| value_loss | 1.91 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.3 |\n", - "| ep_rew_mean | 3.28 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 58 |\n", - "| time_elapsed | 316 |\n", - "| total_timesteps | 118784 |\n", - "| train/ | |\n", - "| approx_kl | 0.051502623 |\n", - "| clip_fraction | 0.224 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.434 |\n", - "| explained_variance | 0.875 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.404 |\n", - "| n_updates | 570 |\n", - "| policy_gradient_loss | -0.0345 |\n", - "| value_loss | 0.827 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.2 |\n", - "| ep_rew_mean | 3.78 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 59 |\n", - "| time_elapsed | 322 |\n", - "| total_timesteps | 120832 |\n", - "| train/ | |\n", - "| approx_kl | 0.07014565 |\n", - "| clip_fraction | 0.165 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.391 |\n", - "| explained_variance | 0.903 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.145 |\n", - "| n_updates | 580 |\n", - "| policy_gradient_loss | -0.0254 |\n", - "| value_loss | 0.681 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 16.4 |\n", - "| ep_rew_mean | -3.11 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 60 |\n", - "| time_elapsed | 327 |\n", - "| total_timesteps | 122880 |\n", - "| train/ | |\n", - "| approx_kl | 0.118292876 |\n", - "| clip_fraction | 0.151 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.466 |\n", - "| explained_variance | 0.952 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0778 |\n", - "| n_updates | 590 |\n", - "| policy_gradient_loss | -0.0214 |\n", - "| value_loss | 0.316 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 12.5 |\n", - "| ep_rew_mean | 1.05 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 61 |\n", - "| time_elapsed | 333 |\n", - "| total_timesteps | 124928 |\n", - "| train/ | |\n", - "| approx_kl | 0.061776154 |\n", - "| clip_fraction | 0.321 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.665 |\n", - "| explained_variance | 0.401 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 9.22 |\n", - "| n_updates | 600 |\n", - "| policy_gradient_loss | -0.0454 |\n", - "| value_loss | 17 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.8 |\n", - "| ep_rew_mean | 2.8 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 62 |\n", - "| time_elapsed | 338 |\n", - "| total_timesteps | 126976 |\n", - "| train/ | |\n", - "| approx_kl | 0.01477613 |\n", - "| clip_fraction | 0.153 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.579 |\n", - "| explained_variance | 0.682 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 6.12 |\n", - "| n_updates | 610 |\n", - "| policy_gradient_loss | -0.027 |\n", - "| value_loss | 10.5 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 11.4 |\n", - "| ep_rew_mean | 2.68 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 63 |\n", - "| time_elapsed | 344 |\n", - "| total_timesteps | 129024 |\n", - "| train/ | |\n", - "| approx_kl | 0.015770137 |\n", - "| clip_fraction | 0.128 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.507 |\n", - "| explained_variance | 0.869 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 2.13 |\n", - "| n_updates | 620 |\n", - "| policy_gradient_loss | -0.0253 |\n", - "| value_loss | 5.99 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.2 |\n", - "| ep_rew_mean | 3.48 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 64 |\n", - "| time_elapsed | 349 |\n", - "| total_timesteps | 131072 |\n", - "| train/ | |\n", - "| approx_kl | 0.007838536 |\n", - "| clip_fraction | 0.0755 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.446 |\n", - "| explained_variance | 0.749 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 11.9 |\n", - "| n_updates | 630 |\n", - "| policy_gradient_loss | -0.0106 |\n", - "| value_loss | 9.21 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10 |\n", - "| ep_rew_mean | 3.8 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 65 |\n", - "| time_elapsed | 354 |\n", - "| total_timesteps | 133120 |\n", - "| train/ | |\n", - "| approx_kl | 0.035350725 |\n", - "| clip_fraction | 0.101 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.368 |\n", - "| explained_variance | 0.867 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.388 |\n", - "| n_updates | 640 |\n", - "| policy_gradient_loss | -0.0213 |\n", - "| value_loss | 1.11 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 14.6 |\n", - "| ep_rew_mean | -0.59 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 66 |\n", - "| time_elapsed | 360 |\n", - "| total_timesteps | 135168 |\n", - "| train/ | |\n", - "| approx_kl | 0.042963736 |\n", - "| clip_fraction | 0.0817 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.321 |\n", - "| explained_variance | 0.941 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00919 |\n", - "| n_updates | 650 |\n", - "| policy_gradient_loss | -0.0127 |\n", - "| value_loss | 0.325 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.3 |\n", - "| ep_rew_mean | 3.64 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 67 |\n", - "| time_elapsed | 366 |\n", - "| total_timesteps | 137216 |\n", - "| train/ | |\n", - "| approx_kl | 0.08396668 |\n", - "| clip_fraction | 0.184 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.409 |\n", - "| explained_variance | 0.803 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 2.09 |\n", - "| n_updates | 660 |\n", - "| policy_gradient_loss | -0.0203 |\n", - "| value_loss | 8.99 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 19.8 |\n", - "| ep_rew_mean | -9.15 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 68 |\n", - "| time_elapsed | 371 |\n", - "| total_timesteps | 139264 |\n", - "| train/ | |\n", - "| approx_kl | 0.15326424 |\n", - "| clip_fraction | 0.199 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.39 |\n", - "| explained_variance | 0.894 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.122 |\n", - "| n_updates | 670 |\n", - "| policy_gradient_loss | -0.0201 |\n", - "| value_loss | 0.415 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.38 |\n", - "| ep_rew_mean | 3.18 |\n", - "| time/ | |\n", - "| fps | 374 |\n", - "| iterations | 69 |\n", - "| time_elapsed | 376 |\n", - "| total_timesteps | 141312 |\n", - "| train/ | |\n", - "| approx_kl | 0.21504487 |\n", - "| clip_fraction | 0.411 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.483 |\n", - "| explained_variance | 0.408 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 6.24 |\n", - "| n_updates | 680 |\n", - "| policy_gradient_loss | -0.0591 |\n", - "| value_loss | 18.4 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.98 |\n", - "| ep_rew_mean | 3.8 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 70 |\n", - "| time_elapsed | 382 |\n", - "| total_timesteps | 143360 |\n", - "| train/ | |\n", - "| approx_kl | 0.050312966 |\n", - "| clip_fraction | 0.0584 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.341 |\n", - "| explained_variance | 0.707 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0602 |\n", - "| n_updates | 690 |\n", - "| policy_gradient_loss | -0.00661 |\n", - "| value_loss | 0.845 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 11.7 |\n", - "| ep_rew_mean | 2.07 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 71 |\n", - "| time_elapsed | 387 |\n", - "| total_timesteps | 145408 |\n", - "| train/ | |\n", - "| approx_kl | 0.018565401 |\n", - "| clip_fraction | 0.0643 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.322 |\n", - "| explained_variance | 0.977 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.348 |\n", - "| n_updates | 700 |\n", - "| policy_gradient_loss | -0.0112 |\n", - "| value_loss | 0.175 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.2 |\n", - "| ep_rew_mean | 3.47 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 72 |\n", - "| time_elapsed | 392 |\n", - "| total_timesteps | 147456 |\n", - "| train/ | |\n", - "| approx_kl | 0.10186449 |\n", - "| clip_fraction | 0.19 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.395 |\n", - "| explained_variance | 0.47 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.66 |\n", - "| n_updates | 710 |\n", - "| policy_gradient_loss | -0.0419 |\n", - "| value_loss | 4.71 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.2 |\n", - "| ep_rew_mean | 3.72 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 73 |\n", - "| time_elapsed | 398 |\n", - "| total_timesteps | 149504 |\n", - "| train/ | |\n", - "| approx_kl | 0.01903234 |\n", - "| clip_fraction | 0.0737 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.333 |\n", - "| explained_variance | 0.723 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 2.71 |\n", - "| n_updates | 720 |\n", - "| policy_gradient_loss | -0.022 |\n", - "| value_loss | 2.97 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.94 |\n", - "| ep_rew_mean | 3.92 |\n", - "| time/ | |\n", - "| fps | 375 |\n", - "| iterations | 74 |\n", - "| time_elapsed | 403 |\n", - "| total_timesteps | 151552 |\n", - "| train/ | |\n", - "| approx_kl | 0.026403807 |\n", - "| clip_fraction | 0.065 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.322 |\n", - "| explained_variance | 0.934 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.24 |\n", - "| n_updates | 730 |\n", - "| policy_gradient_loss | -0.00481 |\n", - "| value_loss | 0.481 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.2 |\n", - "| ep_rew_mean | 3.81 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 75 |\n", - "| time_elapsed | 408 |\n", - "| total_timesteps | 153600 |\n", - "| train/ | |\n", - "| approx_kl | 0.011051587 |\n", - "| clip_fraction | 0.0593 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.324 |\n", - "| explained_variance | 0.984 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00908 |\n", - "| n_updates | 740 |\n", - "| policy_gradient_loss | -0.00591 |\n", - "| value_loss | 0.113 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.2 |\n", - "| ep_rew_mean | 3.63 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 76 |\n", - "| time_elapsed | 413 |\n", - "| total_timesteps | 155648 |\n", - "| train/ | |\n", - "| approx_kl | 0.00972967 |\n", - "| clip_fraction | 0.0544 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.339 |\n", - "| explained_variance | 0.922 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0226 |\n", - "| n_updates | 750 |\n", - "| policy_gradient_loss | -0.00438 |\n", - "| value_loss | 0.758 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.92 |\n", - "| ep_rew_mean | 3.86 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 77 |\n", - "| time_elapsed | 418 |\n", - "| total_timesteps | 157696 |\n", - "| train/ | |\n", - "| approx_kl | 0.013084366 |\n", - "| clip_fraction | 0.0481 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.348 |\n", - "| explained_variance | 0.812 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0736 |\n", - "| n_updates | 760 |\n", - "| policy_gradient_loss | -0.0157 |\n", - "| value_loss | 1.47 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 5.05 |\n", - "| ep_rew_mean | -2.29 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 78 |\n", - "| time_elapsed | 424 |\n", - "| total_timesteps | 159744 |\n", - "| train/ | |\n", - "| approx_kl | 0.42402273 |\n", - "| clip_fraction | 0.215 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.49 |\n", - "| explained_variance | 0.983 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00926 |\n", - "| n_updates | 770 |\n", - "| policy_gradient_loss | -0.0284 |\n", - "| value_loss | 0.109 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.53 |\n", - "| ep_rew_mean | -2.17 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 79 |\n", - "| time_elapsed | 429 |\n", - "| total_timesteps | 161792 |\n", - "| train/ | |\n", - "| approx_kl | 0.27915305 |\n", - "| clip_fraction | 0.138 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.376 |\n", - "| explained_variance | 0.503 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.373 |\n", - "| n_updates | 780 |\n", - "| policy_gradient_loss | -0.00444 |\n", - "| value_loss | 5.03 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.38 |\n", - "| ep_rew_mean | -1.74 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 80 |\n", - "| time_elapsed | 435 |\n", - "| total_timesteps | 163840 |\n", - "| train/ | |\n", - "| approx_kl | 0.019562341 |\n", - "| clip_fraction | 0.0921 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.457 |\n", - "| explained_variance | 0.562 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.638 |\n", - "| n_updates | 790 |\n", - "| policy_gradient_loss | -0.0128 |\n", - "| value_loss | 0.895 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.34 |\n", - "| ep_rew_mean | -1.7 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 81 |\n", - "| time_elapsed | 440 |\n", - "| total_timesteps | 165888 |\n", - "| train/ | |\n", - "| approx_kl | 0.18844175 |\n", - "| clip_fraction | 0.147 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.279 |\n", - "| explained_variance | 0.102 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.03 |\n", - "| n_updates | 800 |\n", - "| policy_gradient_loss | 0.013 |\n", - "| value_loss | 11.4 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 5.54 |\n", - "| ep_rew_mean | -0.66 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 82 |\n", - "| time_elapsed | 445 |\n", - "| total_timesteps | 167936 |\n", - "| train/ | |\n", - "| approx_kl | 0.06374202 |\n", - "| clip_fraction | 0.21 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.398 |\n", - "| explained_variance | 0.566 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.599 |\n", - "| n_updates | 810 |\n", - "| policy_gradient_loss | -0.0207 |\n", - "| value_loss | 1.59 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 7.65 |\n", - "| ep_rew_mean | 1.61 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 83 |\n", - "| time_elapsed | 451 |\n", - "| total_timesteps | 169984 |\n", - "| train/ | |\n", - "| approx_kl | 0.06125373 |\n", - "| clip_fraction | 0.251 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.434 |\n", - "| explained_variance | 0.299 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.27 |\n", - "| n_updates | 820 |\n", - "| policy_gradient_loss | -0.0399 |\n", - "| value_loss | 3.7 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 8.86 |\n", - "| ep_rew_mean | 2.46 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 84 |\n", - "| time_elapsed | 457 |\n", - "| total_timesteps | 172032 |\n", - "| train/ | |\n", - "| approx_kl | 0.047695376 |\n", + "| approx_kl | 0.021500897 |\n", "| clip_fraction | 0.171 |\n", "| clip_range | 0.2 |\n", - "| entropy_loss | -0.387 |\n", - "| explained_variance | 0.336 |\n", + "| entropy_loss | -3.17 |\n", + "| explained_variance | 0.378 |\n", "| learning_rate | 0.0003 |\n", - "| loss | 1.72 |\n", - "| n_updates | 830 |\n", - "| policy_gradient_loss | -0.0407 |\n", - "| value_loss | 3.91 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.84 |\n", - "| ep_rew_mean | 3.66 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 85 |\n", - "| time_elapsed | 462 |\n", - "| total_timesteps | 174080 |\n", - "| train/ | |\n", - "| approx_kl | 0.13684572 |\n", - "| clip_fraction | 0.133 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.346 |\n", - "| explained_variance | 0.625 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.13 |\n", - "| n_updates | 840 |\n", - "| policy_gradient_loss | -0.0231 |\n", - "| value_loss | 2.36 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 25.5 |\n", - "| ep_rew_mean | -12.2 |\n", - "| time/ | |\n", - "| fps | 376 |\n", - "| iterations | 86 |\n", - "| time_elapsed | 467 |\n", - "| total_timesteps | 176128 |\n", - "| train/ | |\n", - "| approx_kl | 0.36170986 |\n", - "| clip_fraction | 0.332 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.473 |\n", - "| explained_variance | 0.919 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.364 |\n", - "| n_updates | 850 |\n", - "| policy_gradient_loss | -0.0374 |\n", - "| value_loss | 0.278 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 17.2 |\n", - "| ep_rew_mean | -4.45 |\n", - "| time/ | |\n", - "| fps | 377 |\n", - "| iterations | 87 |\n", - "| time_elapsed | 472 |\n", - "| total_timesteps | 178176 |\n", - "| train/ | |\n", - "| approx_kl | 0.102079734 |\n", - "| clip_fraction | 0.34 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.613 |\n", - "| explained_variance | 0.0593 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 5.95 |\n", - "| n_updates | 860 |\n", - "| policy_gradient_loss | -0.04 |\n", - "| value_loss | 20.6 |\n", + "| loss | 0.185 |\n", + "| n_updates | 30 |\n", + "| policy_gradient_loss | -0.0214 |\n", + "| value_loss | 0.479 |\n", "-----------------------------------------\n" ] }, @@ -2518,385 +272,30 @@ "text": [ "-----------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 14.5 |\n", - "| ep_rew_mean | -2.36 |\n", + "| ep_len_mean | 2.92 |\n", + "| ep_rew_mean | -3.36 |\n", "| time/ | |\n", - "| fps | 378 |\n", - "| iterations | 88 |\n", - "| time_elapsed | 476 |\n", - "| total_timesteps | 180224 |\n", + "| fps | 682 |\n", + "| iterations | 5 |\n", + "| time_elapsed | 14 |\n", + "| total_timesteps | 10240 |\n", "| train/ | |\n", - "| approx_kl | 0.042367905 |\n", - "| clip_fraction | 0.264 |\n", + "| approx_kl | 0.018113121 |\n", + "| clip_fraction | 0.101 |\n", "| clip_range | 0.2 |\n", - "| entropy_loss | -0.799 |\n", - "| explained_variance | 0.69 |\n", + "| entropy_loss | -3.11 |\n", + "| explained_variance | 0.448 |\n", "| learning_rate | 0.0003 |\n", - "| loss | 4.22 |\n", - "| n_updates | 870 |\n", - "| policy_gradient_loss | -0.0265 |\n", - "| value_loss | 16.7 |\n", + "| loss | 0.203 |\n", + "| n_updates | 40 |\n", + "| policy_gradient_loss | -0.0183 |\n", + "| value_loss | 0.455 |\n", "-----------------------------------------\n" ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 12.3 |\n", - "| ep_rew_mean | 0.46 |\n", - "| time/ | |\n", - "| fps | 379 |\n", - "| iterations | 89 |\n", - "| time_elapsed | 480 |\n", - "| total_timesteps | 182272 |\n", - "| train/ | |\n", - "| approx_kl | 0.02418825 |\n", - "| clip_fraction | 0.262 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.744 |\n", - "| explained_variance | 0.75 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 3.08 |\n", - "| n_updates | 880 |\n", - "| policy_gradient_loss | -0.026 |\n", - "| value_loss | 16.2 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 11 |\n", - "| ep_rew_mean | 2.19 |\n", - "| time/ | |\n", - "| fps | 380 |\n", - "| iterations | 90 |\n", - "| time_elapsed | 484 |\n", - "| total_timesteps | 184320 |\n", - "| train/ | |\n", - "| approx_kl | 0.024347484 |\n", - "| clip_fraction | 0.271 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.669 |\n", - "| explained_variance | 0.675 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 4.76 |\n", - "| n_updates | 890 |\n", - "| policy_gradient_loss | -0.0243 |\n", - "| value_loss | 10.1 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 12.1 |\n", - "| ep_rew_mean | 1.35 |\n", - "| time/ | |\n", - "| fps | 381 |\n", - "| iterations | 91 |\n", - "| time_elapsed | 488 |\n", - "| total_timesteps | 186368 |\n", - "| train/ | |\n", - "| approx_kl | 0.02668532 |\n", - "| clip_fraction | 0.256 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.576 |\n", - "| explained_variance | 0.706 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.797 |\n", - "| n_updates | 900 |\n", - "| policy_gradient_loss | -0.0378 |\n", - "| value_loss | 1.6 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 11 |\n", - "| ep_rew_mean | 2.55 |\n", - "| time/ | |\n", - "| fps | 382 |\n", - "| iterations | 92 |\n", - "| time_elapsed | 492 |\n", - "| total_timesteps | 188416 |\n", - "| train/ | |\n", - "| approx_kl | 0.017341316 |\n", - "| clip_fraction | 0.0919 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.681 |\n", - "| explained_variance | 0.865 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 9.01 |\n", - "| n_updates | 910 |\n", - "| policy_gradient_loss | -0.0076 |\n", - "| value_loss | 25.5 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 10.2 |\n", - "| ep_rew_mean | 2.27 |\n", - "| time/ | |\n", - "| fps | 383 |\n", - "| iterations | 93 |\n", - "| time_elapsed | 496 |\n", - "| total_timesteps | 190464 |\n", - "| train/ | |\n", - "| approx_kl | 0.070169866 |\n", - "| clip_fraction | 0.224 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.476 |\n", - "| explained_variance | 0.791 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 13 |\n", - "| n_updates | 920 |\n", - "| policy_gradient_loss | -0.0203 |\n", - "| value_loss | 5.41 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.33 |\n", - "| ep_rew_mean | 3.17 |\n", - "| time/ | |\n", - "| fps | 384 |\n", - "| iterations | 94 |\n", - "| time_elapsed | 500 |\n", - "| total_timesteps | 192512 |\n", - "| train/ | |\n", - "| approx_kl | 0.062242664 |\n", - "| clip_fraction | 0.184 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.504 |\n", - "| explained_variance | 0.426 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.94 |\n", - "| n_updates | 930 |\n", - "| policy_gradient_loss | -0.0392 |\n", - "| value_loss | 6.22 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.85 |\n", - "| ep_rew_mean | 3.79 |\n", - "| time/ | |\n", - "| fps | 385 |\n", - "| iterations | 95 |\n", - "| time_elapsed | 504 |\n", - "| total_timesteps | 194560 |\n", - "| train/ | |\n", - "| approx_kl | 0.042221397 |\n", - "| clip_fraction | 0.0712 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.328 |\n", - "| explained_variance | 0.893 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.236 |\n", - "| n_updates | 940 |\n", - "| policy_gradient_loss | -0.0162 |\n", - "| value_loss | 0.692 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 8.17 |\n", - "| ep_rew_mean | 2.09 |\n", - "| time/ | |\n", - "| fps | 387 |\n", - "| iterations | 96 |\n", - "| time_elapsed | 507 |\n", - "| total_timesteps | 196608 |\n", - "| train/ | |\n", - "| approx_kl | 0.036962293 |\n", - "| clip_fraction | 0.154 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.374 |\n", - "| explained_variance | 0.981 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0447 |\n", - "| n_updates | 950 |\n", - "| policy_gradient_loss | -0.0142 |\n", - "| value_loss | 0.151 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 9.38 |\n", - "| ep_rew_mean | 3.36 |\n", - "| time/ | |\n", - "| fps | 387 |\n", - "| iterations | 97 |\n", - "| time_elapsed | 512 |\n", - "| total_timesteps | 198656 |\n", - "| train/ | |\n", - "| approx_kl | 0.06385146 |\n", - "| clip_fraction | 0.245 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.366 |\n", - "| explained_variance | 0.553 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 1.4 |\n", - "| n_updates | 960 |\n", - "| policy_gradient_loss | -0.0399 |\n", - "| value_loss | 2.79 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 24.8 |\n", - "| ep_rew_mean | -11.4 |\n", - "| time/ | |\n", - "| fps | 389 |\n", - "| iterations | 98 |\n", - "| time_elapsed | 515 |\n", - "| total_timesteps | 200704 |\n", - "| train/ | |\n", - "| approx_kl | 0.26313344 |\n", - "| clip_fraction | 0.178 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.363 |\n", - "| explained_variance | 0.898 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.247 |\n", - "| n_updates | 970 |\n", - "| policy_gradient_loss | -0.0285 |\n", - "| value_loss | 0.521 |\n", - "----------------------------------------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "528f9bb0103a402d83eb31e85dd97caf", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='0.438 MB of 0.438 MB uploaded\\r'), FloatProgress(value=1.0, max=1.0)))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "

Run history:


global_step▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
rollout/ep_len_mean▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▄█▅▄▂▂▃▂▂▄▂▂▂▁▁▂▂▃▂▂▂▄
rollout/ep_rew_mean▇▇▇▇▇▇▇▇▇█████████▅▁▅▆██▇██▆███▇▇██▇███▅
time/fps█▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃
train/approx_kl▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▁▂▃▂▁▁█▁▂▃▂▁▂▂▅
train/clip_fraction█▇▇▅▅▆▅▄▆▄▂▂▂▂▃▂▂▂▁▂▄▄▅▅▆▃▂▄▂▂▂▄▂▅▃▅▅▅▂▄
train/clip_range▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/entropy_loss▁▁▂▃▃▄▅▅▆▆▆▇▇▇▇▇▇▇██▇▇▇▇▇▇█▇▇▇█▇▇▇▇▆▇▇█▇
train/explained_variance▃▄▅▆▆▆▆▄▆▆▅▇█▇▆██▆▁▄▄▃▇▇▄▇█▇▆▆██▅▃▆▆▆▇▇▇
train/learning_rate▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆█▂▁▁▂▁▁▁▁▁▁▁▁▁▁▂▂▃▁▁
train/policy_gradient_loss▁▂▂▄▄▄▄▅▃▄▆▇▇▆▅▆▇▄█▆▆▅▄▄▃▅▇▆▇▆█▅▇▄▆▅▆▆▆▅
train/value_loss▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃██▂▁▁▃▂▁▁▁▁▁▁▁▁▁▃▂▂▁▁

Run summary:


global_step200704
rollout/ep_len_mean24.77
rollout/ep_rew_mean-11.45
time/fps389.0
train/approx_kl0.26313
train/clip_fraction0.17793
train/clip_range0.2
train/entropy_loss-0.36315
train/explained_variance0.89819
train/learning_rate0.0003
train/loss0.24744
train/policy_gradient_loss-0.02851
train/value_loss0.52073

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run confused-meadow-3 at: https://wandb.ai/fulltime/wordle/runs/ot2i0b8h
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 2 other file(s)" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Find logs at: ./wandb/run-20240319_162920-ot2i0b8h/logs" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ - "model = PPO(config[\"policy_type\"], env=env, verbose=0, tensorboard_log=f\"runs/{run.id}\")\n", + "model = PPO(config[\"policy_type\"], env=env, verbose=2, tensorboard_log=f\"runs/{run.id}\", batch_size=64)\n", "\n", "# Train for a certain number of timesteps\n", "model.learn(\n", @@ -2908,57 +307,32 @@ "\tprogress_bar=True\n", ")\n", "\n", - "run.finish()\n", - "\n", - "# Save the model\n", - "model.save(\"wordle_ppo_model\")" + "run.finish()" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "model.save(\"wordle_ppo_model\")" + "model.save(model_save_path)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "model = PPO.load(\"wordle_ppo_model\")" + "model = PPO.load(model_save_path)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1000/1000 [00:20<00:00, 49.06it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-6.703\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "rewards = 0\n", "for i in tqdm(range(1000)):\n", @@ -2970,6 +344,13 @@ " rewards += reward\n", "print(rewards / 1000)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/dqn_wordle.ipynb b/dqn_wordle.ipynb deleted file mode 100644 index dd02c83..0000000 --- a/dqn_wordle.ipynb +++ /dev/null @@ -1,338 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import gym\n", - "import gym_wordle\n", - "from stable_baselines3 import DQN, PPO, common\n", - "import numpy as np\n", - "import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">\n" - ] - } - ], - "source": [ - "env = gym_wordle.wordle.WordleEnv()\n", - "env = common.monitor.Monitor(env)\n", - "\n", - "print(env)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using cuda device\n", - "Wrapping the env in a DummyVecEnv.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6921a0721569456abf5bceac7e7b6b34", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.97 |\n", - "| ep_rew_mean | -63.8 |\n", - "| exploration_rate | 0.05 |\n", - "| time/ | |\n", - "| episodes | 10000 |\n", - "| fps | 1628 |\n", - "| time_elapsed | 30 |\n", - "| total_timesteps | 49995 |\n", - "----------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 5 |\n", - "| ep_rew_mean | -70.5 |\n", - "| exploration_rate | 0.05 |\n", - "| time/ | |\n", - "| episodes | 20000 |\n", - "| fps | 662 |\n", - "| time_elapsed | 150 |\n", - "| total_timesteps | 99992 |\n", - "| train/ | |\n", - "| learning_rate | 0.0001 |\n", - "| loss | 11.7 |\n", - "| n_updates | 12497 |\n", - "----------------------------------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "total_timesteps = 100_000\n", - "model = DQN(\"MlpPolicy\", env, verbose=1, device='cuda')\n", - "model.learn(total_timesteps=total_timesteps, log_interval=10_000, progress_bar=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "model.save(\"dqn_new_state\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Repository\\cse151b-final-project\\env\\Lib\\site-packages\\stable_baselines3\\common\\save_util.py:166: UserWarning: Could not deserialize object lr_schedule. Consider using `custom_objects` argument to replace this object.\n", - "Exception: code() argument 13 must be str, not int\n", - " warnings.warn(\n", - "c:\\Repository\\cse151b-final-project\\env\\Lib\\site-packages\\stable_baselines3\\common\\save_util.py:166: UserWarning: Could not deserialize object exploration_schedule. Consider using `custom_objects` argument to replace this object.\n", - "Exception: code() argument 13 must be str, not int\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "# model = DQN.load(\"dqn_wordle\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1.\n", - " 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.\n", - " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 0. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", - "[1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.\n", - " 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", - "[1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1.\n", - " 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.\n", - " 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", - "[1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1.\n", - " 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1.\n", - " 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.\n", - " 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", - "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.\n", - " 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1.\n", - " 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", - " 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n", - "[1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1.\n", - " 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.\n", - " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", - "[1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1.\n", - " 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.\n", - " 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]\n", - "[1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1.\n", - " 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.\n", - " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", - "[1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1.\n", - " 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.\n", - " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", - "[1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1.\n", - " 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1.\n", - " 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.\n", - " 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", - " 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", - "0\n" - ] - } - ], - "source": [ - "env = gym_wordle.wordle.WordleEnv()\n", - "\n", - "for i in range(1000):\n", - " \n", - " state, info = env.reset()\n", - "\n", - " done = False\n", - "\n", - " wins = 0\n", - "\n", - " while not done:\n", - "\n", - " action, _states = model.predict(state, deterministic=True)\n", - "\n", - " state, reward, done, truncated, info = env.step(action)\n", - "\n", - " print(state)\n", - " if info[\"correct\"]:\n", - " wins += 1\n", - "\n", - "print(wins)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", - " 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", - " 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", - " 0., 0., 0., 0., 0., 0., 0., 1.]),\n", - " -50)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "state, reward" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/letter_guess.py b/letter_guess.py index 94f5865..8822f8e 100644 --- a/letter_guess.py +++ b/letter_guess.py @@ -72,15 +72,16 @@ class LetterGuessingEnv(gym.Env): self.guess_prefix = '' self.round += 1 - # end after 5 rounds of total guesses - if self.round == 2: + # end after 3 rounds of total guesses + if self.round == 3: # reward = 5 done = True obs = self._get_obs() - if reward < -50: + if reward < -5: print(obs, reward, done) + exit(0) return obs, reward, done, False, {} @@ -91,7 +92,7 @@ class LetterGuessingEnv(gym.Env): self.letter_positions = np.ones((26, 4), dtype=np.int32) self.guessed_letters = set() self.guess_prefix = "" # Reset the guess prefix for the new episode - self.round = 1 + self.round = 0 return self._get_obs(), {} def encode_word(self, word):