cse151b-final-project/dqn_wordle.ipynb

1161 lines
55 KiB
Plaintext
Raw Normal View History

2024-03-13 18:04:30 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
2024-03-13 18:04:30 +00:00
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"import gym_wordle\n",
"from stable_baselines3 import DQN, PPO, common\n",
2024-03-13 20:57:23 +00:00
"import numpy as np\n",
"import tqdm"
2024-03-13 18:04:30 +00:00
]
},
{
"cell_type": "code",
"execution_count": 10,
2024-03-13 18:04:30 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Monitor<WordleEnv instance>>\n"
]
}
],
2024-03-13 18:04:30 +00:00
"source": [
"env = gym_wordle.wordle.WordleEnv()\n",
"env = common.monitor.Monitor(env)\n",
2024-03-13 18:04:30 +00:00
"\n",
"print(env)"
]
},
{
"cell_type": "code",
"execution_count": 11,
2024-03-13 18:04:30 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using cuda device\n",
2024-03-14 22:00:19 +00:00
"Wrapping the env in a DummyVecEnv.\n",
"---------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -126 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 455 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 1 |\n",
"| time_elapsed | 4 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 2048 |\n",
"---------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 2 |\n",
2024-03-14 22:00:19 +00:00
"| time_elapsed | 10 |\n",
"| total_timesteps | 4096 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.006769434 |\n",
"| clip_fraction | 0.0309 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | 0.00119 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 1.87e+03 |\n",
"| n_updates | 10 |\n",
"| policy_gradient_loss | -0.0533 |\n",
"| value_loss | 5.21e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -126 |\n",
"| time/ | |\n",
"| fps | 357 |\n",
"| iterations | 3 |\n",
"| time_elapsed | 17 |\n",
"| total_timesteps | 6144 |\n",
"| train/ | |\n",
"| approx_kl | 0.00641025 |\n",
"| clip_fraction | 0.0321 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | -0.0916 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 2.06e+03 |\n",
"| n_updates | 20 |\n",
"| policy_gradient_loss | -0.0489 |\n",
"| value_loss | 4.36e+03 |\n",
"----------------------------------------\n",
2024-03-14 22:00:19 +00:00
"------------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -121 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 347 |\n",
"| iterations | 4 |\n",
"| time_elapsed | 23 |\n",
"| total_timesteps | 8192 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.0073487614 |\n",
"| clip_fraction | 0.0466 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | -0.0298 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 1.81e+03 |\n",
"| n_updates | 30 |\n",
"| policy_gradient_loss | -0.0539 |\n",
"| value_loss | 3.73e+03 |\n",
2024-03-14 22:00:19 +00:00
"------------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -121 |\n",
"| time/ | |\n",
"| fps | 343 |\n",
"| iterations | 5 |\n",
"| time_elapsed | 29 |\n",
"| total_timesteps | 10240 |\n",
"| train/ | |\n",
"| approx_kl | 0.00845159 |\n",
"| clip_fraction | 0.068 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | -0.0105 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 864 |\n",
"| n_updates | 40 |\n",
"| policy_gradient_loss | -0.0601 |\n",
"| value_loss | 2.99e+03 |\n",
"----------------------------------------\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 341 |\n",
"| iterations | 6 |\n",
"| time_elapsed | 35 |\n",
"| total_timesteps | 12288 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.009948943 |\n",
"| clip_fraction | 0.0943 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | -0.00467 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 1.07e+03 |\n",
"| n_updates | 50 |\n",
"| policy_gradient_loss | -0.0664 |\n",
"| value_loss | 2.52e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 339 |\n",
"| iterations | 7 |\n",
"| time_elapsed | 42 |\n",
"| total_timesteps | 14336 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.011411648 |\n",
"| clip_fraction | 0.121 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | -0.00249 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 903 |\n",
"| n_updates | 60 |\n",
"| policy_gradient_loss | -0.0719 |\n",
"| value_loss | 2.2e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
"| time/ | |\n",
"| fps | 338 |\n",
"| iterations | 8 |\n",
"| time_elapsed | 48 |\n",
"| total_timesteps | 16384 |\n",
"| train/ | |\n",
"| approx_kl | 0.01300336 |\n",
"| clip_fraction | 0.159 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | -0.00149 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 839 |\n",
"| n_updates | 70 |\n",
"| policy_gradient_loss | -0.0779 |\n",
"| value_loss | 1.88e+03 |\n",
"----------------------------------------\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 336 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 9 |\n",
"| time_elapsed | 54 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 18432 |\n",
"| train/ | |\n",
"| approx_kl | 0.015219824 |\n",
"| clip_fraction | 0.211 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | -0.000915 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 770 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 80 |\n",
"| policy_gradient_loss | -0.0854 |\n",
"| value_loss | 1.61e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -128 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 335 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 10 |\n",
"| time_elapsed | 61 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 20480 |\n",
"| train/ | |\n",
"| approx_kl | 0.017209966 |\n",
"| clip_fraction | 0.27 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.47 |\n",
"| explained_variance | -0.000583 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 689 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 90 |\n",
"| policy_gradient_loss | -0.0912 |\n",
"| value_loss | 1.43e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -126 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 335 |\n",
"| iterations | 11 |\n",
"| time_elapsed | 67 |\n",
"| total_timesteps | 22528 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.020546965 |\n",
"| clip_fraction | 0.348 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.46 |\n",
"| explained_variance | -0.000374 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 605 |\n",
"| n_updates | 100 |\n",
"| policy_gradient_loss | -0.0989 |\n",
"| value_loss | 1.27e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -126 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 334 |\n",
"| iterations | 12 |\n",
"| time_elapsed | 73 |\n",
"| total_timesteps | 24576 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.03119991 |\n",
"| clip_fraction | 0.478 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.46 |\n",
"| explained_variance | -0.000229 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 510 |\n",
"| n_updates | 110 |\n",
"| policy_gradient_loss | -0.109 |\n",
"| value_loss | 1.17e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"---------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
"| time/ | |\n",
"| fps | 334 |\n",
"| iterations | 13 |\n",
"| time_elapsed | 79 |\n",
"| total_timesteps | 26624 |\n",
"| train/ | |\n",
"| approx_kl | 0.0502273 |\n",
"| clip_fraction | 0.605 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.46 |\n",
"| explained_variance | -0.000101 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 582 |\n",
"| n_updates | 120 |\n",
"| policy_gradient_loss | -0.127 |\n",
"| value_loss | 1.13e+03 |\n",
"---------------------------------------\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -121 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 334 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 14 |\n",
"| time_elapsed | 85 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 28672 |\n",
"| train/ | |\n",
"| approx_kl | 0.060225103 |\n",
"| clip_fraction | 0.736 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.45 |\n",
"| explained_variance | -3.97e-05 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 530 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 130 |\n",
"| policy_gradient_loss | -0.142 |\n",
"| value_loss | 1.13e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 332 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 15 |\n",
"| time_elapsed | 92 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 30720 |\n",
"| train/ | |\n",
"| approx_kl | 0.057931915 |\n",
"| clip_fraction | 0.743 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.45 |\n",
"| explained_variance | -2e-05 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 571 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 140 |\n",
"| policy_gradient_loss | -0.144 |\n",
"| value_loss | 1.14e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
"| time/ | |\n",
"| fps | 329 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 16 |\n",
"| time_elapsed | 99 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 32768 |\n",
"| train/ | |\n",
"| approx_kl | 0.06145256 |\n",
"| clip_fraction | 0.737 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.44 |\n",
"| explained_variance | -1.12e-05 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 612 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 150 |\n",
"| policy_gradient_loss | -0.143 |\n",
"| value_loss | 1.14e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -120 |\n",
"| time/ | |\n",
"| fps | 328 |\n",
"| iterations | 17 |\n",
"| time_elapsed | 106 |\n",
"| total_timesteps | 34816 |\n",
"| train/ | |\n",
"| approx_kl | 0.062183782 |\n",
"| clip_fraction | 0.731 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.43 |\n",
"| explained_variance | -7.63e-06 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 560 |\n",
"| n_updates | 160 |\n",
"| policy_gradient_loss | -0.142 |\n",
"| value_loss | 1.14e+03 |\n",
"-----------------------------------------\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -121 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 326 |\n",
"| iterations | 18 |\n",
"| time_elapsed | 112 |\n",
"| total_timesteps | 36864 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.06656339 |\n",
"| clip_fraction | 0.748 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.43 |\n",
"| explained_variance | -5.48e-06 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 605 |\n",
"| n_updates | 170 |\n",
"| policy_gradient_loss | -0.145 |\n",
"| value_loss | 1.12e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 326 |\n",
"| iterations | 19 |\n",
"| time_elapsed | 119 |\n",
"| total_timesteps | 38912 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.07115179 |\n",
"| clip_fraction | 0.767 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.42 |\n",
"| explained_variance | -3.93e-06 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 561 |\n",
"| n_updates | 180 |\n",
"| policy_gradient_loss | -0.146 |\n",
"| value_loss | 1.13e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
"| time/ | |\n",
"| fps | 326 |\n",
"| iterations | 20 |\n",
"| time_elapsed | 125 |\n",
"| total_timesteps | 40960 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.07023676 |\n",
"| clip_fraction | 0.74 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.41 |\n",
"| explained_variance | -2.86e-06 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 590 |\n",
"| n_updates | 190 |\n",
"| policy_gradient_loss | -0.144 |\n",
"| value_loss | 1.17e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"---------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 326 |\n",
"| iterations | 21 |\n",
"| time_elapsed | 131 |\n",
"| total_timesteps | 43008 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.0665413 |\n",
"| clip_fraction | 0.746 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.4 |\n",
"| explained_variance | -2.15e-06 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 550 |\n",
"| n_updates | 200 |\n",
"| policy_gradient_loss | -0.145 |\n",
"| value_loss | 1.16e+03 |\n",
2024-03-14 22:00:19 +00:00
"---------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
"| time/ | |\n",
"| fps | 325 |\n",
"| iterations | 22 |\n",
"| time_elapsed | 138 |\n",
"| total_timesteps | 45056 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.08091866 |\n",
"| clip_fraction | 0.745 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.38 |\n",
"| explained_variance | -1.91e-06 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 594 |\n",
"| n_updates | 210 |\n",
2024-03-14 22:00:19 +00:00
"| policy_gradient_loss | -0.144 |\n",
"| value_loss | 1.15e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -125 |\n",
"| time/ | |\n",
"| fps | 325 |\n",
"| iterations | 23 |\n",
"| time_elapsed | 144 |\n",
"| total_timesteps | 47104 |\n",
"| train/ | |\n",
"| approx_kl | 0.070498824 |\n",
"| clip_fraction | 0.734 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.36 |\n",
"| explained_variance | -1.19e-06 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 573 |\n",
"| n_updates | 220 |\n",
"| policy_gradient_loss | -0.144 |\n",
"| value_loss | 1.18e+03 |\n",
"-----------------------------------------\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 325 |\n",
"| iterations | 24 |\n",
"| time_elapsed | 151 |\n",
"| total_timesteps | 49152 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.06726791 |\n",
"| clip_fraction | 0.728 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.34 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -1.07e-06 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 564 |\n",
"| n_updates | 230 |\n",
"| policy_gradient_loss | -0.144 |\n",
2024-03-14 22:00:19 +00:00
"| value_loss | 1.18e+03 |\n",
"----------------------------------------\n",
"---------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -126 |\n",
"| time/ | |\n",
"| fps | 324 |\n",
"| iterations | 25 |\n",
"| time_elapsed | 157 |\n",
"| total_timesteps | 51200 |\n",
"| train/ | |\n",
"| approx_kl | 0.0721001 |\n",
"| clip_fraction | 0.727 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.31 |\n",
"| explained_variance | -8.34e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 579 |\n",
"| n_updates | 240 |\n",
"| policy_gradient_loss | -0.143 |\n",
"| value_loss | 1.14e+03 |\n",
"---------------------------------------\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 26 |\n",
"| time_elapsed | 164 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 53248 |\n",
"| train/ | |\n",
"| approx_kl | 0.08537817 |\n",
"| clip_fraction | 0.767 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.27 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -8.34e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 648 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 250 |\n",
"| policy_gradient_loss | -0.145 |\n",
"| value_loss | 1.16e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 27 |\n",
"| time_elapsed | 170 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 55296 |\n",
"| train/ | |\n",
"| approx_kl | 0.07838201 |\n",
"| clip_fraction | 0.757 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.21 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -5.96e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 580 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 260 |\n",
"| policy_gradient_loss | -0.144 |\n",
"| value_loss | 1.14e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 28 |\n",
"| time_elapsed | 176 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 57344 |\n",
"| train/ | |\n",
"| approx_kl | 0.08116107 |\n",
"| clip_fraction | 0.748 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -9.09 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -4.77e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 602 |\n",
"| n_updates | 270 |\n",
"| policy_gradient_loss | -0.144 |\n",
"| value_loss | 1.15e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -118 |\n",
"| time/ | |\n",
"| fps | 324 |\n",
"| iterations | 29 |\n",
"| time_elapsed | 183 |\n",
"| total_timesteps | 59392 |\n",
"| train/ | |\n",
"| approx_kl | 0.085108414 |\n",
"| clip_fraction | 0.741 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.87 |\n",
"| explained_variance | -4.77e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 562 |\n",
"| n_updates | 280 |\n",
"| policy_gradient_loss | -0.142 |\n",
"| value_loss | 1.18e+03 |\n",
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
"| time/ | |\n",
"| fps | 323 |\n",
"| iterations | 30 |\n",
"| time_elapsed | 189 |\n",
"| total_timesteps | 61440 |\n",
"| train/ | |\n",
"| approx_kl | 0.066152625 |\n",
"| clip_fraction | 0.722 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.75 |\n",
"| explained_variance | -4.77e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 568 |\n",
"| n_updates | 290 |\n",
"| policy_gradient_loss | -0.138 |\n",
"| value_loss | 1.14e+03 |\n",
"-----------------------------------------\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -120 |\n",
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 31 |\n",
"| time_elapsed | 195 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 63488 |\n",
"| train/ | |\n",
"| approx_kl | 0.06854295 |\n",
"| clip_fraction | 0.7 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.73 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -4.77e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 577 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 300 |\n",
"| policy_gradient_loss | -0.139 |\n",
2024-03-14 22:00:19 +00:00
"| value_loss | 1.14e+03 |\n",
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 32 |\n",
"| time_elapsed | 201 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 65536 |\n",
"| train/ | |\n",
"| approx_kl | 0.07200403 |\n",
"| clip_fraction | 0.702 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.65 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -4.77e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 570 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 310 |\n",
"| policy_gradient_loss | -0.134 |\n",
"| value_loss | 1.15e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 33 |\n",
"| time_elapsed | 208 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 67584 |\n",
"| train/ | |\n",
"| approx_kl | 0.07691643 |\n",
"| clip_fraction | 0.692 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.64 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -2.38e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 618 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 320 |\n",
"| policy_gradient_loss | -0.137 |\n",
"| value_loss | 1.16e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 325 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 34 |\n",
"| time_elapsed | 214 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 69632 |\n",
"| train/ | |\n",
"| approx_kl | 0.07179158 |\n",
"| clip_fraction | 0.69 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.56 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -2.38e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 684 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 330 |\n",
"| policy_gradient_loss | -0.139 |\n",
"| value_loss | 1.15e+03 |\n",
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -120 |\n",
"| time/ | |\n",
"| fps | 325 |\n",
"| iterations | 35 |\n",
"| time_elapsed | 220 |\n",
"| total_timesteps | 71680 |\n",
"| train/ | |\n",
"| approx_kl | 0.06354737 |\n",
"| clip_fraction | 0.676 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.45 |\n",
"| explained_variance | -2.38e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 573 |\n",
"| n_updates | 340 |\n",
"| policy_gradient_loss | -0.137 |\n",
"| value_loss | 1.17e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 325 |\n",
"| iterations | 36 |\n",
"| time_elapsed | 226 |\n",
"| total_timesteps | 73728 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.061548397 |\n",
"| clip_fraction | 0.658 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.38 |\n",
"| explained_variance | -1.19e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 572 |\n",
"| n_updates | 350 |\n",
"| policy_gradient_loss | -0.134 |\n",
"| value_loss | 1.12e+03 |\n",
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
"| time/ | |\n",
"| fps | 325 |\n",
"| iterations | 37 |\n",
"| time_elapsed | 232 |\n",
"| total_timesteps | 75776 |\n",
"| train/ | |\n",
"| approx_kl | 0.059452366 |\n",
"| clip_fraction | 0.651 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.33 |\n",
2024-03-14 22:00:19 +00:00
"| explained_variance | -2.38e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 551 |\n",
"| n_updates | 360 |\n",
"| policy_gradient_loss | -0.133 |\n",
2024-03-14 22:00:19 +00:00
"| value_loss | 1.16e+03 |\n",
"-----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 325 |\n",
"| iterations | 38 |\n",
"| time_elapsed | 239 |\n",
"| total_timesteps | 77824 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.06572275 |\n",
"| clip_fraction | 0.667 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -8.17 |\n",
"| explained_variance | -1.19e-07 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 554 |\n",
"| n_updates | 370 |\n",
"| policy_gradient_loss | -0.132 |\n",
2024-03-14 22:00:19 +00:00
"| value_loss | 1.15e+03 |\n",
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -125 |\n",
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 39 |\n",
"| time_elapsed | 245 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 79872 |\n",
"| train/ | |\n",
"| approx_kl | 0.05422177 |\n",
"| clip_fraction | 0.637 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -7.93 |\n",
"| explained_variance | -1.19e-07 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 600 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 380 |\n",
"| policy_gradient_loss | -0.127 |\n",
"| value_loss | 1.16e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 40 |\n",
"| time_elapsed | 252 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 81920 |\n",
"| train/ | |\n",
"| approx_kl | 0.05258019 |\n",
"| clip_fraction | 0.591 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -7.8 |\n",
"| explained_variance | -1.19e-07 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 682 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 390 |\n",
"| policy_gradient_loss | -0.123 |\n",
"| value_loss | 1.15e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 41 |\n",
"| time_elapsed | 258 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 83968 |\n",
"| train/ | |\n",
"| approx_kl | 0.053135283 |\n",
"| clip_fraction | 0.574 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -7.49 |\n",
"| explained_variance | 0 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 560 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 400 |\n",
"| policy_gradient_loss | -0.118 |\n",
"| value_loss | 1.16e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 324 |\n",
"| iterations | 42 |\n",
"| time_elapsed | 265 |\n",
"| total_timesteps | 86016 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.04523302 |\n",
"| clip_fraction | 0.543 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -7.3 |\n",
"| explained_variance | -2.38e-07 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 472 |\n",
"| n_updates | 410 |\n",
"| policy_gradient_loss | -0.105 |\n",
"| value_loss | 1.16e+03 |\n",
2024-03-14 22:00:19 +00:00
"----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -121 |\n",
"| time/ | |\n",
"| fps | 324 |\n",
"| iterations | 43 |\n",
"| time_elapsed | 271 |\n",
"| total_timesteps | 88064 |\n",
"| train/ | |\n",
"| approx_kl | 0.044511747 |\n",
"| clip_fraction | 0.487 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -7.08 |\n",
"| explained_variance | -2.38e-07 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 510 |\n",
"| n_updates | 420 |\n",
"| policy_gradient_loss | -0.101 |\n",
"| value_loss | 1.15e+03 |\n",
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -122 |\n",
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 44 |\n",
"| time_elapsed | 277 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 90112 |\n",
"| train/ | |\n",
"| approx_kl | 0.048598923 |\n",
"| clip_fraction | 0.489 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -6.8 |\n",
"| explained_variance | -1.19e-07 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 499 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 430 |\n",
"| policy_gradient_loss | -0.096 |\n",
2024-03-14 22:00:19 +00:00
"| value_loss | 1.15e+03 |\n",
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -124 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 324 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 45 |\n",
"| time_elapsed | 284 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 92160 |\n",
"| train/ | |\n",
"| approx_kl | 0.043928873 |\n",
"| clip_fraction | 0.514 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -6.63 |\n",
"| explained_variance | -2.38e-07 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 559 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 440 |\n",
"| policy_gradient_loss | -0.0893 |\n",
"| value_loss | 1.13e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -126 |\n",
"| time/ | |\n",
"| fps | 323 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 46 |\n",
"| time_elapsed | 290 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 94208 |\n",
"| train/ | |\n",
"| approx_kl | 0.053060684 |\n",
2024-03-14 22:00:19 +00:00
"| clip_fraction | 0.495 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -6.49 |\n",
"| explained_variance | 0 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 644 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 450 |\n",
"| policy_gradient_loss | -0.0849 |\n",
"| value_loss | 1.16e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -123 |\n",
"| time/ | |\n",
"| fps | 323 |\n",
"| iterations | 47 |\n",
"| time_elapsed | 297 |\n",
"| total_timesteps | 96256 |\n",
2024-03-14 22:00:19 +00:00
"| train/ | |\n",
"| approx_kl | 0.056993663 |\n",
"| clip_fraction | 0.587 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -6.33 |\n",
"| explained_variance | 0 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 608 |\n",
"| n_updates | 460 |\n",
"| policy_gradient_loss | -0.0832 |\n",
"| value_loss | 1.17e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -126 |\n",
"| time/ | |\n",
"| fps | 323 |\n",
"| iterations | 48 |\n",
"| time_elapsed | 303 |\n",
"| total_timesteps | 98304 |\n",
"| train/ | |\n",
"| approx_kl | 0.05388363 |\n",
"| clip_fraction | 0.536 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -6.15 |\n",
"| explained_variance | 0 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 572 |\n",
"| n_updates | 470 |\n",
"| policy_gradient_loss | -0.0811 |\n",
"| value_loss | 1.15e+03 |\n",
"----------------------------------------\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6 |\n",
"| ep_rew_mean | -125 |\n",
2024-03-14 22:00:19 +00:00
"| time/ | |\n",
"| fps | 323 |\n",
2024-03-14 22:00:19 +00:00
"| iterations | 49 |\n",
"| time_elapsed | 310 |\n",
2024-03-14 22:00:19 +00:00
"| total_timesteps | 100352 |\n",
"| train/ | |\n",
"| approx_kl | 0.039147377 |\n",
"| clip_fraction | 0.465 |\n",
2024-03-14 22:00:19 +00:00
"| clip_range | 0.2 |\n",
"| entropy_loss | -6.07 |\n",
"| explained_variance | 1.19e-07 |\n",
2024-03-14 22:00:19 +00:00
"| learning_rate | 0.0003 |\n",
"| loss | 523 |\n",
2024-03-14 22:00:19 +00:00
"| n_updates | 480 |\n",
"| policy_gradient_loss | -0.0778 |\n",
"| value_loss | 1.16e+03 |\n",
2024-03-14 22:00:19 +00:00
"-----------------------------------------\n"
]
2024-03-14 22:00:19 +00:00
},
{
"data": {
"text/plain": [
"<stable_baselines3.ppo.ppo.PPO at 0x7f2b51e133d0>"
2024-03-14 22:00:19 +00:00
]
},
"execution_count": 11,
2024-03-14 22:00:19 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-13 18:04:30 +00:00
"source": [
2024-03-14 22:00:19 +00:00
"total_timesteps = 100000\n",
"model = PPO(\"MlpPolicy\", env, verbose=1)\n",
"model.learn(total_timesteps=total_timesteps)"
2024-03-13 18:04:30 +00:00
]
},
{
"cell_type": "code",
"execution_count": 12,
2024-03-13 18:04:30 +00:00
"metadata": {},
"outputs": [],
"source": [
"model.save(\"dqn_wordle\")"
2024-03-13 18:04:30 +00:00
]
},
{
"cell_type": "code",
"execution_count": 13,
2024-03-13 18:04:30 +00:00
"metadata": {},
2024-03-13 20:57:23 +00:00
"outputs": [],
2024-03-13 18:04:30 +00:00
"source": [
"model = PPO.load(\"dqn_wordle\")"
2024-03-13 18:04:30 +00:00
]
},
{
"cell_type": "code",
"execution_count": 23,
2024-03-13 18:04:30 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-14 22:00:19 +00:00
"0\n"
]
}
],
2024-03-13 20:57:23 +00:00
"source": [
"env = gym_wordle.wordle.WordleEnv()\n",
"\n",
"for i in range(1000):\n",
" \n",
" state = env.reset()\n",
"\n",
" done = False\n",
"\n",
" wins = 0\n",
"\n",
" while not done:\n",
"\n",
" action, _states = model.predict(state, deterministic=True)\n",
"\n",
" state, reward, done, info = env.step(action)\n",
"\n",
" if info[\"correct\"]:\n",
" wins += 1\n",
2024-03-14 22:00:19 +00:00
"\n",
"print(wins)\n"
2024-03-13 20:57:23 +00:00
]
2024-03-13 18:04:30 +00:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}