{ "cells": [ { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "import gym\n", "import gym_wordle\n", "from stable_baselines3 import DQN, PPO, common\n", "import numpy as np\n", "import tqdm" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">\n" ] } ], "source": [
 "# Build the Wordle environment and wrap it in SB3's Monitor in one expression.\n",
 "# Monitor presumably supplies the ep_len_mean / ep_rew_mean rollout statistics\n",
 "# shown in the training log -- TODO confirm against the SB3 docs.\n",
 "env = common.monitor.Monitor(gym_wordle.wordle.WordleEnv())\n",
 "\n",
 "print(env)"
 ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using cuda device\n", "Wrapping the env in a DummyVecEnv.\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -126 |\n", "| time/ | |\n", "| fps | 455 |\n", "| iterations | 1 |\n", "| time_elapsed | 4 |\n", "| total_timesteps | 2048 |\n", "---------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 2 |\n", "| time_elapsed | 10 |\n", "| total_timesteps | 4096 |\n", "| train/ | |\n", "| approx_kl | 0.006769434 |\n", "| clip_fraction | 0.0309 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | 0.00119 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.87e+03 |\n", "| n_updates | 10 |\n", "| policy_gradient_loss | -0.0533 |\n", "| value_loss | 5.21e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -126 |\n", "| time/ | |\n", "| fps | 357 |\n", "| iterations | 3 |\n", "| time_elapsed | 17 |\n", "| total_timesteps | 6144 |\n", "| train/ | |\n", "| approx_kl | 0.00641025 |\n", "| clip_fraction | 0.0321 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | -0.0916 |\n", "| 
learning_rate | 0.0003 |\n", "| loss | 2.06e+03 |\n", "| n_updates | 20 |\n", "| policy_gradient_loss | -0.0489 |\n", "| value_loss | 4.36e+03 |\n", "----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -121 |\n", "| time/ | |\n", "| fps | 347 |\n", "| iterations | 4 |\n", "| time_elapsed | 23 |\n", "| total_timesteps | 8192 |\n", "| train/ | |\n", "| approx_kl | 0.0073487614 |\n", "| clip_fraction | 0.0466 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | -0.0298 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.81e+03 |\n", "| n_updates | 30 |\n", "| policy_gradient_loss | -0.0539 |\n", "| value_loss | 3.73e+03 |\n", "------------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -121 |\n", "| time/ | |\n", "| fps | 343 |\n", "| iterations | 5 |\n", "| time_elapsed | 29 |\n", "| total_timesteps | 10240 |\n", "| train/ | |\n", "| approx_kl | 0.00845159 |\n", "| clip_fraction | 0.068 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | -0.0105 |\n", "| learning_rate | 0.0003 |\n", "| loss | 864 |\n", "| n_updates | 40 |\n", "| policy_gradient_loss | -0.0601 |\n", "| value_loss | 2.99e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 341 |\n", "| iterations | 6 |\n", "| time_elapsed | 35 |\n", "| total_timesteps | 12288 |\n", "| train/ | |\n", "| approx_kl | 0.009948943 |\n", "| clip_fraction | 0.0943 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | -0.00467 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.07e+03 |\n", "| n_updates | 50 |\n", "| policy_gradient_loss | -0.0664 |\n", "| value_loss | 2.52e+03 |\n", 
"-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 339 |\n", "| iterations | 7 |\n", "| time_elapsed | 42 |\n", "| total_timesteps | 14336 |\n", "| train/ | |\n", "| approx_kl | 0.011411648 |\n", "| clip_fraction | 0.121 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | -0.00249 |\n", "| learning_rate | 0.0003 |\n", "| loss | 903 |\n", "| n_updates | 60 |\n", "| policy_gradient_loss | -0.0719 |\n", "| value_loss | 2.2e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 338 |\n", "| iterations | 8 |\n", "| time_elapsed | 48 |\n", "| total_timesteps | 16384 |\n", "| train/ | |\n", "| approx_kl | 0.01300336 |\n", "| clip_fraction | 0.159 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | -0.00149 |\n", "| learning_rate | 0.0003 |\n", "| loss | 839 |\n", "| n_updates | 70 |\n", "| policy_gradient_loss | -0.0779 |\n", "| value_loss | 1.88e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 336 |\n", "| iterations | 9 |\n", "| time_elapsed | 54 |\n", "| total_timesteps | 18432 |\n", "| train/ | |\n", "| approx_kl | 0.015219824 |\n", "| clip_fraction | 0.211 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | -0.000915 |\n", "| learning_rate | 0.0003 |\n", "| loss | 770 |\n", "| n_updates | 80 |\n", "| policy_gradient_loss | -0.0854 |\n", "| value_loss | 1.61e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -128 |\n", "| 
time/ | |\n", "| fps | 335 |\n", "| iterations | 10 |\n", "| time_elapsed | 61 |\n", "| total_timesteps | 20480 |\n", "| train/ | |\n", "| approx_kl | 0.017209966 |\n", "| clip_fraction | 0.27 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.47 |\n", "| explained_variance | -0.000583 |\n", "| learning_rate | 0.0003 |\n", "| loss | 689 |\n", "| n_updates | 90 |\n", "| policy_gradient_loss | -0.0912 |\n", "| value_loss | 1.43e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -126 |\n", "| time/ | |\n", "| fps | 335 |\n", "| iterations | 11 |\n", "| time_elapsed | 67 |\n", "| total_timesteps | 22528 |\n", "| train/ | |\n", "| approx_kl | 0.020546965 |\n", "| clip_fraction | 0.348 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.46 |\n", "| explained_variance | -0.000374 |\n", "| learning_rate | 0.0003 |\n", "| loss | 605 |\n", "| n_updates | 100 |\n", "| policy_gradient_loss | -0.0989 |\n", "| value_loss | 1.27e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -126 |\n", "| time/ | |\n", "| fps | 334 |\n", "| iterations | 12 |\n", "| time_elapsed | 73 |\n", "| total_timesteps | 24576 |\n", "| train/ | |\n", "| approx_kl | 0.03119991 |\n", "| clip_fraction | 0.478 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.46 |\n", "| explained_variance | -0.000229 |\n", "| learning_rate | 0.0003 |\n", "| loss | 510 |\n", "| n_updates | 110 |\n", "| policy_gradient_loss | -0.109 |\n", "| value_loss | 1.17e+03 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 334 |\n", "| iterations | 13 |\n", "| time_elapsed | 79 |\n", "| total_timesteps | 26624 |\n", "| train/ | |\n", "| approx_kl | 0.0502273 
|\n", "| clip_fraction | 0.605 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.46 |\n", "| explained_variance | -0.000101 |\n", "| learning_rate | 0.0003 |\n", "| loss | 582 |\n", "| n_updates | 120 |\n", "| policy_gradient_loss | -0.127 |\n", "| value_loss | 1.13e+03 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -121 |\n", "| time/ | |\n", "| fps | 334 |\n", "| iterations | 14 |\n", "| time_elapsed | 85 |\n", "| total_timesteps | 28672 |\n", "| train/ | |\n", "| approx_kl | 0.060225103 |\n", "| clip_fraction | 0.736 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.45 |\n", "| explained_variance | -3.97e-05 |\n", "| learning_rate | 0.0003 |\n", "| loss | 530 |\n", "| n_updates | 130 |\n", "| policy_gradient_loss | -0.142 |\n", "| value_loss | 1.13e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 332 |\n", "| iterations | 15 |\n", "| time_elapsed | 92 |\n", "| total_timesteps | 30720 |\n", "| train/ | |\n", "| approx_kl | 0.057931915 |\n", "| clip_fraction | 0.743 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.45 |\n", "| explained_variance | -2e-05 |\n", "| learning_rate | 0.0003 |\n", "| loss | 571 |\n", "| n_updates | 140 |\n", "| policy_gradient_loss | -0.144 |\n", "| value_loss | 1.14e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 329 |\n", "| iterations | 16 |\n", "| time_elapsed | 99 |\n", "| total_timesteps | 32768 |\n", "| train/ | |\n", "| approx_kl | 0.06145256 |\n", "| clip_fraction | 0.737 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.44 |\n", "| explained_variance | -1.12e-05 |\n", "| learning_rate | 0.0003 |\n", "| 
loss | 612 |\n", "| n_updates | 150 |\n", "| policy_gradient_loss | -0.143 |\n", "| value_loss | 1.14e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -120 |\n", "| time/ | |\n", "| fps | 328 |\n", "| iterations | 17 |\n", "| time_elapsed | 106 |\n", "| total_timesteps | 34816 |\n", "| train/ | |\n", "| approx_kl | 0.062183782 |\n", "| clip_fraction | 0.731 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.43 |\n", "| explained_variance | -7.63e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 560 |\n", "| n_updates | 160 |\n", "| policy_gradient_loss | -0.142 |\n", "| value_loss | 1.14e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -121 |\n", "| time/ | |\n", "| fps | 326 |\n", "| iterations | 18 |\n", "| time_elapsed | 112 |\n", "| total_timesteps | 36864 |\n", "| train/ | |\n", "| approx_kl | 0.06656339 |\n", "| clip_fraction | 0.748 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.43 |\n", "| explained_variance | -5.48e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 605 |\n", "| n_updates | 170 |\n", "| policy_gradient_loss | -0.145 |\n", "| value_loss | 1.12e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 326 |\n", "| iterations | 19 |\n", "| time_elapsed | 119 |\n", "| total_timesteps | 38912 |\n", "| train/ | |\n", "| approx_kl | 0.07115179 |\n", "| clip_fraction | 0.767 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.42 |\n", "| explained_variance | -3.93e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 561 |\n", "| n_updates | 180 |\n", "| policy_gradient_loss | -0.146 |\n", "| value_loss | 1.13e+03 |\n", "----------------------------------------\n", 
"----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 326 |\n", "| iterations | 20 |\n", "| time_elapsed | 125 |\n", "| total_timesteps | 40960 |\n", "| train/ | |\n", "| approx_kl | 0.07023676 |\n", "| clip_fraction | 0.74 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.41 |\n", "| explained_variance | -2.86e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 590 |\n", "| n_updates | 190 |\n", "| policy_gradient_loss | -0.144 |\n", "| value_loss | 1.17e+03 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 326 |\n", "| iterations | 21 |\n", "| time_elapsed | 131 |\n", "| total_timesteps | 43008 |\n", "| train/ | |\n", "| approx_kl | 0.0665413 |\n", "| clip_fraction | 0.746 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.4 |\n", "| explained_variance | -2.15e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 550 |\n", "| n_updates | 200 |\n", "| policy_gradient_loss | -0.145 |\n", "| value_loss | 1.16e+03 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 325 |\n", "| iterations | 22 |\n", "| time_elapsed | 138 |\n", "| total_timesteps | 45056 |\n", "| train/ | |\n", "| approx_kl | 0.08091866 |\n", "| clip_fraction | 0.745 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.38 |\n", "| explained_variance | -1.91e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 594 |\n", "| n_updates | 210 |\n", "| policy_gradient_loss | -0.144 |\n", "| value_loss | 1.15e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -125 |\n", "| time/ | |\n", "| fps | 325 |\n", "| iterations | 
23 |\n", "| time_elapsed | 144 |\n", "| total_timesteps | 47104 |\n", "| train/ | |\n", "| approx_kl | 0.070498824 |\n", "| clip_fraction | 0.734 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.36 |\n", "| explained_variance | -1.19e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 573 |\n", "| n_updates | 220 |\n", "| policy_gradient_loss | -0.144 |\n", "| value_loss | 1.18e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 325 |\n", "| iterations | 24 |\n", "| time_elapsed | 151 |\n", "| total_timesteps | 49152 |\n", "| train/ | |\n", "| approx_kl | 0.06726791 |\n", "| clip_fraction | 0.728 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.34 |\n", "| explained_variance | -1.07e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 564 |\n", "| n_updates | 230 |\n", "| policy_gradient_loss | -0.144 |\n", "| value_loss | 1.18e+03 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -126 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 25 |\n", "| time_elapsed | 157 |\n", "| total_timesteps | 51200 |\n", "| train/ | |\n", "| approx_kl | 0.0721001 |\n", "| clip_fraction | 0.727 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.31 |\n", "| explained_variance | -8.34e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 579 |\n", "| n_updates | 240 |\n", "| policy_gradient_loss | -0.143 |\n", "| value_loss | 1.14e+03 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 26 |\n", "| time_elapsed | 164 |\n", "| total_timesteps | 53248 |\n", "| train/ | |\n", "| approx_kl | 0.08537817 |\n", "| clip_fraction | 0.767 |\n", "| clip_range | 
0.2 |\n", "| entropy_loss | -9.27 |\n", "| explained_variance | -8.34e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 648 |\n", "| n_updates | 250 |\n", "| policy_gradient_loss | -0.145 |\n", "| value_loss | 1.16e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 27 |\n", "| time_elapsed | 170 |\n", "| total_timesteps | 55296 |\n", "| train/ | |\n", "| approx_kl | 0.07838201 |\n", "| clip_fraction | 0.757 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.21 |\n", "| explained_variance | -5.96e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 580 |\n", "| n_updates | 260 |\n", "| policy_gradient_loss | -0.144 |\n", "| value_loss | 1.14e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 28 |\n", "| time_elapsed | 176 |\n", "| total_timesteps | 57344 |\n", "| train/ | |\n", "| approx_kl | 0.08116107 |\n", "| clip_fraction | 0.748 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -9.09 |\n", "| explained_variance | -4.77e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 602 |\n", "| n_updates | 270 |\n", "| policy_gradient_loss | -0.144 |\n", "| value_loss | 1.15e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -118 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 29 |\n", "| time_elapsed | 183 |\n", "| total_timesteps | 59392 |\n", "| train/ | |\n", "| approx_kl | 0.085108414 |\n", "| clip_fraction | 0.741 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.87 |\n", "| explained_variance | -4.77e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 562 |\n", "| n_updates | 280 |\n", "| 
policy_gradient_loss | -0.142 |\n", "| value_loss | 1.18e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 323 |\n", "| iterations | 30 |\n", "| time_elapsed | 189 |\n", "| total_timesteps | 61440 |\n", "| train/ | |\n", "| approx_kl | 0.066152625 |\n", "| clip_fraction | 0.722 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.75 |\n", "| explained_variance | -4.77e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 568 |\n", "| n_updates | 290 |\n", "| policy_gradient_loss | -0.138 |\n", "| value_loss | 1.14e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -120 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 31 |\n", "| time_elapsed | 195 |\n", "| total_timesteps | 63488 |\n", "| train/ | |\n", "| approx_kl | 0.06854295 |\n", "| clip_fraction | 0.7 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.73 |\n", "| explained_variance | -4.77e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 577 |\n", "| n_updates | 300 |\n", "| policy_gradient_loss | -0.139 |\n", "| value_loss | 1.14e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 32 |\n", "| time_elapsed | 201 |\n", "| total_timesteps | 65536 |\n", "| train/ | |\n", "| approx_kl | 0.07200403 |\n", "| clip_fraction | 0.702 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.65 |\n", "| explained_variance | -4.77e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 570 |\n", "| n_updates | 310 |\n", "| policy_gradient_loss | -0.134 |\n", "| value_loss | 1.15e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| 
rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 33 |\n", "| time_elapsed | 208 |\n", "| total_timesteps | 67584 |\n", "| train/ | |\n", "| approx_kl | 0.07691643 |\n", "| clip_fraction | 0.692 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.64 |\n", "| explained_variance | -2.38e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 618 |\n", "| n_updates | 320 |\n", "| policy_gradient_loss | -0.137 |\n", "| value_loss | 1.16e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 325 |\n", "| iterations | 34 |\n", "| time_elapsed | 214 |\n", "| total_timesteps | 69632 |\n", "| train/ | |\n", "| approx_kl | 0.07179158 |\n", "| clip_fraction | 0.69 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.56 |\n", "| explained_variance | -2.38e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 684 |\n", "| n_updates | 330 |\n", "| policy_gradient_loss | -0.139 |\n", "| value_loss | 1.15e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -120 |\n", "| time/ | |\n", "| fps | 325 |\n", "| iterations | 35 |\n", "| time_elapsed | 220 |\n", "| total_timesteps | 71680 |\n", "| train/ | |\n", "| approx_kl | 0.06354737 |\n", "| clip_fraction | 0.676 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.45 |\n", "| explained_variance | -2.38e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 573 |\n", "| n_updates | 340 |\n", "| policy_gradient_loss | -0.137 |\n", "| value_loss | 1.17e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 325 |\n", "| iterations | 36 |\n", "| time_elapsed | 226 |\n", "| 
total_timesteps | 73728 |\n", "| train/ | |\n", "| approx_kl | 0.061548397 |\n", "| clip_fraction | 0.658 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.38 |\n", "| explained_variance | -1.19e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 572 |\n", "| n_updates | 350 |\n", "| policy_gradient_loss | -0.134 |\n", "| value_loss | 1.12e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 325 |\n", "| iterations | 37 |\n", "| time_elapsed | 232 |\n", "| total_timesteps | 75776 |\n", "| train/ | |\n", "| approx_kl | 0.059452366 |\n", "| clip_fraction | 0.651 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.33 |\n", "| explained_variance | -2.38e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 551 |\n", "| n_updates | 360 |\n", "| policy_gradient_loss | -0.133 |\n", "| value_loss | 1.16e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 325 |\n", "| iterations | 38 |\n", "| time_elapsed | 239 |\n", "| total_timesteps | 77824 |\n", "| train/ | |\n", "| approx_kl | 0.06572275 |\n", "| clip_fraction | 0.667 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -8.17 |\n", "| explained_variance | -1.19e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 554 |\n", "| n_updates | 370 |\n", "| policy_gradient_loss | -0.132 |\n", "| value_loss | 1.15e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -125 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 39 |\n", "| time_elapsed | 245 |\n", "| total_timesteps | 79872 |\n", "| train/ | |\n", "| approx_kl | 0.05422177 |\n", "| clip_fraction | 0.637 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -7.93 
|\n", "| explained_variance | -1.19e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 600 |\n", "| n_updates | 380 |\n", "| policy_gradient_loss | -0.127 |\n", "| value_loss | 1.16e+03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 40 |\n", "| time_elapsed | 252 |\n", "| total_timesteps | 81920 |\n", "| train/ | |\n", "| approx_kl | 0.05258019 |\n", "| clip_fraction | 0.591 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -7.8 |\n", "| explained_variance | -1.19e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 682 |\n", "| n_updates | 390 |\n", "| policy_gradient_loss | -0.123 |\n", "| value_loss | 1.15e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 41 |\n", "| time_elapsed | 258 |\n", "| total_timesteps | 83968 |\n", "| train/ | |\n", "| approx_kl | 0.053135283 |\n", "| clip_fraction | 0.574 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -7.49 |\n", "| explained_variance | 0 |\n", "| learning_rate | 0.0003 |\n", "| loss | 560 |\n", "| n_updates | 400 |\n", "| policy_gradient_loss | -0.118 |\n", "| value_loss | 1.16e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 42 |\n", "| time_elapsed | 265 |\n", "| total_timesteps | 86016 |\n", "| train/ | |\n", "| approx_kl | 0.04523302 |\n", "| clip_fraction | 0.543 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -7.3 |\n", "| explained_variance | -2.38e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 472 |\n", "| n_updates | 410 |\n", "| policy_gradient_loss | -0.105 |\n", "| 
value_loss | 1.16e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -121 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 43 |\n", "| time_elapsed | 271 |\n", "| total_timesteps | 88064 |\n", "| train/ | |\n", "| approx_kl | 0.044511747 |\n", "| clip_fraction | 0.487 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -7.08 |\n", "| explained_variance | -2.38e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 510 |\n", "| n_updates | 420 |\n", "| policy_gradient_loss | -0.101 |\n", "| value_loss | 1.15e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -122 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 44 |\n", "| time_elapsed | 277 |\n", "| total_timesteps | 90112 |\n", "| train/ | |\n", "| approx_kl | 0.048598923 |\n", "| clip_fraction | 0.489 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -6.8 |\n", "| explained_variance | -1.19e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 499 |\n", "| n_updates | 430 |\n", "| policy_gradient_loss | -0.096 |\n", "| value_loss | 1.15e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -124 |\n", "| time/ | |\n", "| fps | 324 |\n", "| iterations | 45 |\n", "| time_elapsed | 284 |\n", "| total_timesteps | 92160 |\n", "| train/ | |\n", "| approx_kl | 0.043928873 |\n", "| clip_fraction | 0.514 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -6.63 |\n", "| explained_variance | -2.38e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 559 |\n", "| n_updates | 440 |\n", "| policy_gradient_loss | -0.0893 |\n", "| value_loss | 1.13e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 
6 |\n", "| ep_rew_mean | -126 |\n", "| time/ | |\n", "| fps | 323 |\n", "| iterations | 46 |\n", "| time_elapsed | 290 |\n", "| total_timesteps | 94208 |\n", "| train/ | |\n", "| approx_kl | 0.053060684 |\n", "| clip_fraction | 0.495 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -6.49 |\n", "| explained_variance | 0 |\n", "| learning_rate | 0.0003 |\n", "| loss | 644 |\n", "| n_updates | 450 |\n", "| policy_gradient_loss | -0.0849 |\n", "| value_loss | 1.16e+03 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -123 |\n", "| time/ | |\n", "| fps | 323 |\n", "| iterations | 47 |\n", "| time_elapsed | 297 |\n", "| total_timesteps | 96256 |\n", "| train/ | |\n", "| approx_kl | 0.056993663 |\n", "| clip_fraction | 0.587 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -6.33 |\n", "| explained_variance | 0 |\n", "| learning_rate | 0.0003 |\n", "| loss | 608 |\n", "| n_updates | 460 |\n", "| policy_gradient_loss | -0.0832 |\n", "| value_loss | 1.17e+03 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -126 |\n", "| time/ | |\n", "| fps | 323 |\n", "| iterations | 48 |\n", "| time_elapsed | 303 |\n", "| total_timesteps | 98304 |\n", "| train/ | |\n", "| approx_kl | 0.05388363 |\n", "| clip_fraction | 0.536 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -6.15 |\n", "| explained_variance | 0 |\n", "| learning_rate | 0.0003 |\n", "| loss | 572 |\n", "| n_updates | 470 |\n", "| policy_gradient_loss | -0.0811 |\n", "| value_loss | 1.15e+03 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6 |\n", "| ep_rew_mean | -125 |\n", "| time/ | |\n", "| fps | 323 |\n", "| iterations | 49 |\n", "| time_elapsed | 310 |\n", "| total_timesteps | 100352 |\n", "| train/ | |\n", "| 
approx_kl | 0.039147377 |\n", "| clip_fraction | 0.465 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -6.07 |\n", "| explained_variance | 1.19e-07 |\n", "| learning_rate | 0.0003 |\n", "| loss | 523 |\n", "| n_updates | 480 |\n", "| policy_gradient_loss | -0.0778 |\n", "| value_loss | 1.16e+03 |\n", "-----------------------------------------\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Train a PPO agent with the MLP policy on the monitored Wordle environment.\n",
 "# The stored log advances 2048 timesteps per iteration and stops at 100352,\n",
 "# i.e. the first rollout boundary at or past the requested budget.\n",
 "total_timesteps = 100000\n",
 "model = PPO(\"MlpPolicy\", env, verbose=1)\n",
 "model.learn(total_timesteps=total_timesteps)"
 ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [
 "# NOTE: the checkpoint is named \"dqn_wordle\" although the model is PPO; the name is\n",
 "# left unchanged so that previously saved checkpoints still load.\n",
 "model.save(\"dqn_wordle\")"
 ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
 "# Reload the PPO checkpoint written by the previous cell.\n",
 "model = PPO.load(\"dqn_wordle\")"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Evaluate the reloaded policy on a fresh environment that is not wrapped in Monitor.\n",
 "N_EVAL_EPISODES = 1000\n",
 "eval_env = gym_wordle.wordle.WordleEnv()  # separate name: keeps the training `env` intact\n",
 "\n",
 "# The tally is initialised once, before the loop, so it accumulates over all episodes\n",
 "# (resetting it inside the loop would leave only the last game's result).\n",
 "wins = 0\n",
 "\n",
 "for _ in range(N_EVAL_EPISODES):\n",
 "    state = eval_env.reset()\n",
 "    done = False\n",
 "\n",
 "    # Play one game to completion, always asking the model with deterministic=True.\n",
 "    while not done:\n",
 "        action, _next_hidden = model.predict(state, deterministic=True)\n",
 "        state, _reward, done, info = eval_env.step(action)\n",
 "\n",
 "    # `info` comes from the episode's final step; count the game when the env flags it correct.\n",
 "    if info[\"correct\"]:\n",
 "        wins += 1\n",
 "\n",
 "print(f\"won {wins} of {N_EVAL_EPISODES} games ({wins / N_EVAL_EPISODES:.1%})\")\n"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 2 }