cse151b-final-project/dqn_wordle.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym_wordle\n",
    "from stable_baselines3 import DQN, PPO, common\n",
    "import numpy as np\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<Monitor<WordleEnv instance>>\n"
     ]
    }
   ],
   "source": [
    "# Build the Wordle environment and wrap it in stable_baselines3's Monitor in one step.\n",
    "env = common.monitor.Monitor(gym_wordle.wordle.WordleEnv())\n",
    "print(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using cuda device\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 6        |\n",
      "|    ep_rew_mean     | 2.14     |\n",
      "| time/              |          |\n",
      "|    fps             | 750      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 2        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 6           |\n",
      "|    ep_rew_mean          | 4.59        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 625         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 6           |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.022059526 |\n",
      "|    clip_fraction        | 0.331       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -9.47       |\n",
      "|    explained_variance   | -0.0118     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 130         |\n",
      "|    n_updates            | 10          |\n",
      "|    policy_gradient_loss | -0.0851     |\n",
      "|    value_loss           | 253         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 6           |\n",
      "|    ep_rew_mean          | 5.86        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 585         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 10          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024416003 |\n",
      "|    clip_fraction        | 0.462       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -9.47       |\n",
      "|    explained_variance   | 0.152       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 85.2        |\n",
      "|    n_updates            | 20          |\n",
      "|    policy_gradient_loss | -0.0987     |\n",
      "|    value_loss           | 218         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 6           |\n",
      "|    ep_rew_mean          | 4.75        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 566         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 14          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.026305672 |\n",
      "|    clip_fraction        | 0.45        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -9.47       |\n",
      "|    explained_variance   | 0.161       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 144         |\n",
      "|    n_updates            | 30          |\n",
      "|    policy_gradient_loss | -0.105      |\n",
      "|    value_loss           | 220         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 6          |\n",
      "|    ep_rew_mean          | 1.47       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 554        |\n",
      "|    iterations           | 5          |\n",
      "|    time_elapsed         | 18         |\n",
      "|    total_timesteps      | 10240      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02928267 |\n",
      "|    clip_fraction        | 0.498      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -9.46      |\n",
      "|    explained_variance   | 0.167      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 127        |\n",
      "|    n_updates            | 40         |\n",
      "|    policy_gradient_loss | -0.116     |\n",
      "|    value_loss           | 207        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 6           |\n",
      "|    ep_rew_mean          | 1.62        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 546         |\n",
      "|    iterations           | 6           |\n",
      "|    time_elapsed         | 22          |\n",
      "|    total_timesteps      | 12288       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.028425258 |\n",
      "|    clip_fraction        | 0.483       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -9.46       |\n",
      "|    explained_variance   | 0.143       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 109         |\n",
      "|    n_updates            | 50          |\n",
      "|    policy_gradient_loss | -0.117      |\n",
      "|    value_loss           | 240         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 5.98        |\n",
      "|    ep_rew_mean          | 6.14        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 541         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 26          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.026178032 |\n",
      "|    clip_fraction        | 0.453       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -9.46       |\n",
      "|    explained_variance   | 0.174       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 141         |\n",
      "|    n_updates            | 60          |\n",
      "|    policy_gradient_loss | -0.116      |\n",
      "|    value_loss           | 235         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 6          |\n",
      "|    ep_rew_mean          | 3.03       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 537        |\n",
      "|    iterations           | 8          |\n",
      "|    time_elapsed         | 30         |\n",
      "|    total_timesteps      | 16384      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02457074 |\n",
      "|    clip_fraction        | 0.423      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -9.45      |\n",
      "|    explained_variance   | 0.171      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 111        |\n",
      "|    n_updates            | 70         |\n",
      "|    policy_gradient_loss | -0.112     |\n",
      "|    value_loss           | 212        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 6           |\n",
      "|    ep_rew_mean          | 9.54        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 532         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 34          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024578478 |\n",
      "|    clip_fraction        | 0.417       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -9.45       |\n",
      "|    explained_variance   | 0.178       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 121         |\n",
      "|    n_updates            | 80          |\n",
      "|    policy_gradient_loss | -0.114      |\n",
      "|    value_loss           | 232         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 6           |\n",
      "|    ep_rew_mean          | 3.81        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 527         |\n",
      "|    iterations           | 10          |\n",
      "|    time_elapsed         | 38          |\n",
      "|    total_timesteps      | 20480       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.022704324 |\n",
      "|    clip_fraction        | 0.379       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -9.45       |\n",
      "|    explained_variance   | 0.194       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 108         |\n",
      "|    n_updates            | 90          |\n",
      "|    policy_gradient_loss | -0.112      |\n",
      "|    value_loss           | 216         |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<stable_baselines3.ppo.ppo.PPO at 0x7f86ef4ddcd0>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Train a PPO agent with an MLP policy on the monitored Wordle env.\n",
    "# NOTE(review): the recorded log ends at 20480 steps, above this budget --\n",
    "# presumably because PPO collects whole 2048-step rollouts; confirm.\n",
    "total_timesteps = 20_000\n",
    "model = PPO(\n",
    "    policy=\"MlpPolicy\",\n",
    "    env=env,\n",
    "    verbose=1,\n",
    "    device='cuda',\n",
    ")\n",
    "model.learn(total_timesteps=total_timesteps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the trained policy. NOTE: the artifact is called dqn_wordle, but the\n",
    "# model built above is PPO, not DQN.\n",
    "model.save(path=\"dqn_wordle\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the saved PPO policy from disk, replacing the in-memory model.\n",
    "model = PPO.load(path=\"dqn_wordle\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1000/1000 [00:03<00:00, 252.17it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 7 18  1 19 16  3  3  3  2  3]\n",
      " [16  9  5 14  4  3  3  3  3  3]\n",
      " [16  9  5 14  4  3  3  3  3  3]\n",
      " [16  9  5 14  4  3  3  3  3  3]\n",
      " [ 7 18  1 19 16  3  3  3  2  3]\n",
      " [ 7 18  1 19 16  3  3  3  2  3]] -54 {'correct': False, 'guesses': defaultdict(<class 'int'>, {'grasp': 3, 'piend': 3})}\n",
      "0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Evaluate the trained policy greedily on a fresh, un-monitored Wordle env.\n",
    "env = gym_wordle.wordle.WordleEnv()\n",
    "\n",
    "n_episodes = 1000\n",
    "# Tally across ALL episodes: initialise once, outside the episode loop\n",
    "# (resetting it per episode would make the final count at most 1).\n",
    "wins = 0\n",
    "\n",
    "for _ in tqdm(range(n_episodes)):\n",
    "    state, info = env.reset()\n",
    "    done = False\n",
    "\n",
    "    while not done:\n",
    "        action, _states = model.predict(state, deterministic=True)\n",
    "        state, reward, terminated, truncated, info = env.step(action)\n",
    "        # A gymnasium episode ends on either flag; honour both so that a\n",
    "        # truncated episode cannot loop forever.\n",
    "        done = terminated or truncated\n",
    "\n",
    "    if info[\"correct\"]:\n",
    "        wins += 1\n",
    "\n",
    "# Inspect the last episode, then report how many of the games were solved.\n",
    "print(state, reward, info)\n",
    "print(wins)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
switch to notebook 2024-03-13 18:04:30 +00:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"execution_count": 1,`
switch to notebook 2024-03-13 18:04:30 +00:00			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import gym_wordle\n",`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"from stable_baselines3 import DQN, PPO, common\n",`
minor changes 2024-03-13 20:57:23 +00:00			`"import numpy as np\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"from tqdm import tqdm"`
switch to notebook 2024-03-13 18:04:30 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"execution_count": 2,`
switch to notebook 2024-03-13 18:04:30 +00:00			`"metadata": {},`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"<Monitor<WordleEnv instance>>\n"`
			`]`
			`}`
			`],`
switch to notebook 2024-03-13 18:04:30 +00:00			`"source": [`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"env = gym_wordle.wordle.WordleEnv()\n",`
			`"env = common.monitor.Monitor(env)\n",`
switch to notebook 2024-03-13 18:04:30 +00:00			`"\n",`
			`"print(env)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"execution_count": 3,`
switch to notebook 2024-03-13 18:04:30 +00:00			`"metadata": {},`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"Using cuda device\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"Wrapping the env in a DummyVecEnv.\n",`
			`"---------------------------------\n",`
			`"\| rollout/ \| \|\n",`
			`"\| ep_len_mean \| 6 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| ep_rew_mean \| 2.14 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| time/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| fps \| 750 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| iterations \| 1 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| time_elapsed \| 2 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| total_timesteps \| 2048 \|\n",`
			`"---------------------------------\n",`
			`"-----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
			`"\| ep_len_mean \| 6 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| ep_rew_mean \| 4.59 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| time/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| fps \| 625 \|\n",`
upload wordle env, fix indexing issue in wordle env, attempt to improve reward (no improvement) 2024-03-14 23:47:11 +00:00			`"\| iterations \| 2 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| time_elapsed \| 6 \|\n",`
upload wordle env, fix indexing issue in wordle env, attempt to improve reward (no improvement) 2024-03-14 23:47:11 +00:00			`"\| total_timesteps \| 4096 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| train/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| approx_kl \| 0.022059526 \|\n",`
			`"\| clip_fraction \| 0.331 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| clip_range \| 0.2 \|\n",`
			`"\| entropy_loss \| -9.47 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| explained_variance \| -0.0118 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| learning_rate \| 0.0003 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| loss \| 130 \|\n",`
upload wordle env, fix indexing issue in wordle env, attempt to improve reward (no improvement) 2024-03-14 23:47:11 +00:00			`"\| n_updates \| 10 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| policy_gradient_loss \| -0.0851 \|\n",`
			`"\| value_loss \| 253 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"-----------------------------------------\n",`
			`"-----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
			`"\| ep_len_mean \| 6 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| ep_rew_mean \| 5.86 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| time/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| fps \| 585 \|\n",`
			`"\| iterations \| 3 \|\n",`
			`"\| time_elapsed \| 10 \|\n",`
			`"\| total_timesteps \| 6144 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| train/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| approx_kl \| 0.024416003 \|\n",`
			`"\| clip_fraction \| 0.462 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| clip_range \| 0.2 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| entropy_loss \| -9.47 \|\n",`
			`"\| explained_variance \| 0.152 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| learning_rate \| 0.0003 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| loss \| 85.2 \|\n",`
			`"\| n_updates \| 20 \|\n",`
			`"\| policy_gradient_loss \| -0.0987 \|\n",`
			`"\| value_loss \| 218 \|\n",`
			`"-----------------------------------------\n",`
			`"-----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
			`"\| ep_len_mean \| 6 \|\n",`
			`"\| ep_rew_mean \| 4.75 \|\n",`
			`"\| time/ \| \|\n",`
			`"\| fps \| 566 \|\n",`
			`"\| iterations \| 4 \|\n",`
			`"\| time_elapsed \| 14 \|\n",`
			`"\| total_timesteps \| 8192 \|\n",`
			`"\| train/ \| \|\n",`
			`"\| approx_kl \| 0.026305672 \|\n",`
			`"\| clip_fraction \| 0.45 \|\n",`
			`"\| clip_range \| 0.2 \|\n",`
			`"\| entropy_loss \| -9.47 \|\n",`
			`"\| explained_variance \| 0.161 \|\n",`
			`"\| learning_rate \| 0.0003 \|\n",`
			`"\| loss \| 144 \|\n",`
			`"\| n_updates \| 30 \|\n",`
			`"\| policy_gradient_loss \| -0.105 \|\n",`
			`"\| value_loss \| 220 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"-----------------------------------------\n",`
			`"----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
			`"\| ep_len_mean \| 6 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| ep_rew_mean \| 1.47 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| time/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| fps \| 554 \|\n",`
			`"\| iterations \| 5 \|\n",`
			`"\| time_elapsed \| 18 \|\n",`
			`"\| total_timesteps \| 10240 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| train/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| approx_kl \| 0.02928267 \|\n",`
			`"\| clip_fraction \| 0.498 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| clip_range \| 0.2 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| entropy_loss \| -9.46 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| explained_variance \| 0.167 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| learning_rate \| 0.0003 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| loss \| 127 \|\n",`
			`"\| n_updates \| 40 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| policy_gradient_loss \| -0.116 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| value_loss \| 207 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"----------------------------------------\n",`
upload wordle env, fix indexing issue in wordle env, attempt to improve reward (no improvement) 2024-03-14 23:47:11 +00:00			`"-----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
			`"\| ep_len_mean \| 6 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| ep_rew_mean \| 1.62 \|\n",`
upload wordle env, fix indexing issue in wordle env, attempt to improve reward (no improvement) 2024-03-14 23:47:11 +00:00			`"\| time/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| fps \| 546 \|\n",`
			`"\| iterations \| 6 \|\n",`
			`"\| time_elapsed \| 22 \|\n",`
			`"\| total_timesteps \| 12288 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| train/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| approx_kl \| 0.028425258 \|\n",`
			`"\| clip_fraction \| 0.483 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| clip_range \| 0.2 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| entropy_loss \| -9.46 \|\n",`
			`"\| explained_variance \| 0.143 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| learning_rate \| 0.0003 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| loss \| 109 \|\n",`
			`"\| n_updates \| 50 \|\n",`
			`"\| policy_gradient_loss \| -0.117 \|\n",`
			`"\| value_loss \| 240 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"-----------------------------------------\n",`
			`"-----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| ep_len_mean \| 5.98 \|\n",`
			`"\| ep_rew_mean \| 6.14 \|\n",`
			`"\| time/ \| \|\n",`
			`"\| fps \| 541 \|\n",`
			`"\| iterations \| 7 \|\n",`
			`"\| time_elapsed \| 26 \|\n",`
			`"\| total_timesteps \| 14336 \|\n",`
			`"\| train/ \| \|\n",`
			`"\| approx_kl \| 0.026178032 \|\n",`
			`"\| clip_fraction \| 0.453 \|\n",`
			`"\| clip_range \| 0.2 \|\n",`
			`"\| entropy_loss \| -9.46 \|\n",`
			`"\| explained_variance \| 0.174 \|\n",`
			`"\| learning_rate \| 0.0003 \|\n",`
			`"\| loss \| 141 \|\n",`
			`"\| n_updates \| 60 \|\n",`
			`"\| policy_gradient_loss \| -0.116 \|\n",`
			`"\| value_loss \| 235 \|\n",`
			`"-----------------------------------------\n",`
			`"----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
			`"\| ep_len_mean \| 6 \|\n",`
			`"\| ep_rew_mean \| 3.03 \|\n",`
			`"\| time/ \| \|\n",`
			`"\| fps \| 537 \|\n",`
			`"\| iterations \| 8 \|\n",`
			`"\| time_elapsed \| 30 \|\n",`
			`"\| total_timesteps \| 16384 \|\n",`
			`"\| train/ \| \|\n",`
			`"\| approx_kl \| 0.02457074 \|\n",`
			`"\| clip_fraction \| 0.423 \|\n",`
			`"\| clip_range \| 0.2 \|\n",`
			`"\| entropy_loss \| -9.45 \|\n",`
			`"\| explained_variance \| 0.171 \|\n",`
			`"\| learning_rate \| 0.0003 \|\n",`
			`"\| loss \| 111 \|\n",`
			`"\| n_updates \| 70 \|\n",`
			`"\| policy_gradient_loss \| -0.112 \|\n",`
			`"\| value_loss \| 212 \|\n",`
			`"----------------------------------------\n",`
			`"-----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| ep_len_mean \| 6 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| ep_rew_mean \| 9.54 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| time/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| fps \| 532 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| iterations \| 9 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| time_elapsed \| 34 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| total_timesteps \| 18432 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| train/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| approx_kl \| 0.024578478 \|\n",`
			`"\| clip_fraction \| 0.417 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| clip_range \| 0.2 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| entropy_loss \| -9.45 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| explained_variance \| 0.178 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| learning_rate \| 0.0003 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| loss \| 121 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| n_updates \| 80 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| policy_gradient_loss \| -0.114 \|\n",`
			`"\| value_loss \| 232 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"-----------------------------------------\n",`
			`"-----------------------------------------\n",`
			`"\| rollout/ \| \|\n",`
			`"\| ep_len_mean \| 6 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| ep_rew_mean \| 3.81 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| time/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| fps \| 527 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| iterations \| 10 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| time_elapsed \| 38 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| total_timesteps \| 20480 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| train/ \| \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| approx_kl \| 0.022704324 \|\n",`
			`"\| clip_fraction \| 0.379 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| clip_range \| 0.2 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| entropy_loss \| -9.45 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| explained_variance \| 0.194 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\| learning_rate \| 0.0003 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| loss \| 108 \|\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"\| n_updates \| 90 \|\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"\| policy_gradient_loss \| -0.112 \|\n",`
			`"\| value_loss \| 216 \|\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"-----------------------------------------\n"`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`]`
remove debug prints 2024-03-14 22:00:19 +00:00			`},`
			`{`
			`"data": {`
			`"text/plain": [`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"<stable_baselines3.ppo.ppo.PPO at 0x7f86ef4ddcd0>"`
remove debug prints 2024-03-14 22:00:19 +00:00			`]`
			`},`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"execution_count": 3,`
remove debug prints 2024-03-14 22:00:19 +00:00			`"metadata": {},`
			`"output_type": "execute_result"`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`}`
			`],`
switch to notebook 2024-03-13 18:04:30 +00:00			`"source": [`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"total_timesteps = 20_000\n",`
			`"model = PPO(\"MlpPolicy\", env, verbose=1, device='cuda')\n",`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"model.learn(total_timesteps=total_timesteps)"`
switch to notebook 2024-03-13 18:04:30 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"execution_count": 4,`
switch to notebook 2024-03-13 18:04:30 +00:00			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"model.save(\"dqn_wordle\")"`
switch to notebook 2024-03-13 18:04:30 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`"execution_count": 5,`
switch to notebook 2024-03-13 18:04:30 +00:00			`"metadata": {},`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"outputs": [],`
switch to notebook 2024-03-13 18:04:30 +00:00			`"source": [`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"model = PPO.load(\"dqn_wordle\")"`
switch to notebook 2024-03-13 18:04:30 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"execution_count": 7,`
switch to notebook 2024-03-13 18:04:30 +00:00			`"metadata": {},`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"outputs": [`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"100%\|██████████\| 1000/1000 [00:03<00:00, 252.17it/s]"`
			`]`
			`},`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"[[ 7 18 1 19 16 3 3 3 2 3]\n",`
			`" [16 9 5 14 4 3 3 3 3 3]\n",`
			`" [16 9 5 14 4 3 3 3 3 3]\n",`
			`" [16 9 5 14 4 3 3 3 3 3]\n",`
			`" [ 7 18 1 19 16 3 3 3 2 3]\n",`
			`" [ 7 18 1 19 16 3 3 3 2 3]] -54 {'correct': False, 'guesses': defaultdict(<class 'int'>, {'grasp': 3, 'piend': 3})}\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"0\n"`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`]`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`},`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"\n"`
			`]`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`}`
			`],`
minor changes 2024-03-13 20:57:23 +00:00			`"source": [`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"env = gym_wordle.wordle.WordleEnv()\n",`
			`"\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"for i in tqdm(range(1000)):\n",`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`" \n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`" state, info = env.reset()\n",`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"\n",`
			`" done = False\n",`
			`"\n",`
			`" wins = 0\n",`
			`"\n",`
			`" while not done:\n",`
			`"\n",`
			`" action, _states = model.predict(state, deterministic=True)\n",`
			`"\n",`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`" state, reward, done, truncated, info = env.step(action)\n",`
copy the wordle env locally and fix the obs return 2024-03-14 21:49:17 +00:00			`"\n",`
			`" if info[\"correct\"]:\n",`
			`" wins += 1\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"\n",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"print(state, reward, info)\n",`
			`"\n",`
remove debug prints 2024-03-14 22:00:19 +00:00			`"print(wins)\n"`
minor changes 2024-03-13 20:57:23 +00:00			`]`
updated wordle to gymnasium env 2024-03-16 01:19:58 +00:00			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": []`
switch to notebook 2024-03-13 18:04:30 +00:00			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3 (ipykernel)",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
try penalizing duplicate guesses 2024-03-16 01:48:21 +00:00			`"version": "3.8.10"`
switch to notebook 2024-03-13 18:04:30 +00:00			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`