f

added state saving
add letter_guess symlink, add model loading into ai.py
2026-06-12 11:57:31 +00:00 · 2024-03-20 19:53:50 -07:00 · 2024-03-20 19:52:13 -07:00 · 2024-03-20 17:31:27 -07:00 · 2024-03-20 12:59:14 -07:00 · 2024-03-20 12:53:40 -07:00
7 changed files with 436 additions and 3070 deletions
@@ -1,338 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import gym\n",
-    "import gym_wordle\n",
-    "from stable_baselines3 import DQN, PPO, common\n",
-    "import numpy as np\n",
-    "import tqdm"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<Monitor<WordleEnv instance>>\n"
-     ]
-    }
-   ],
-   "source": [
-    "env = gym_wordle.wordle.WordleEnv()\n",
-    "env = common.monitor.Monitor(env)\n",
-    "\n",
-    "print(env)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using cuda device\n",
-      "Wrapping the env in a DummyVecEnv.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6921a0721569456abf5bceac7e7b6b34",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Output()"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "----------------------------------\n",
-      "| rollout/            |          |\n",
-      "|    ep_len_mean      | 4.97     |\n",
-      "|    ep_rew_mean      | -63.8    |\n",
-      "|    exploration_rate | 0.05     |\n",
-      "| time/               |          |\n",
-      "|    episodes         | 10000    |\n",
-      "|    fps              | 1628     |\n",
-      "|    time_elapsed     | 30       |\n",
-      "|    total_timesteps  | 49995    |\n",
-      "----------------------------------\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "----------------------------------\n",
-      "| rollout/            |          |\n",
-      "|    ep_len_mean      | 5        |\n",
-      "|    ep_rew_mean      | -70.5    |\n",
-      "|    exploration_rate | 0.05     |\n",
-      "| time/               |          |\n",
-      "|    episodes         | 20000    |\n",
-      "|    fps              | 662      |\n",
-      "|    time_elapsed     | 150      |\n",
-      "|    total_timesteps  | 99992    |\n",
-      "| train/              |          |\n",
-      "|    learning_rate    | 0.0001   |\n",
-      "|    loss             | 11.7     |\n",
-      "|    n_updates        | 12497    |\n",
-      "----------------------------------\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "\n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<stable_baselines3.dqn.dqn.DQN at 0x1bfd6cc0210>"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "total_timesteps = 100_000\n",
-    "model = DQN(\"MlpPolicy\", env, verbose=1, device='cuda')\n",
-    "model.learn(total_timesteps=total_timesteps, log_interval=10_000, progress_bar=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model.save(\"dqn_new_state\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Repository\\cse151b-final-project\\env\\Lib\\site-packages\\stable_baselines3\\common\\save_util.py:166: UserWarning: Could not deserialize object lr_schedule. Consider using `custom_objects` argument to replace this object.\n",
-      "Exception: code() argument 13 must be str, not int\n",
-      "  warnings.warn(\n",
-      "c:\\Repository\\cse151b-final-project\\env\\Lib\\site-packages\\stable_baselines3\\common\\save_util.py:166: UserWarning: Could not deserialize object exploration_schedule. Consider using `custom_objects` argument to replace this object.\n",
-      "Exception: code() argument 13 must be str, not int\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
-   "source": [
-    "# model = DQN.load(\"dqn_wordle\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1.\n",
-      " 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.\n",
-      " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 0. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
-      "[1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.\n",
-      " 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
-      "[1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1.\n",
-      " 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.\n",
-      " 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
-      "[1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1.\n",
-      " 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1.\n",
-      " 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.\n",
-      " 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
-      "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.\n",
-      " 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1.\n",
-      " 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n",
-      "[1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1.\n",
-      " 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.\n",
-      " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
-      "[1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1.\n",
-      " 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.\n",
-      " 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]\n",
-      "[1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1.\n",
-      " 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.\n",
-      " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
-      "[1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1.\n",
-      " 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.\n",
-      " 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
-      "[1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1.\n",
-      " 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.\n",
-      " 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
-      " 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n",
-      " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
-      "0\n"
-     ]
-    }
-   ],
-   "source": [
-    "env = gym_wordle.wordle.WordleEnv()\n",
-    "\n",
-    "for i in range(1000):\n",
-    "        \n",
-    "    state, info = env.reset()\n",
-    "\n",
-    "    done = False\n",
-    "\n",
-    "    wins = 0\n",
-    "\n",
-    "    while not done:\n",
-    "\n",
-    "        action, _states = model.predict(state, deterministic=True)\n",
-    "\n",
-    "        state, reward, done, truncated, info = env.step(action)\n",
-    "\n",
-    "    print(state)\n",
-    "    if info[\"correct\"]:\n",
-    "        wins += 1\n",
-    "\n",
-    "print(wins)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(array([1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-       "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.,\n",
-       "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-       "        1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-       "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1.,\n",
-       "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-       "        1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0., 0., 0., 0., 1.]),\n",
-       " -50)"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "state, reward"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
@@ -3,9 +3,27 @@ import string

 import numpy as np

+from stable_baselines3 import PPO, DQN
+from letter_guess import LetterGuessingEnv
+
+
+def load_valid_words(file_path='wordle_words.txt'):
+    """
+    Load valid five-letter words from a specified text file.
+
+    Parameters:
+    - file_path (str): The path to the text file containing valid words.
+
+    Returns:
+    - list[str]: A list of valid words loaded from the file.
+    """
+    with open(file_path, 'r') as file:
+        valid_words = [line.strip() for line in file if len(line.strip()) == 5]
+    return valid_words
+

 class AI:
-    def __init__(self, vocab_file, num_letters=5, num_guesses=6):
+    def __init__(self, vocab_file, model_file, num_letters=5, num_guesses=6, use_q_model=False):
        self.vocab_file = vocab_file
        self.num_letters = num_letters
        self.num_guesses = 6
@@ -16,8 +34,38 @@ class AI:
        self.domains = None
        self.possible_letters = None

+        self.use_q_model = use_q_model
+        if use_q_model:
+            # We instantiate the same environment the model was trained on ONLY to simplify storing/calculating the gym state; it is not used to control the game at all
+            self.q_env = LetterGuessingEnv(vocab_file)
+            self.q_env_state, _ = self.q_env.reset()
+
+            # load model
+            self.q_model = PPO.load(model_file)
+
        self.reset()

+    def solve_eval(self, results_callback):
+        num_guesses = 0
+        while [len(e) for e in self.domains] != [1 for _ in range(self.num_letters)]:
+            num_guesses += 1
+
+            # sample a word; this uses q_env_state when the q_model is enabled
+            word = self.sample()
+
+            # get emulated results
+            results = results_callback(word)
+            if self.use_q_model:
+                self.q_env.set_state(self.q_env_state)
+                # step the q_env to match the guess we just made
+                for i in range(len(word)):
+                    char = word[i]
+                    action = ord(char) - ord('a')
+                    self.q_env_state, _, _, _, _ = self.q_env.step(action)
+
+            self.arc_consistency(word, results)
+        return num_guesses, word
+
    def solve(self):
        num_guesses = 0
        while [len(e) for e in self.domains] != [1 for _ in range(self.num_letters)]:
@@ -33,26 +81,27 @@ class AI:
            print('-----------------------------------------------')
            print(f'Guess #{num_guesses}/{self.num_guesses}: {word}')
            print('-----------------------------------------------')
-            self.arc_consistency(word)
+
+            print(f'Performing arc consistency check on {word}...')
+            print(f'Specify 0 for completely nonexistent letter at the specified index, 1 for existent letter but incorrect index, and 2 for correct letter at correct index.')
+            results = []
+
+            # Collect results
+            for l in word:
+                while True:
+                    result = input(f'{l}: ')
+                    if result not in ['0', '1', '2']:
+                        print('Incorrect option. Try again.')
+                        continue
+                    results.append(result)
+                    break
+
+            self.arc_consistency(word, results)

        print(f'You did it! The word is {"".join([e[0] for e in self.domains])}')
+        return num_guesses

-
-    def arc_consistency(self, word):
-        print(f'Performing arc consistency check on {word}...')
-        print(f'Specify 0 for completely nonexistent letter at the specified index, 1 for existent letter but incorrect index, and 2 for correct letter at correct index.')
-        results = []
-
-        # Collect results
-        for l in word:
-            while True:
-                result = input(f'{l}: ')
-                if result not in ['0', '1', '2']:
-                    print('Incorrect option. Try again.')
-                    continue
-                results.append(result)
-                break
-
+    def arc_consistency(self, word, results):
        self.possible_letters += [word[i] for i in range(len(word)) if results[i] == '1']

        for i in range(len(word)):
@@ -70,11 +119,13 @@ class AI:
            if results[i] == '2':
                self.domains[i] = [word[i]]

-
    def reset(self):
        self.domains = [list(string.ascii_lowercase) for _ in range(self.num_letters)]
        self.possible_letters = []

+        if self.use_q_model:
+            self.q_env_state, _ = self.q_env.reset()
+
    def sample(self):
        """
        Samples a best word given the current domains
@@ -87,9 +138,30 @@ class AI:
        pattern = re.compile(regex_string)

        # From the words with the highest scores, only return the best word that match the regex pattern
+        max_qval = float('-inf')
+        best_word = None
        for word, _ in self.best_words:
+            # when the q model is used, the env is restored below to the state from before this guess, so every candidate is scored from the same state
            if pattern.match(word) and False not in [e in word for e in self.possible_letters]:
-                return word
+                if self.use_q_model:
+                    self.q_env.set_state(self.q_env_state)
+                    # Use policy to grade word
+                    # get the state and action pairs
+                    curr_qval = 0
+
+                    for l in word:
+                        action = ord(l) - ord('a')
+                        q_val = self.q_model.policy.evaluate_actions(self.q_env.get_obs(), action)
+                        _, _, _, _, _ = self.q_env.step(action)
+                        curr_qval += q_val
+
+                    if curr_qval > max_qval:
+                        max_qval = curr_qval
+                        best_word = word
+                else:
+                    # otherwise return the first matching word from Eric's heuristic ranking
+                    return word
+        return best_word

    def get_vocab(self, vocab_file):
        vocab = []
@@ -0,0 +1,58 @@
+import argparse
+from ai import AI
+import numpy as np
+from tqdm import tqdm
+
+global solution
+
+def result_callback(word):
+
+    global solution
+
+    result = ['0', '0', '0', '0', '0']
+
+    for i, letter in enumerate(word):
+
+        if solution[i] == word[i]:
+            result[i] = '2'
+        elif letter in solution:
+            result[i] = '1'
+        else:
+            pass
+
+    return result
+
+def main(args):
+    global solution 
+
+    if args.n is None:
+        raise Exception('Need to specify n (i.e. n = 1 for wordle, n = 4 for quordle, n = 16 for sedecordle).')
+
+    ai = AI(args.vocab_file, args.model_file, use_q_model=args.q_model)
+
+    total_guesses = 0
+    wins = 0
+    num_eval = args.num_eval
+
+    for i in tqdm(range(num_eval)):
+        idx = np.random.choice(range(len(ai.vocab)))
+        solution = ai.vocab[idx]
+        guesses, word = ai.solve_eval(results_callback=result_callback)
+        if word != solution:
+            total_guesses += 5
+        else:
+            total_guesses += guesses
+            wins += 1
+        ai.reset()
+
+    print(f"q_model?: {args.q_model} \t average guesses per game: {total_guesses / num_eval} \t win rate: {wins / num_eval}")
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--n', dest='n', type=int, default=None)
+    parser.add_argument('--vocab_file', dest='vocab_file', type=str, default='wordle_words.txt')
+    parser.add_argument('--num_eval', dest="num_eval", type=int, default=1000)
+    parser.add_argument('--model_file', dest="model_file", type=str, default='wordle_ppo_model')
+    parser.add_argument('--q_model', dest="q_model", type=bool, default=False)
+    args = parser.parse_args()
+    main(args)
@@ -0,0 +1 @@
+../letter_guess.py
@@ -0,0 +1,2 @@
+python eric_wordle/eval.py --n 1 --vocab_file wordle_words.txt  --num_eval 5000
+python eric_wordle/eval.py --n 1 --vocab_file wordle_words.txt  --num_eval 5000 --q_model True --model_file wordle_ppo_model
@@ -3,6 +3,7 @@ from gymnasium import spaces
 import numpy as np
 import random
 import re
+import copy


 class LetterGuessingEnv(gym.Env):
@@ -29,8 +30,28 @@ class LetterGuessingEnv(gym.Env):

        self.reset()

+    def clone_state(self):
+        # Clone the current state
+        return {
+            'target_word': self.target_word,
+            'letter_flags': copy.deepcopy(self.letter_flags),
+            'letter_positions': copy.deepcopy(self.letter_positions),
+            'guessed_letters': copy.deepcopy(self.guessed_letters),
+            'guess_prefix': self.guess_prefix,
+            'round': self.round
+        }
+
+    def set_state(self, state):
+        # Restore the state
+        self.target_word = state['target_word']
+        self.letter_flags = copy.deepcopy(state['letter_flags'])
+        self.letter_positions = copy.deepcopy(state['letter_positions'])
+        self.guessed_letters = copy.deepcopy(state['guessed_letters'])
+        self.guess_prefix = state['guess_prefix']
+        self.round = state['round']
+
    def step(self, action):
-        letter_index = action % 26  # Assuming action is the letter index directly
+        letter_index = action  # Assuming action is the letter index directly
        position = len(self.guess_prefix)  # The next position in the prefix is determined by its current length
        letter = chr(ord('a') + letter_index)

@@ -56,8 +77,8 @@ class LetterGuessingEnv(gym.Env):
            reward = 1  # Reward for adding new information by trying a new letter

            # Update the letter_positions matrix to reflect the new guess
-            if position == 4: 
-                self.letter_positions[:,:] = 1
+            if position == 4:
+                self.letter_positions[:, :] = 1
            else:
                self.letter_positions[:, position] = 0
                self.letter_positions[letter_index, position] = 1
@@ -72,15 +93,16 @@ class LetterGuessingEnv(gym.Env):
            self.guess_prefix = ''
            self.round += 1

-        # end after 5 rounds of total guesses
-        if self.round == 2:
+        # end after 3 rounds of total guesses
+        if self.round == 3:
            # reward = 5
            done = True

-        obs = self._get_obs()
-        
-        if reward < -50:
+        obs = self.get_obs()
+
+        if reward < -5:
            print(obs, reward, done)
+            exit(0)

        return obs, reward, done, False, {}

@@ -91,8 +113,8 @@ class LetterGuessingEnv(gym.Env):
        self.letter_positions = np.ones((26, 4), dtype=np.int32)
        self.guessed_letters = set()
        self.guess_prefix = ""  # Reset the guess prefix for the new episode
-        self.round = 1
-        return self._get_obs(), {}
+        self.round = 0
+        return self.get_obs(), {}

    def encode_word(self, word):
        encoded = np.zeros((26,))
@@ -101,7 +123,7 @@ class LetterGuessingEnv(gym.Env):
            encoded[index] = 1
        return encoded

-    def _get_obs(self):
+    def get_obs(self):
        return np.concatenate([self.letter_flags.flatten(), self.letter_positions.flatten()])

    def render(self, mode='human'):
Author	SHA1	Message	Date
Ethan Shapiro	284a29d7af	f	2024-03-20 19:53:50 -07:00
Ethan Shapiro	3747af9d22	added state saving	2024-03-20 19:52:13 -07:00
Arthur Lu	4fb81317f0	add letter_guess symlink, add model loading into ai.py	2024-03-20 17:31:27 -07:00
Arthur Lu	12601964bd	add eval script for convenience	2024-03-20 12:59:14 -07:00
Arthur Lu	c448e02512	add evaluation to eric's wordle solver (eval.py)	2024-03-20 12:53:40 -07:00
Arthur Lu	848d385482	run model train, abt 3 avg reward	2024-03-20 12:18:15 -07:00