attempt to use the other wordle gym, causing cuda errors

2026-01-11 16:18:47 +00:00 · 2024-03-13 14:27:34 -07:00
parent 5ec123e0f1
commit f641d77c47
8 changed files with 13442 additions and 115 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 **/data/*
 **/*.zip
+**/__pycache__
--- a/dqn_wordle.ipynb
+++ b/dqn_wordle.ipynb
@@ -1,114 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import gym\n",
-    "import gym_wordle\n",
-    "from stable_baselines3 import DQN\n",
-    "import numpy as np\n",
-    "import tqdm"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "env = gym.make(\"Wordle-v0\")\n",
-    "\n",
-    "print(env)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "total_timesteps = 100000\n",
-    "model = DQN(\"MlpPolicy\", env, verbose=0)\n",
-    "model.learn(total_timesteps=total_timesteps, progress_bar=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def test(model):\n",
-    "\n",
-    "    end_rewards = []\n",
-    "\n",
-    "    for i in range(1000):\n",
-    "        \n",
-    "        state = env.reset()\n",
-    "\n",
-    "        done = False\n",
-    "\n",
-    "        while not done:\n",
-    "\n",
-    "            action, _states = model.predict(state, deterministic=True)\n",
-    "\n",
-    "            state, reward, done, info = env.step(action)\n",
-    "            \n",
-    "        end_rewards.append(reward == 0)\n",
-    "        \n",
-    "    return np.sum(end_rewards) / len(end_rewards)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model.save(\"dqn_wordle\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = DQN.load(\"dqn_wordle\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(test(model))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
--- a/dqn_wordle.py
+++ b/dqn_wordle.py
@@ -0,0 +1,47 @@
+# %%
+from stable_baselines3 import DQN
+import numpy as np
+import wordle.state
+import gym
+
+# %%
+env = gym.make("WordleEnvFull-v0")
+
+print(env)
+
+# %%
+total_timesteps = 100000
+model = DQN("MlpPolicy", env, verbose=0)
+model.learn(total_timesteps=total_timesteps, progress_bar=True)
+
+# %%
+def test(model):
+
+    end_rewards = []
+
+    for i in range(1000):
+        
+        state = env.reset()
+
+        done = False
+
+        while not done:
+
+            action, _states = model.predict(state, deterministic=True)
+
+            state, reward, done, info = env.step(action)
+            
+        end_rewards.append(reward == 0)
+        
+    return np.sum(end_rewards) / len(end_rewards)
+
+# %%
+model.save("dqn_wordle")
+
+# %%
+model = DQN.load("dqn_wordle")
+
+# %%
+print(test(model))
+
+
--- a/wordle/init.py
+++ b/wordle/init.py
@@ -0,0 +1,83 @@
+from gym.envs.registration import (
+    registry,
+    register,
+    make,
+    spec,
+    load_env_plugins as _load_env_plugins,
+)
+
+
+# Classic
+# ----------------------------------------
+
+register(
+    id="WordleEnv10-v0",
+    entry_point="wordle.wordle:WordleEnv10",
+    max_episode_steps=200,
+)
+
+register(
+    id="WordleEnv100-v0",
+    entry_point="wordle.wordle:WordleEnv100",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnv100OneAction-v0",
+    entry_point="wordle.wordle:WordleEnv100OneAction",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnv100TwoAction-v0",
+    entry_point="wordle.wordle:WordleEnv100TwoAction",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnv100FullAction-v0",
+    entry_point="wordle.wordle:WordleEnv100FullAction",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnv100WithMask-v0",
+    entry_point="wordle.wordle:WordleEnv100WithMask",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnv1000-v0",
+    entry_point="wordle.wordle:WordleEnv1000",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnv1000WithMask-v0",
+    entry_point="wordle.wordle:WordleEnv1000WithMask",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnv1000FullAction-v0",
+    entry_point="wordle.wordle:WordleEnv1000FullAction",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnvFull-v0",
+    entry_point="wordle.wordle:WordleEnvFull",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnvReal-v0",
+    entry_point="wordle.wordle:WordleEnvReal",
+    max_episode_steps=500,
+)
+
+register(
+    id="WordleEnvRealWithMask-v0",
+    entry_point="wordle.wordle:WordleEnvRealWithMask",
+    max_episode_steps=500,
+)
--- a/wordle/const.py
+++ b/wordle/const.py
@@ -0,0 +1,3 @@
+WORDLE_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+WORDLE_N = 5
+REWARD = 10
--- a/wordle/state.py
+++ b/wordle/state.py
@@ -0,0 +1,162 @@
+"""
+Keep the state in a 1D int array
+
+index[0] = remaining steps
+Rest of data is laid out as binary array
+
+[1..27] = whether char has been guessed or not
+
+[[status, status, status, status, status]
+ for _ in "ABCD..."]
+where status has codes
+ [1, 0, 0] - char is definitely not in this spot
+ [0, 1, 0] - char is maybe in this spot
+ [0, 0, 1] - char is definitely in this spot
+"""
+import collections
+from typing import List
+import numpy as np
+
+from wordle.const import WORDLE_CHARS, WORDLE_N
+
+
+WordleState = np.ndarray
+
+
+def get_nvec(max_turns: int):
+    return [max_turns] + [2] * len(WORDLE_CHARS) + [2] * 3 * WORDLE_N * len(WORDLE_CHARS)
+
+
+def new(max_turns: int) -> WordleState:
+    return np.array(
+        [max_turns] + [0] * len(WORDLE_CHARS) + [0, 1, 0] * WORDLE_N * len(WORDLE_CHARS),
+        dtype=np.int32)
+
+
+def remaining_steps(state: WordleState) -> int:
+    return state[0]
+
+
+NO = 0
+SOMEWHERE = 1
+YES = 2
+
+
+def update_from_mask(state: WordleState, word: str, mask: List[int]) -> WordleState:
+    """
+    return a copy of state that has been updated to new state
+
+    From a mask we need slighty different logic since we don't know the
+    goal word.
+
+    :param state:
+    :param word:
+    :param goal_word:
+    :return:
+    """
+    state = state.copy()
+
+    prior_yes = []
+    prior_maybe = []
+    # We need two passes because first pass sets definitely yesses
+    # second pass sets the no's for those who aren't already yes
+    state[0] -= 1
+    for i, c in enumerate(word):
+        cint = ord(c) - ord(WORDLE_CHARS[0])
+        offset = 1 + len(WORDLE_CHARS) + cint * WORDLE_N * 3
+        state[1 + cint] = 1
+        if mask[i] == YES:
+            prior_yes.append(c)
+            # char at position i = yes, all other chars at position i == no
+            state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
+            for ocint in range(len(WORDLE_CHARS)):
+                if ocint != cint:
+                    oc_offset = 1 + len(WORDLE_CHARS) + ocint * WORDLE_N * 3
+                    state[oc_offset + 3 * i:oc_offset + 3 * i + 3] = [1, 0, 0]
+
+    for i, c in enumerate(word):
+        cint = ord(c) - ord(WORDLE_CHARS[0])
+        offset = 1 + len(WORDLE_CHARS) + cint * WORDLE_N * 3
+        if mask[i] == SOMEWHERE:
+            prior_maybe.append(c)
+            # Char at position i = no, other chars stay as they are
+            state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
+        elif mask[i] == NO:
+            # Need to check this first in case there's prior maybe + yes
+            if c in prior_maybe:
+                # Then the maybe could be anywhere except here
+                state[offset+3*i:offset+3*i+3] = [1, 0, 0]
+            elif c in prior_yes:
+                # No maybe, definitely a yes, so it's zero everywhere except the yesses
+                for j in range(WORDLE_N):
+                    # Only flip no if previously was maybe
+                    if state[offset + 3 * j:offset + 3 * j + 3][1] == 1:
+                        state[offset + 3 * j:offset + 3 * j + 3] = [1, 0, 0]
+            else:
+                # Just straight up no
+                state[offset:offset+3*WORDLE_N] = [1, 0, 0]*WORDLE_N
+
+    return state
+
+
+def get_mask(word: str, goal_word: str) -> List[int]:
+    # Definite yesses first
+    mask = [0, 0, 0, 0, 0]
+    counts = collections.Counter(goal_word)
+    for i, c in enumerate(word):
+        if goal_word[i] == c:
+            mask[i] = 2
+            counts[c] -= 1
+
+    for i, c in enumerate(word):
+        if mask[i] == 2:
+            continue
+        elif c in counts:
+            if counts[c] > 0:
+                mask[i] = 1
+                counts[c] -= 1
+            else:
+                for j in range(i+1, len(mask)):
+                    if mask[j] == 2:
+                        continue
+                    mask[j] = 0
+
+    return mask
+
+def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
+    """
+    return a copy of state that has been updated to new state
+
+    :param state:
+    :param word:
+    :param goal_word:
+    :return:
+    """
+    mask = get_mask(word, goal_word)
+    return update_from_mask(state, word, mask)
+
+
+def update(state: WordleState, word: str, goal_word: str) -> WordleState:
+    state = state.copy()
+
+    state[0] -= 1
+    for i, c in enumerate(word):
+        cint = ord(c) - ord(WORDLE_CHARS[0])
+        offset = 1 + len(WORDLE_CHARS) + cint * WORDLE_N * 3
+        state[1 + cint] = 1
+        if goal_word[i] == c:
+            # char at position i = yes, all other chars at position i == no
+            state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
+            for ocint in range(len(WORDLE_CHARS)):
+                if ocint != cint:
+                    oc_offset = 1 + len(WORDLE_CHARS) + ocint * WORDLE_N * 3
+                    state[oc_offset + 3 * i:oc_offset + 3 * i + 3] = [1, 0, 0]
+        elif c in goal_word:
+            # Char at position i = no, other chars stay as they are
+            state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
+        else:
+            # Char at all positions = no
+            state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
+
+    return state
+
--- a/wordle/wordle.py
+++ b/wordle/wordle.py
@@ -0,0 +1,173 @@
+import os
+from typing import Optional, List
+
+import gym
+from gym import spaces
+import numpy as np
+
+import wordle.state
+from wordle.const import WORDLE_N, REWARD
+
+CUR_PATH = os.environ.get('PYTHONPATH', '.')
+import os
+dirname = os.path.dirname(__file__)
+VALID_WORDS_PATH = f'{dirname}/wordle_words.txt'
+
+
+def _load_words(limit: Optional[int]=None) -> List[str]:
+    with open(VALID_WORDS_PATH, 'r') as f:
+        lines = [x.strip().upper() for x in f.readlines()]
+        if not limit:
+            return lines
+        else:
+            return lines[:limit]
+
+
+class WordleEnvBase(gym.Env):
+    """
+    Actions:
+        Can play any 5 letter word in vocabulary
+        * 13k for full vocab
+    State space is defined as:
+        * 6 possibilities for turns (WORDLE_TURNS)
+        * Each VALID_CHAR has a state of 0/1 for whether it's been guessed before
+        * For each in VALID_CHARS [A-Z] can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
+        for full game, this is (3^5)^26
+        Each state has 1 + 5*26 possibilities
+    Reward:
+        Reward is 10 for guessing the right word, -10 for not guessing the right word after 6 guesses.
+    Starting State:
+        Random goal word
+        Initial state with turn 0, all chars Unvisited + Maybe
+    """
+    def __init__(self, words: List[str],
+                 max_turns: int,
+                 allowable_words: Optional[int] = None,
+                 frequencies: Optional[List[float]]=None,
+                 mask_based_state_updates: bool=False):
+        assert all(len(w) == WORDLE_N for w in words), f'Not all words of length {WORDLE_N}, {words}'
+        self.words = words
+        self.max_turns = max_turns
+        self.allowable_words = allowable_words
+        self.mask_based_state_updates = mask_based_state_updates
+        if not self.allowable_words:
+            self.allowable_words = len(self.words)
+
+        self.frequencies = None
+        if frequencies:
+            assert len(words) == len(frequencies), f'{len(words), len(frequencies)}'
+            self.frequencies = np.array(frequencies, dtype=np.float32) / sum(frequencies)
+
+        self.action_space = spaces.Discrete(len(self.words))
+        self.observation_space = spaces.MultiDiscrete(wordle.state.get_nvec(self.max_turns))
+
+        self.done = True
+        self.goal_word: int = -1
+
+        self.state: wordle.state.WordleState = None
+        self.state_updater = wordle.state.update
+        if self.mask_based_state_updates:
+            self.state_updater = wordle.state.update_mask
+
+    def step(self, action: int):
+        if self.done:
+            raise ValueError(
+                "You are calling 'step()' even though this "
+                "environment has already returned done = True. You "
+                "should always call 'reset()' once you receive 'done = "
+                "True' -- any further steps are undefined behavior."
+            )
+        self.state = self.state_updater(state=self.state,
+                                        word=self.words[action],
+                                        goal_word=self.words[self.goal_word])
+
+        reward = 0
+        if action == self.goal_word:
+            self.done = True
+            #reward = REWARD
+            if wordle.state.remaining_steps(self.state) == self.max_turns-1:
+                reward = 0#-10*REWARD  # No reward for guessing off the bat
+            else:
+                #reward = REWARD*(self.state.remaining_steps() + 1) / self.max_turns
+                reward = REWARD
+        elif wordle.state.remaining_steps(self.state) == 0:
+            self.done = True
+            reward = -REWARD
+
+        return self.state.copy(), reward, self.done, False, {"goal_id": self.goal_word}
+
+    def reset(self, options = None, seed: Optional[int] = None):
+        self.state = wordle.state.new(self.max_turns)
+        self.done = False
+        self.goal_word = int(np.random.random()*self.allowable_words)
+
+        return self.state.copy(), {"goal_id": self.goal_word}
+
+    def set_goal_word(self, goal_word: str):
+        self.goal_word = self.words.index(goal_word)
+
+    def set_goal_id(self, goal_id: int):
+        self.goal_word = goal_id
+
+
+class WordleEnv10(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(10), max_turns=6)
+
+
+class WordleEnv100(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(100), max_turns=6)
+
+
+class WordleEnv100OneAction(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(100), allowable_words=1, max_turns=6)
+
+
+class WordleEnv100WithMask(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(100), max_turns=6,
+                         mask_based_state_updates=True)
+
+
+class WordleEnv100TwoAction(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(100), allowable_words=2, max_turns=6)
+
+
+class WordleEnv100FullAction(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(), allowable_words=100, max_turns=6)
+
+
+class WordleEnv1000(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(1000), max_turns=6)
+
+
+class WordleEnv1000WithMask(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(1000), max_turns=6,
+                         mask_based_state_updates=True)
+
+
+class WordleEnv1000FullAction(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(), allowable_words=1000, max_turns=6)
+
+
+class WordleEnvFull(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(), max_turns=6)
+
+
+class WordleEnvReal(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(), allowable_words=2315, max_turns=6)
+
+
+class WordleEnvRealWithMask(WordleEnvBase):
+    def __init__(self):
+        super().__init__(words=_load_words(), allowable_words=2315, max_turns=6,
+                         mask_based_state_updates=True)
--- a/wordle/wordle_words.txt
+++ b/wordle/wordle_words.txt