Compare commits

...

2 Commits

Author      SHA1        Message             Date
Arthur Lu   5ec123e0f1  minor changes       2024-03-13 13:57:23 -07:00
Arthur Lu   e9622b6f68  switch to notebook  2024-03-13 11:04:30 -07:00
2 changed files with 114 additions and 24 deletions

dqn_wordle.ipynb  Normal file  (+114)

@@ -0,0 +1,114 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"import gym_wordle\n",
"from stable_baselines3 import DQN\n",
"import numpy as np\n",
"import tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"env = gym.make(\"Wordle-v0\")\n",
"\n",
"print(env)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"total_timesteps = 100000\n",
"model = DQN(\"MlpPolicy\", env, verbose=0)\n",
"model.learn(total_timesteps=total_timesteps, progress_bar=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def test(model):\n",
"\n",
" end_rewards = []\n",
"\n",
" for i in range(1000):\n",
" \n",
" state = env.reset()\n",
"\n",
" done = False\n",
"\n",
" while not done:\n",
"\n",
" action, _states = model.predict(state, deterministic=True)\n",
"\n",
" state, reward, done, info = env.step(action)\n",
" \n",
" end_rewards.append(reward == 0)\n",
" \n",
" return np.sum(end_rewards) / len(end_rewards)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"dqn_wordle\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = DQN.load(\"dqn_wordle\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(test(model))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

(deleted file, -24)

@@ -1,24 +0,0 @@
import gym
import gym_wordle
from stable_baselines3 import DQN
env = gym.make("Wordle-v0")
done = False
print(env)
model = DQN("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000, log_interval=100)
model.save("dqn_wordle")
del model # remove to demonstrate saving and loading
model = DQN.load("dqn_wordle")
state = env.reset()
while not done:
    action, _states = model.predict(state, deterministic=True)
    state, reward, done, info = env.step(action)