2024-03-19 18:52:10 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"def load_valid_words(file_path='wordle_words.txt'):\n",
"    \"\"\"\n",
"    Load valid five-letter words from a text file, one word per line.\n",
"\n",
"    Surrounding whitespace (including the trailing newline) is stripped from\n",
"    each line; lines whose stripped length is not exactly 5 are skipped.\n",
"\n",
"    Parameters:\n",
"    - file_path (str): The path to the text file containing valid words.\n",
"\n",
"    Returns:\n",
"    - list[str]: The five-character entries, in the order they appear in the file.\n",
"\n",
"    Raises:\n",
"    - OSError: If the file cannot be opened (e.g. FileNotFoundError).\n",
"    \"\"\"\n",
"    # Decode explicitly as UTF-8 so the result does not depend on the\n",
"    # platform's default locale encoding.\n",
"    with open(file_path, 'r', encoding='utf-8') as file:\n",
"        # Strip each line once, then filter on the stripped length.\n",
"        stripped_lines = (line.strip() for line in file)\n",
"        valid_words = [word for word in stripped_lines if len(word) == 5]\n",
"    return valid_words"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
2024-03-19 23:49:01 +00:00
"outputs": [],
"source": [
"from stable_baselines3 import PPO # Or any other suitable RL algorithm\n",
"from stable_baselines3.common.env_checker import check_env\n",
"from letter_guess import LetterGuessingEnv\n",
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Build the environment from the word list; load_valid_words() is called with\n",
"# no arguments, so it reads from its default file path.\n",
"env = LetterGuessingEnv(valid_words=load_valid_words()) # Make sure to load your valid words\n",
"# Optional sanity check before training: verify the environment is compatible with SB3.\n",
"check_env(env)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import wandb\n",
"from wandb.integration.sb3 import WandbCallback"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
2024-03-19 18:52:10 +00:00
"outputs": [
{
2024-03-19 23:49:01 +00:00
"name": "stderr",
"output_type": "stream",
"text": [
"Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mltcptgeneral\u001b[0m (\u001b[33mfulltime\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
2024-03-19 18:52:10 +00:00
]
2024-03-19 23:49:01 +00:00
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.16.4"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>/home/art/cse151b-final-project/wandb/run-20240319_162920-ot2i0b8h</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Syncing run <strong><a href='https://wandb.ai/fulltime/wordle/runs/ot2i0b8h' target=\"_blank\">confused-meadow-3</a></strong> to <a href='https://wandb.ai/fulltime/wordle' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/fulltime/wordle' target=\"_blank\">https://wandb.ai/fulltime/wordle</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/fulltime/wordle/runs/ot2i0b8h' target=\"_blank\">https://wandb.ai/fulltime/wordle/runs/ot2i0b8h</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
2024-03-19 18:52:10 +00:00
}
],
"source": [
2024-03-19 23:49:01 +00:00
"# Run settings; recorded with the W&B run through `config=config` below.\n",
"config = {\"policy_type\": \"MlpPolicy\", \"total_timesteps\": 200_000}\n",
"\n",
"# Start a run in the \"wordle\" project with TensorBoard syncing turned on.\n",
"run = wandb.init(sync_tensorboard=True, config=config, project=\"wordle\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using cuda device\n",
"Wrapping the env with a `Monitor` wrapper\n",
"Wrapping the env in a DummyVecEnv.\n",
"Logging to runs/ot2i0b8h/PPO_1\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "585e7545478a485aa91c487b8630840f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"---------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 2.48 |\n",
"| ep_rew_mean | -3.7 |\n",
"| time/ | |\n",
"| fps | 465 |\n",
"| iterations | 1 |\n",
"| time_elapsed | 4 |\n",
"| total_timesteps | 2048 |\n",
"---------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 2.49 |\n",
"| ep_rew_mean | -3.65 |\n",
"| time/ | |\n",
"| fps | 395 |\n",
"| iterations | 2 |\n",
"| time_elapsed | 10 |\n",
"| total_timesteps | 4096 |\n",
"| train/ | |\n",
"| approx_kl | 0.04501068 |\n",
"| clip_fraction | 0.427 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -3.23 |\n",
"| explained_variance | 0.189 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.205 |\n",
"| n_updates | 10 |\n",
"| policy_gradient_loss | -0.0667 |\n",
"| value_loss | 0.997 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 2.84 |\n",
"| ep_rew_mean | -3.4 |\n",
"| time/ | |\n",
"| fps | 381 |\n",
"| iterations | 3 |\n",
"| time_elapsed | 16 |\n",
"| total_timesteps | 6144 |\n",
"| train/ | |\n",
"| approx_kl | 0.01765968 |\n",
"| clip_fraction | 0.319 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -3.17 |\n",
"| explained_variance | 0.481 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.123 |\n",
"| n_updates | 20 |\n",
"| policy_gradient_loss | -0.0525 |\n",
"| value_loss | 0.383 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 2.98 |\n",
"| ep_rew_mean | -3.28 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 4 |\n",
"| time_elapsed | 21 |\n",
"| total_timesteps | 8192 |\n",
"| train/ | |\n",
"| approx_kl | 0.018652592 |\n",
"| clip_fraction | 0.368 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -3.11 |\n",
"| explained_variance | 0.428 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.181 |\n",
"| n_updates | 30 |\n",
"| policy_gradient_loss | -0.0572 |\n",
"| value_loss | 0.51 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 3.1 |\n",
"| ep_rew_mean | -3.24 |\n",
"| time/ | |\n",
"| fps | 369 |\n",
"| iterations | 5 |\n",
"| time_elapsed | 27 |\n",
"| total_timesteps | 10240 |\n",
"| train/ | |\n",
"| approx_kl | 0.023806999 |\n",
"| clip_fraction | 0.365 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -3.04 |\n",
"| explained_variance | 0.46 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.118 |\n",
"| n_updates | 40 |\n",
"| policy_gradient_loss | -0.0609 |\n",
"| value_loss | 0.499 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 3.15 |\n",
"| ep_rew_mean | -3.09 |\n",
"| time/ | |\n",
"| fps | 366 |\n",
"| iterations | 6 |\n",
"| time_elapsed | 33 |\n",
"| total_timesteps | 12288 |\n",
"| train/ | |\n",
"| approx_kl | 0.024716537 |\n",
"| clip_fraction | 0.372 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -2.94 |\n",
"| explained_variance | 0.495 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.266 |\n",
"| n_updates | 50 |\n",
"| policy_gradient_loss | -0.0578 |\n",
"| value_loss | 0.503 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 3.46 |\n",
"| ep_rew_mean | -2.8 |\n",
"| time/ | |\n",
"| fps | 365 |\n",
"| iterations | 7 |\n",
"| time_elapsed | 39 |\n",
"| total_timesteps | 14336 |\n",
"| train/ | |\n",
"| approx_kl | 0.023435738 |\n",
"| clip_fraction | 0.357 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -2.82 |\n",
"| explained_variance | 0.556 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.105 |\n",
"| n_updates | 60 |\n",
"| policy_gradient_loss | -0.0537 |\n",
"| value_loss | 0.491 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 3.54 |\n",
"| ep_rew_mean | -2.74 |\n",
"| time/ | |\n",
"| fps | 363 |\n",
"| iterations | 8 |\n",
"| time_elapsed | 45 |\n",
"| total_timesteps | 16384 |\n",
"| train/ | |\n",
"| approx_kl | 0.02574392 |\n",
"| clip_fraction | 0.29 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -2.71 |\n",
"| explained_variance | 0.608 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.125 |\n",
"| n_updates | 70 |\n",
"| policy_gradient_loss | -0.0445 |\n",
"| value_loss | 0.464 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 3.71 |\n",
"| ep_rew_mean | -2.63 |\n",
"| time/ | |\n",
"| fps | 362 |\n",
"| iterations | 9 |\n",
"| time_elapsed | 50 |\n",
"| total_timesteps | 18432 |\n",
"| train/ | |\n",
"| approx_kl | 0.021754535 |\n",
"| clip_fraction | 0.251 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -2.56 |\n",
"| explained_variance | 0.673 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.152 |\n",
"| n_updates | 80 |\n",
"| policy_gradient_loss | -0.0385 |\n",
"| value_loss | 0.4 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 3.8 |\n",
"| ep_rew_mean | -2.5 |\n",
"| time/ | |\n",
"| fps | 362 |\n",
"| iterations | 10 |\n",
"| time_elapsed | 56 |\n",
"| total_timesteps | 20480 |\n",
"| train/ | |\n",
"| approx_kl | 0.018548178 |\n",
"| clip_fraction | 0.239 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -2.46 |\n",
"| explained_variance | 0.702 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.218 |\n",
"| n_updates | 90 |\n",
"| policy_gradient_loss | -0.0361 |\n",
"| value_loss | 0.396 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 3.8 |\n",
"| ep_rew_mean | -2.34 |\n",
"| time/ | |\n",
"| fps | 362 |\n",
"| iterations | 11 |\n",
"| time_elapsed | 62 |\n",
"| total_timesteps | 22528 |\n",
"| train/ | |\n",
"| approx_kl | 0.016667131 |\n",
"| clip_fraction | 0.24 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -2.36 |\n",
"| explained_variance | 0.698 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.226 |\n",
"| n_updates | 100 |\n",
"| policy_gradient_loss | -0.037 |\n",
"| value_loss | 0.411 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 3.95 |\n",
"| ep_rew_mean | -2.31 |\n",
"| time/ | |\n",
"| fps | 361 |\n",
"| iterations | 12 |\n",
"| time_elapsed | 67 |\n",
"| total_timesteps | 24576 |\n",
"| train/ | |\n",
"| approx_kl | 0.020023255 |\n",
"| clip_fraction | 0.257 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -2.24 |\n",
"| explained_variance | 0.712 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.0958 |\n",
"| n_updates | 110 |\n",
"| policy_gradient_loss | -0.0381 |\n",
"| value_loss | 0.406 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 4.19 |\n",
"| ep_rew_mean | -2.03 |\n",
"| time/ | |\n",
"| fps | 360 |\n",
"| iterations | 13 |\n",
"| time_elapsed | 73 |\n",
"| total_timesteps | 26624 |\n",
"| train/ | |\n",
"| approx_kl | 0.019943349 |\n",
"| clip_fraction | 0.266 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -2.13 |\n",
"| explained_variance | 0.712 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.0974 |\n",
"| n_updates | 120 |\n",
"| policy_gradient_loss | -0.0387 |\n",
"| value_loss | 0.444 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 4.24 |\n",
"| ep_rew_mean | -1.96 |\n",
"| time/ | |\n",
"| fps | 360 |\n",
"| iterations | 14 |\n",
"| time_elapsed | 79 |\n",
"| total_timesteps | 28672 |\n",
"| train/ | |\n",
"| approx_kl | 0.022638176 |\n",
"| clip_fraction | 0.298 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.98 |\n",
"| explained_variance | 0.697 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.145 |\n",
"| n_updates | 130 |\n",
"| policy_gradient_loss | -0.0433 |\n",
"| value_loss | 0.486 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 4.45 |\n",
"| ep_rew_mean | -1.89 |\n",
"| time/ | |\n",
"| fps | 361 |\n",
"| iterations | 15 |\n",
"| time_elapsed | 84 |\n",
"| total_timesteps | 30720 |\n",
"| train/ | |\n",
"| approx_kl | 0.02262218 |\n",
"| clip_fraction | 0.354 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.77 |\n",
"| explained_variance | 0.727 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.0807 |\n",
"| n_updates | 140 |\n",
"| policy_gradient_loss | -0.0439 |\n",
"| value_loss | 0.454 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 4.91 |\n",
"| ep_rew_mean | -1.65 |\n",
"| time/ | |\n",
"| fps | 362 |\n",
"| iterations | 16 |\n",
"| time_elapsed | 90 |\n",
"| total_timesteps | 32768 |\n",
"| train/ | |\n",
"| approx_kl | 0.023807548 |\n",
"| clip_fraction | 0.251 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.62 |\n",
"| explained_variance | 0.654 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.171 |\n",
"| n_updates | 150 |\n",
"| policy_gradient_loss | -0.0355 |\n",
"| value_loss | 0.686 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 4.86 |\n",
"| ep_rew_mean | -1.42 |\n",
"| time/ | |\n",
"| fps | 361 |\n",
"| iterations | 17 |\n",
"| time_elapsed | 96 |\n",
"| total_timesteps | 34816 |\n",
"| train/ | |\n",
"| approx_kl | 0.018085614 |\n",
"| clip_fraction | 0.198 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.49 |\n",
"| explained_variance | 0.699 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.339 |\n",
"| n_updates | 160 |\n",
"| policy_gradient_loss | -0.0297 |\n",
"| value_loss | 0.674 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 5.4 |\n",
"| ep_rew_mean | -1.88 |\n",
"| time/ | |\n",
"| fps | 361 |\n",
"| iterations | 18 |\n",
"| time_elapsed | 102 |\n",
"| total_timesteps | 36864 |\n",
"| train/ | |\n",
"| approx_kl | 0.015559142 |\n",
"| clip_fraction | 0.222 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.35 |\n",
"| explained_variance | 0.719 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.354 |\n",
"| n_updates | 170 |\n",
"| policy_gradient_loss | -0.0349 |\n",
"| value_loss | 0.629 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 5.54 |\n",
"| ep_rew_mean | -1.22 |\n",
"| time/ | |\n",
"| fps | 360 |\n",
"| iterations | 19 |\n",
"| time_elapsed | 108 |\n",
"| total_timesteps | 38912 |\n",
"| train/ | |\n",
"| approx_kl | 0.014995611 |\n",
"| clip_fraction | 0.17 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.32 |\n",
"| explained_variance | 0.436 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.33 |\n",
"| n_updates | 180 |\n",
"| policy_gradient_loss | -0.031 |\n",
"| value_loss | 2.79 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 5.28 |\n",
"| ep_rew_mean | -1.2 |\n",
"| time/ | |\n",
"| fps | 360 |\n",
"| iterations | 20 |\n",
"| time_elapsed | 113 |\n",
"| total_timesteps | 40960 |\n",
"| train/ | |\n",
"| approx_kl | 0.018023107 |\n",
"| clip_fraction | 0.169 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.23 |\n",
"| explained_variance | 0.559 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.743 |\n",
"| n_updates | 190 |\n",
"| policy_gradient_loss | -0.0327 |\n",
"| value_loss | 1.24 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 6.22 |\n",
"| ep_rew_mean | -0.38 |\n",
"| time/ | |\n",
"| fps | 361 |\n",
"| iterations | 21 |\n",
"| time_elapsed | 119 |\n",
"| total_timesteps | 43008 |\n",
"| train/ | |\n",
"| approx_kl | 0.023376558 |\n",
"| clip_fraction | 0.29 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.07 |\n",
"| explained_variance | 0.628 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.502 |\n",
"| n_updates | 200 |\n",
"| policy_gradient_loss | -0.0451 |\n",
"| value_loss | 1.11 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 7.06 |\n",
"| ep_rew_mean | 0.26 |\n",
"| time/ | |\n",
"| fps | 362 |\n",
"| iterations | 22 |\n",
"| time_elapsed | 124 |\n",
"| total_timesteps | 45056 |\n",
"| train/ | |\n",
"| approx_kl | 0.023838695 |\n",
"| clip_fraction | 0.269 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -1.01 |\n",
"| explained_variance | 0.566 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.925 |\n",
"| n_updates | 210 |\n",
"| policy_gradient_loss | -0.0463 |\n",
"| value_loss | 1.71 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 7.93 |\n",
"| ep_rew_mean | 1.19 |\n",
"| time/ | |\n",
"| fps | 362 |\n",
"| iterations | 23 |\n",
"| time_elapsed | 129 |\n",
"| total_timesteps | 47104 |\n",
"| train/ | |\n",
"| approx_kl | 0.021363221 |\n",
"| clip_fraction | 0.229 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.906 |\n",
"| explained_variance | 0.594 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.819 |\n",
"| n_updates | 220 |\n",
"| policy_gradient_loss | -0.0426 |\n",
"| value_loss | 1.94 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 8.52 |\n",
"| ep_rew_mean | 1.98 |\n",
"| time/ | |\n",
"| fps | 363 |\n",
"| iterations | 24 |\n",
"| time_elapsed | 135 |\n",
"| total_timesteps | 49152 |\n",
"| train/ | |\n",
"| approx_kl | 0.022241611 |\n",
"| clip_fraction | 0.167 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.882 |\n",
"| explained_variance | 0.667 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.838 |\n",
"| n_updates | 230 |\n",
"| policy_gradient_loss | -0.0343 |\n",
"| value_loss | 1.73 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 8.9 |\n",
"| ep_rew_mean | 1.8 |\n",
"| time/ | |\n",
"| fps | 364 |\n",
"| iterations | 25 |\n",
"| time_elapsed | 140 |\n",
"| total_timesteps | 51200 |\n",
"| train/ | |\n",
"| approx_kl | 0.011297604 |\n",
"| clip_fraction | 0.111 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.859 |\n",
"| explained_variance | 0.763 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.5 |\n",
"| n_updates | 240 |\n",
"| policy_gradient_loss | -0.024 |\n",
"| value_loss | 1.35 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.01 |\n",
"| ep_rew_mean | 2.23 |\n",
"| time/ | |\n",
"| fps | 364 |\n",
"| iterations | 26 |\n",
"| time_elapsed | 145 |\n",
"| total_timesteps | 53248 |\n",
"| train/ | |\n",
"| approx_kl | 0.010706454 |\n",
"| clip_fraction | 0.0958 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.84 |\n",
"| explained_variance | 0.486 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.872 |\n",
"| n_updates | 250 |\n",
"| policy_gradient_loss | -0.0237 |\n",
"| value_loss | 2.77 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.75 |\n",
"| ep_rew_mean | 1.81 |\n",
"| time/ | |\n",
"| fps | 365 |\n",
"| iterations | 27 |\n",
"| time_elapsed | 151 |\n",
"| total_timesteps | 55296 |\n",
"| train/ | |\n",
"| approx_kl | 0.011905432 |\n",
"| clip_fraction | 0.12 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.785 |\n",
"| explained_variance | 0.838 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.121 |\n",
"| n_updates | 260 |\n",
"| policy_gradient_loss | -0.0206 |\n",
"| value_loss | 0.851 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.07 |\n",
"| ep_rew_mean | 2.13 |\n",
"| time/ | |\n",
"| fps | 365 |\n",
"| iterations | 28 |\n",
"| time_elapsed | 156 |\n",
"| total_timesteps | 57344 |\n",
"| train/ | |\n",
"| approx_kl | 0.009603689 |\n",
"| clip_fraction | 0.0931 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.808 |\n",
"| explained_variance | 0.684 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.391 |\n",
"| n_updates | 270 |\n",
"| policy_gradient_loss | -0.0184 |\n",
"| value_loss | 1.63 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 8.87 |\n",
"| ep_rew_mean | 2.51 |\n",
"| time/ | |\n",
"| fps | 366 |\n",
"| iterations | 29 |\n",
"| time_elapsed | 162 |\n",
"| total_timesteps | 59392 |\n",
"| train/ | |\n",
"| approx_kl | 0.0071417904 |\n",
"| clip_fraction | 0.0738 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.736 |\n",
"| explained_variance | 0.826 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.83 |\n",
"| n_updates | 280 |\n",
"| policy_gradient_loss | -0.0131 |\n",
"| value_loss | 1.72 |\n",
"------------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.02 |\n",
"| ep_rew_mean | 2.74 |\n",
"| time/ | |\n",
"| fps | 366 |\n",
"| iterations | 30 |\n",
"| time_elapsed | 167 |\n",
"| total_timesteps | 61440 |\n",
"| train/ | |\n",
"| approx_kl | 0.009269893 |\n",
"| clip_fraction | 0.0918 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.686 |\n",
"| explained_variance | 0.912 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.163 |\n",
"| n_updates | 290 |\n",
"| policy_gradient_loss | -0.0166 |\n",
"| value_loss | 0.537 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.51 |\n",
"| ep_rew_mean | 2.29 |\n",
"| time/ | |\n",
"| fps | 367 |\n",
"| iterations | 31 |\n",
"| time_elapsed | 172 |\n",
"| total_timesteps | 63488 |\n",
"| train/ | |\n",
"| approx_kl | 0.010982089 |\n",
"| clip_fraction | 0.0863 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.657 |\n",
"| explained_variance | 0.946 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.339 |\n",
"| n_updates | 300 |\n",
"| policy_gradient_loss | -0.0149 |\n",
"| value_loss | 0.341 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 8.94 |\n",
"| ep_rew_mean | 2.7 |\n",
"| time/ | |\n",
"| fps | 368 |\n",
"| iterations | 32 |\n",
"| time_elapsed | 177 |\n",
"| total_timesteps | 65536 |\n",
"| train/ | |\n",
"| approx_kl | 0.01880536 |\n",
"| clip_fraction | 0.0622 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.667 |\n",
"| explained_variance | 0.778 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.13 |\n",
"| n_updates | 310 |\n",
"| policy_gradient_loss | -0.0107 |\n",
"| value_loss | 2.3 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 8.82 |\n",
"| ep_rew_mean | 2.6 |\n",
"| time/ | |\n",
"| fps | 369 |\n",
"| iterations | 33 |\n",
"| time_elapsed | 182 |\n",
"| total_timesteps | 67584 |\n",
"| train/ | |\n",
"| approx_kl | 0.013803964 |\n",
"| clip_fraction | 0.1 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.644 |\n",
"| explained_variance | 0.952 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.0438 |\n",
"| n_updates | 320 |\n",
"| policy_gradient_loss | -0.0186 |\n",
"| value_loss | 0.31 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9 |\n",
"| ep_rew_mean | 2.9 |\n",
"| time/ | |\n",
"| fps | 370 |\n",
"| iterations | 34 |\n",
"| time_elapsed | 188 |\n",
"| total_timesteps | 69632 |\n",
"| train/ | |\n",
"| approx_kl | 0.011061303 |\n",
"| clip_fraction | 0.0942 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.597 |\n",
"| explained_variance | 0.905 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.293 |\n",
"| n_updates | 330 |\n",
"| policy_gradient_loss | -0.0158 |\n",
"| value_loss | 0.61 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.96 |\n",
"| ep_rew_mean | 2.6 |\n",
"| time/ | |\n",
"| fps | 370 |\n",
"| iterations | 35 |\n",
"| time_elapsed | 193 |\n",
"| total_timesteps | 71680 |\n",
"| train/ | |\n",
"| approx_kl | 0.016911192 |\n",
"| clip_fraction | 0.165 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.551 |\n",
"| explained_variance | 0.926 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.229 |\n",
"| n_updates | 340 |\n",
"| policy_gradient_loss | -0.0229 |\n",
"| value_loss | 0.519 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.5 |\n",
"| ep_rew_mean | 3.32 |\n",
"| time/ | |\n",
"| fps | 370 |\n",
"| iterations | 36 |\n",
"| time_elapsed | 198 |\n",
"| total_timesteps | 73728 |\n",
"| train/ | |\n",
"| approx_kl | 0.012311206 |\n",
"| clip_fraction | 0.119 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.595 |\n",
"| explained_variance | 0.709 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.68 |\n",
"| n_updates | 350 |\n",
"| policy_gradient_loss | -0.0275 |\n",
"| value_loss | 1.95 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.6 |\n",
"| ep_rew_mean | 3.3 |\n",
"| time/ | |\n",
"| fps | 370 |\n",
"| iterations | 37 |\n",
"| time_elapsed | 204 |\n",
"| total_timesteps | 75776 |\n",
"| train/ | |\n",
"| approx_kl | 0.059752032 |\n",
"| clip_fraction | 0.154 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.534 |\n",
"| explained_variance | 0.785 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.835 |\n",
"| n_updates | 360 |\n",
"| policy_gradient_loss | -0.0234 |\n",
"| value_loss | 1.46 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.8 |\n",
"| ep_rew_mean | 2.7 |\n",
"| time/ | |\n",
"| fps | 370 |\n",
"| iterations | 38 |\n",
"| time_elapsed | 209 |\n",
"| total_timesteps | 77824 |\n",
"| train/ | |\n",
"| approx_kl | 0.01475055 |\n",
"| clip_fraction | 0.0968 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.455 |\n",
"| explained_variance | 0.917 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.153 |\n",
"| n_updates | 370 |\n",
"| policy_gradient_loss | -0.019 |\n",
"| value_loss | 0.428 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.2 |\n",
"| ep_rew_mean | 3.01 |\n",
"| time/ | |\n",
"| fps | 370 |\n",
"| iterations | 39 |\n",
"| time_elapsed | 215 |\n",
"| total_timesteps | 79872 |\n",
"| train/ | |\n",
"| approx_kl | 0.012021113 |\n",
"| clip_fraction | 0.0851 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.558 |\n",
"| explained_variance | 0.702 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.8 |\n",
"| n_updates | 380 |\n",
"| policy_gradient_loss | -0.0284 |\n",
"| value_loss | 4.45 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.83 |\n",
"| ep_rew_mean | 3.57 |\n",
"| time/ | |\n",
"| fps | 370 |\n",
"| iterations | 40 |\n",
"| time_elapsed | 220 |\n",
"| total_timesteps | 81920 |\n",
"| train/ | |\n",
"| approx_kl | 0.010166377 |\n",
"| clip_fraction | 0.0623 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.454 |\n",
"| explained_variance | 0.728 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.43 |\n",
"| n_updates | 390 |\n",
"| policy_gradient_loss | -0.0201 |\n",
"| value_loss | 2.21 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.3 |\n",
"| ep_rew_mean | 3.14 |\n",
"| time/ | |\n",
"| fps | 371 |\n",
"| iterations | 41 |\n",
"| time_elapsed | 226 |\n",
"| total_timesteps | 83968 |\n",
"| train/ | |\n",
"| approx_kl | 0.017603599 |\n",
"| clip_fraction | 0.0748 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.384 |\n",
"| explained_variance | 0.957 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.152 |\n",
"| n_updates | 400 |\n",
"| policy_gradient_loss | -0.0122 |\n",
"| value_loss | 0.286 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.95 |\n",
"| ep_rew_mean | 3.81 |\n",
"| time/ | |\n",
"| fps | 371 |\n",
"| iterations | 42 |\n",
"| time_elapsed | 231 |\n",
"| total_timesteps | 86016 |\n",
"| train/ | |\n",
"| approx_kl | 0.028185518 |\n",
"| clip_fraction | 0.0954 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.383 |\n",
"| explained_variance | 0.811 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.412 |\n",
"| n_updates | 410 |\n",
"| policy_gradient_loss | -0.0224 |\n",
"| value_loss | 2.05 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.5 |\n",
"| ep_rew_mean | 3.57 |\n",
"| time/ | |\n",
"| fps | 371 |\n",
"| iterations | 43 |\n",
"| time_elapsed | 236 |\n",
"| total_timesteps | 88064 |\n",
"| train/ | |\n",
"| approx_kl | 0.02107103 |\n",
"| clip_fraction | 0.0702 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.375 |\n",
"| explained_variance | 0.732 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.156 |\n",
"| n_updates | 420 |\n",
"| policy_gradient_loss | -0.0342 |\n",
"| value_loss | 1.54 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.76 |\n",
"| ep_rew_mean | 3.7 |\n",
"| time/ | |\n",
"| fps | 371 |\n",
"| iterations | 44 |\n",
"| time_elapsed | 242 |\n",
"| total_timesteps | 90112 |\n",
"| train/ | |\n",
"| approx_kl | 0.007913441 |\n",
"| clip_fraction | 0.0349 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.346 |\n",
"| explained_variance | 0.827 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 2.92 |\n",
"| n_updates | 430 |\n",
"| policy_gradient_loss | -0.00355 |\n",
"| value_loss | 2.2 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"---------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 25.8 |\n",
"| ep_rew_mean | -12.4 |\n",
"| time/ | |\n",
"| fps | 371 |\n",
"| iterations | 45 |\n",
"| time_elapsed | 247 |\n",
"| total_timesteps | 92160 |\n",
"| train/ | |\n",
"| approx_kl | 0.5432366 |\n",
"| clip_fraction | 0.109 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.316 |\n",
"| explained_variance | 0.958 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.231 |\n",
"| n_updates | 440 |\n",
"| policy_gradient_loss | -0.0068 |\n",
"| value_loss | 0.313 |\n",
"---------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 45.9 |\n",
"| ep_rew_mean | -32.4 |\n",
"| time/ | |\n",
"| fps | 372 |\n",
"| iterations | 46 |\n",
"| time_elapsed | 252 |\n",
"| total_timesteps | 94208 |\n",
"| train/ | |\n",
"| approx_kl | 0.008981178 |\n",
"| clip_fraction | 0.0144 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.11 |\n",
"| explained_variance | -0.0603 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.91 |\n",
"| n_updates | 450 |\n",
"| policy_gradient_loss | -0.00156 |\n",
"| value_loss | 17.2 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 62.7 |\n",
"| ep_rew_mean | -49.3 |\n",
"| time/ | |\n",
"| fps | 373 |\n",
"| iterations | 47 |\n",
"| time_elapsed | 257 |\n",
"| total_timesteps | 96256 |\n",
"| train/ | |\n",
"| approx_kl | 0.0025778997 |\n",
"| clip_fraction | 0.0184 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.165 |\n",
"| explained_variance | 0.396 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 19.1 |\n",
"| n_updates | 460 |\n",
"| policy_gradient_loss | -0.00601 |\n",
"| value_loss | 45.6 |\n",
"------------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 49.3 |\n",
"| ep_rew_mean | -35.6 |\n",
"| time/ | |\n",
"| fps | 373 |\n",
"| iterations | 48 |\n",
"| time_elapsed | 262 |\n",
"| total_timesteps | 98304 |\n",
"| train/ | |\n",
"| approx_kl | 0.014914533 |\n",
"| clip_fraction | 0.0816 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.272 |\n",
"| explained_variance | 0.394 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 33.5 |\n",
"| n_updates | 470 |\n",
"| policy_gradient_loss | -0.0164 |\n",
"| value_loss | 74.1 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 39.9 |\n",
"| ep_rew_mean | -26.3 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 49 |\n",
"| time_elapsed | 268 |\n",
"| total_timesteps | 100352 |\n",
"| train/ | |\n",
"| approx_kl | 0.0012997694 |\n",
"| clip_fraction | 0.026 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.24 |\n",
"| explained_variance | 0.465 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 44.5 |\n",
"| n_updates | 480 |\n",
"| policy_gradient_loss | -0.00881 |\n",
"| value_loss | 76.6 |\n",
"------------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 27 |\n",
"| ep_rew_mean | -13.3 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 50 |\n",
"| time_elapsed | 273 |\n",
"| total_timesteps | 102400 |\n",
"| train/ | |\n",
"| approx_kl | 0.0014951692 |\n",
"| clip_fraction | 0.037 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.25 |\n",
"| explained_variance | 0.594 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 30.2 |\n",
"| n_updates | 490 |\n",
"| policy_gradient_loss | -0.00958 |\n",
"| value_loss | 77.4 |\n",
"------------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"---------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 17.4 |\n",
"| ep_rew_mean | -3.57 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 51 |\n",
"| time_elapsed | 279 |\n",
"| total_timesteps | 104448 |\n",
"| train/ | |\n",
"| approx_kl | 0.0443189 |\n",
"| clip_fraction | 0.176 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.457 |\n",
"| explained_variance | 0.338 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 44.4 |\n",
"| n_updates | 500 |\n",
"| policy_gradient_loss | -0.0247 |\n",
"| value_loss | 70.8 |\n",
"---------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 13.3 |\n",
"| ep_rew_mean | 0.45 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 52 |\n",
"| time_elapsed | 284 |\n",
"| total_timesteps | 106496 |\n",
"| train/ | |\n",
"| approx_kl | 0.005663138 |\n",
"| clip_fraction | 0.069 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.463 |\n",
"| explained_variance | 0.558 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 14.7 |\n",
"| n_updates | 510 |\n",
"| policy_gradient_loss | -0.0117 |\n",
"| value_loss | 36.8 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 19.8 |\n",
"| ep_rew_mean | -5.92 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 53 |\n",
"| time_elapsed | 289 |\n",
"| total_timesteps | 108544 |\n",
"| train/ | |\n",
"| approx_kl | 0.020181399 |\n",
"| clip_fraction | 0.176 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.499 |\n",
"| explained_variance | 0.182 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 3.45 |\n",
"| n_updates | 520 |\n",
"| policy_gradient_loss | -0.0254 |\n",
"| value_loss | 9.2 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 11.4 |\n",
"| ep_rew_mean | 2.26 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 54 |\n",
"| time_elapsed | 295 |\n",
"| total_timesteps | 110592 |\n",
"| train/ | |\n",
"| approx_kl | 0.009738399 |\n",
"| clip_fraction | 0.108 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.538 |\n",
"| explained_variance | 0.891 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.88 |\n",
"| n_updates | 530 |\n",
"| policy_gradient_loss | -0.00573 |\n",
"| value_loss | 22 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.4 |\n",
"| ep_rew_mean | 3.06 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 55 |\n",
"| time_elapsed | 300 |\n",
"| total_timesteps | 112640 |\n",
"| train/ | |\n",
"| approx_kl | 0.01536967 |\n",
"| clip_fraction | 0.191 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.49 |\n",
"| explained_variance | 0.315 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.971 |\n",
"| n_updates | 540 |\n",
"| policy_gradient_loss | -0.0219 |\n",
"| value_loss | 6.46 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"---------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.8 |\n",
"| ep_rew_mean | 1.63 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 56 |\n",
"| time_elapsed | 305 |\n",
"| total_timesteps | 114688 |\n",
"| train/ | |\n",
"| approx_kl | 0.0688808 |\n",
"| clip_fraction | 0.264 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.496 |\n",
"| explained_variance | 0.838 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.205 |\n",
"| n_updates | 550 |\n",
"| policy_gradient_loss | -0.0406 |\n",
"| value_loss | 0.836 |\n",
"---------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.3 |\n",
"| ep_rew_mean | 2.98 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 57 |\n",
"| time_elapsed | 311 |\n",
"| total_timesteps | 116736 |\n",
"| train/ | |\n",
"| approx_kl | 0.04210388 |\n",
"| clip_fraction | 0.326 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.534 |\n",
"| explained_variance | 0.734 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.907 |\n",
"| n_updates | 560 |\n",
"| policy_gradient_loss | -0.0421 |\n",
"| value_loss | 1.91 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.3 |\n",
"| ep_rew_mean | 3.28 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 58 |\n",
"| time_elapsed | 316 |\n",
"| total_timesteps | 118784 |\n",
"| train/ | |\n",
"| approx_kl | 0.051502623 |\n",
"| clip_fraction | 0.224 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.434 |\n",
"| explained_variance | 0.875 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.404 |\n",
"| n_updates | 570 |\n",
"| policy_gradient_loss | -0.0345 |\n",
"| value_loss | 0.827 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.2 |\n",
"| ep_rew_mean | 3.78 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 59 |\n",
"| time_elapsed | 322 |\n",
"| total_timesteps | 120832 |\n",
"| train/ | |\n",
"| approx_kl | 0.07014565 |\n",
"| clip_fraction | 0.165 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.391 |\n",
"| explained_variance | 0.903 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.145 |\n",
"| n_updates | 580 |\n",
"| policy_gradient_loss | -0.0254 |\n",
"| value_loss | 0.681 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 16.4 |\n",
"| ep_rew_mean | -3.11 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 60 |\n",
"| time_elapsed | 327 |\n",
"| total_timesteps | 122880 |\n",
"| train/ | |\n",
"| approx_kl | 0.118292876 |\n",
"| clip_fraction | 0.151 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.466 |\n",
"| explained_variance | 0.952 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.0778 |\n",
"| n_updates | 590 |\n",
"| policy_gradient_loss | -0.0214 |\n",
"| value_loss | 0.316 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 12.5 |\n",
"| ep_rew_mean | 1.05 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 61 |\n",
"| time_elapsed | 333 |\n",
"| total_timesteps | 124928 |\n",
"| train/ | |\n",
"| approx_kl | 0.061776154 |\n",
"| clip_fraction | 0.321 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.665 |\n",
"| explained_variance | 0.401 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 9.22 |\n",
"| n_updates | 600 |\n",
"| policy_gradient_loss | -0.0454 |\n",
"| value_loss | 17 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.8 |\n",
"| ep_rew_mean | 2.8 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 62 |\n",
"| time_elapsed | 338 |\n",
"| total_timesteps | 126976 |\n",
"| train/ | |\n",
"| approx_kl | 0.01477613 |\n",
"| clip_fraction | 0.153 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.579 |\n",
"| explained_variance | 0.682 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 6.12 |\n",
"| n_updates | 610 |\n",
"| policy_gradient_loss | -0.027 |\n",
"| value_loss | 10.5 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 11.4 |\n",
"| ep_rew_mean | 2.68 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 63 |\n",
"| time_elapsed | 344 |\n",
"| total_timesteps | 129024 |\n",
"| train/ | |\n",
"| approx_kl | 0.015770137 |\n",
"| clip_fraction | 0.128 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.507 |\n",
"| explained_variance | 0.869 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 2.13 |\n",
"| n_updates | 620 |\n",
"| policy_gradient_loss | -0.0253 |\n",
"| value_loss | 5.99 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.2 |\n",
"| ep_rew_mean | 3.48 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 64 |\n",
"| time_elapsed | 349 |\n",
"| total_timesteps | 131072 |\n",
"| train/ | |\n",
"| approx_kl | 0.007838536 |\n",
"| clip_fraction | 0.0755 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.446 |\n",
"| explained_variance | 0.749 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 11.9 |\n",
"| n_updates | 630 |\n",
"| policy_gradient_loss | -0.0106 |\n",
"| value_loss | 9.21 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10 |\n",
"| ep_rew_mean | 3.8 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 65 |\n",
"| time_elapsed | 354 |\n",
"| total_timesteps | 133120 |\n",
"| train/ | |\n",
"| approx_kl | 0.035350725 |\n",
"| clip_fraction | 0.101 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.368 |\n",
"| explained_variance | 0.867 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.388 |\n",
"| n_updates | 640 |\n",
"| policy_gradient_loss | -0.0213 |\n",
"| value_loss | 1.11 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 14.6 |\n",
"| ep_rew_mean | -0.59 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 66 |\n",
"| time_elapsed | 360 |\n",
"| total_timesteps | 135168 |\n",
"| train/ | |\n",
"| approx_kl | 0.042963736 |\n",
"| clip_fraction | 0.0817 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.321 |\n",
"| explained_variance | 0.941 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.00919 |\n",
"| n_updates | 650 |\n",
"| policy_gradient_loss | -0.0127 |\n",
"| value_loss | 0.325 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.3 |\n",
"| ep_rew_mean | 3.64 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 67 |\n",
"| time_elapsed | 366 |\n",
"| total_timesteps | 137216 |\n",
"| train/ | |\n",
"| approx_kl | 0.08396668 |\n",
"| clip_fraction | 0.184 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.409 |\n",
"| explained_variance | 0.803 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 2.09 |\n",
"| n_updates | 660 |\n",
"| policy_gradient_loss | -0.0203 |\n",
"| value_loss | 8.99 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 19.8 |\n",
"| ep_rew_mean | -9.15 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 68 |\n",
"| time_elapsed | 371 |\n",
"| total_timesteps | 139264 |\n",
"| train/ | |\n",
"| approx_kl | 0.15326424 |\n",
"| clip_fraction | 0.199 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.39 |\n",
"| explained_variance | 0.894 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.122 |\n",
"| n_updates | 670 |\n",
"| policy_gradient_loss | -0.0201 |\n",
"| value_loss | 0.415 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.38 |\n",
"| ep_rew_mean | 3.18 |\n",
"| time/ | |\n",
"| fps | 374 |\n",
"| iterations | 69 |\n",
"| time_elapsed | 376 |\n",
"| total_timesteps | 141312 |\n",
"| train/ | |\n",
"| approx_kl | 0.21504487 |\n",
"| clip_fraction | 0.411 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.483 |\n",
"| explained_variance | 0.408 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 6.24 |\n",
"| n_updates | 680 |\n",
"| policy_gradient_loss | -0.0591 |\n",
"| value_loss | 18.4 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.98 |\n",
"| ep_rew_mean | 3.8 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 70 |\n",
"| time_elapsed | 382 |\n",
"| total_timesteps | 143360 |\n",
"| train/ | |\n",
"| approx_kl | 0.050312966 |\n",
"| clip_fraction | 0.0584 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.341 |\n",
"| explained_variance | 0.707 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.0602 |\n",
"| n_updates | 690 |\n",
"| policy_gradient_loss | -0.00661 |\n",
"| value_loss | 0.845 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 11.7 |\n",
"| ep_rew_mean | 2.07 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 71 |\n",
"| time_elapsed | 387 |\n",
"| total_timesteps | 145408 |\n",
"| train/ | |\n",
"| approx_kl | 0.018565401 |\n",
"| clip_fraction | 0.0643 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.322 |\n",
"| explained_variance | 0.977 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.348 |\n",
"| n_updates | 700 |\n",
"| policy_gradient_loss | -0.0112 |\n",
"| value_loss | 0.175 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.2 |\n",
"| ep_rew_mean | 3.47 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 72 |\n",
"| time_elapsed | 392 |\n",
"| total_timesteps | 147456 |\n",
"| train/ | |\n",
"| approx_kl | 0.10186449 |\n",
"| clip_fraction | 0.19 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.395 |\n",
"| explained_variance | 0.47 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.66 |\n",
"| n_updates | 710 |\n",
"| policy_gradient_loss | -0.0419 |\n",
"| value_loss | 4.71 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.2 |\n",
"| ep_rew_mean | 3.72 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 73 |\n",
"| time_elapsed | 398 |\n",
"| total_timesteps | 149504 |\n",
"| train/ | |\n",
"| approx_kl | 0.01903234 |\n",
"| clip_fraction | 0.0737 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.333 |\n",
"| explained_variance | 0.723 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 2.71 |\n",
"| n_updates | 720 |\n",
"| policy_gradient_loss | -0.022 |\n",
"| value_loss | 2.97 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.94 |\n",
"| ep_rew_mean | 3.92 |\n",
"| time/ | |\n",
"| fps | 375 |\n",
"| iterations | 74 |\n",
"| time_elapsed | 403 |\n",
"| total_timesteps | 151552 |\n",
"| train/ | |\n",
"| approx_kl | 0.026403807 |\n",
"| clip_fraction | 0.065 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.322 |\n",
"| explained_variance | 0.934 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.24 |\n",
"| n_updates | 730 |\n",
"| policy_gradient_loss | -0.00481 |\n",
"| value_loss | 0.481 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.2 |\n",
"| ep_rew_mean | 3.81 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 75 |\n",
"| time_elapsed | 408 |\n",
"| total_timesteps | 153600 |\n",
"| train/ | |\n",
"| approx_kl | 0.011051587 |\n",
"| clip_fraction | 0.0593 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.324 |\n",
"| explained_variance | 0.984 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.00908 |\n",
"| n_updates | 740 |\n",
"| policy_gradient_loss | -0.00591 |\n",
"| value_loss | 0.113 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.2 |\n",
"| ep_rew_mean | 3.63 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 76 |\n",
"| time_elapsed | 413 |\n",
"| total_timesteps | 155648 |\n",
"| train/ | |\n",
"| approx_kl | 0.00972967 |\n",
"| clip_fraction | 0.0544 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.339 |\n",
"| explained_variance | 0.922 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.0226 |\n",
"| n_updates | 750 |\n",
"| policy_gradient_loss | -0.00438 |\n",
"| value_loss | 0.758 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.92 |\n",
"| ep_rew_mean | 3.86 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 77 |\n",
"| time_elapsed | 418 |\n",
"| total_timesteps | 157696 |\n",
"| train/ | |\n",
"| approx_kl | 0.013084366 |\n",
"| clip_fraction | 0.0481 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.348 |\n",
"| explained_variance | 0.812 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.0736 |\n",
"| n_updates | 760 |\n",
"| policy_gradient_loss | -0.0157 |\n",
"| value_loss | 1.47 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 5.05 |\n",
"| ep_rew_mean | -2.29 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 78 |\n",
"| time_elapsed | 424 |\n",
"| total_timesteps | 159744 |\n",
"| train/ | |\n",
"| approx_kl | 0.42402273 |\n",
"| clip_fraction | 0.215 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.49 |\n",
"| explained_variance | 0.983 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.00926 |\n",
"| n_updates | 770 |\n",
"| policy_gradient_loss | -0.0284 |\n",
"| value_loss | 0.109 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 4.53 |\n",
"| ep_rew_mean | -2.17 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 79 |\n",
"| time_elapsed | 429 |\n",
"| total_timesteps | 161792 |\n",
"| train/ | |\n",
"| approx_kl | 0.27915305 |\n",
"| clip_fraction | 0.138 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.376 |\n",
"| explained_variance | 0.503 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.373 |\n",
"| n_updates | 780 |\n",
"| policy_gradient_loss | -0.00444 |\n",
"| value_loss | 5.03 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 4.38 |\n",
"| ep_rew_mean | -1.74 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 80 |\n",
"| time_elapsed | 435 |\n",
"| total_timesteps | 163840 |\n",
"| train/ | |\n",
"| approx_kl | 0.019562341 |\n",
"| clip_fraction | 0.0921 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.457 |\n",
"| explained_variance | 0.562 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.638 |\n",
"| n_updates | 790 |\n",
"| policy_gradient_loss | -0.0128 |\n",
"| value_loss | 0.895 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 4.34 |\n",
"| ep_rew_mean | -1.7 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 81 |\n",
"| time_elapsed | 440 |\n",
"| total_timesteps | 165888 |\n",
"| train/ | |\n",
"| approx_kl | 0.18844175 |\n",
"| clip_fraction | 0.147 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.279 |\n",
"| explained_variance | 0.102 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.03 |\n",
"| n_updates | 800 |\n",
"| policy_gradient_loss | 0.013 |\n",
"| value_loss | 11.4 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 5.54 |\n",
"| ep_rew_mean | -0.66 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 82 |\n",
"| time_elapsed | 445 |\n",
"| total_timesteps | 167936 |\n",
"| train/ | |\n",
"| approx_kl | 0.06374202 |\n",
"| clip_fraction | 0.21 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.398 |\n",
"| explained_variance | 0.566 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.599 |\n",
"| n_updates | 810 |\n",
"| policy_gradient_loss | -0.0207 |\n",
"| value_loss | 1.59 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 7.65 |\n",
"| ep_rew_mean | 1.61 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 83 |\n",
"| time_elapsed | 451 |\n",
"| total_timesteps | 169984 |\n",
"| train/ | |\n",
"| approx_kl | 0.06125373 |\n",
"| clip_fraction | 0.251 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.434 |\n",
"| explained_variance | 0.299 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.27 |\n",
"| n_updates | 820 |\n",
"| policy_gradient_loss | -0.0399 |\n",
"| value_loss | 3.7 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 8.86 |\n",
"| ep_rew_mean | 2.46 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 84 |\n",
"| time_elapsed | 457 |\n",
"| total_timesteps | 172032 |\n",
"| train/ | |\n",
"| approx_kl | 0.047695376 |\n",
"| clip_fraction | 0.171 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.387 |\n",
"| explained_variance | 0.336 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.72 |\n",
"| n_updates | 830 |\n",
"| policy_gradient_loss | -0.0407 |\n",
"| value_loss | 3.91 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.84 |\n",
"| ep_rew_mean | 3.66 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 85 |\n",
"| time_elapsed | 462 |\n",
"| total_timesteps | 174080 |\n",
"| train/ | |\n",
"| approx_kl | 0.13684572 |\n",
"| clip_fraction | 0.133 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.346 |\n",
"| explained_variance | 0.625 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.13 |\n",
"| n_updates | 840 |\n",
"| policy_gradient_loss | -0.0231 |\n",
"| value_loss | 2.36 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 25.5 |\n",
"| ep_rew_mean | -12.2 |\n",
"| time/ | |\n",
"| fps | 376 |\n",
"| iterations | 86 |\n",
"| time_elapsed | 467 |\n",
"| total_timesteps | 176128 |\n",
"| train/ | |\n",
"| approx_kl | 0.36170986 |\n",
"| clip_fraction | 0.332 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.473 |\n",
"| explained_variance | 0.919 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.364 |\n",
"| n_updates | 850 |\n",
"| policy_gradient_loss | -0.0374 |\n",
"| value_loss | 0.278 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 17.2 |\n",
"| ep_rew_mean | -4.45 |\n",
"| time/ | |\n",
"| fps | 377 |\n",
"| iterations | 87 |\n",
"| time_elapsed | 472 |\n",
"| total_timesteps | 178176 |\n",
"| train/ | |\n",
"| approx_kl | 0.102079734 |\n",
"| clip_fraction | 0.34 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.613 |\n",
"| explained_variance | 0.0593 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 5.95 |\n",
"| n_updates | 860 |\n",
"| policy_gradient_loss | -0.04 |\n",
"| value_loss | 20.6 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 14.5 |\n",
"| ep_rew_mean | -2.36 |\n",
"| time/ | |\n",
"| fps | 378 |\n",
"| iterations | 88 |\n",
"| time_elapsed | 476 |\n",
"| total_timesteps | 180224 |\n",
"| train/ | |\n",
"| approx_kl | 0.042367905 |\n",
"| clip_fraction | 0.264 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.799 |\n",
"| explained_variance | 0.69 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 4.22 |\n",
"| n_updates | 870 |\n",
"| policy_gradient_loss | -0.0265 |\n",
"| value_loss | 16.7 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 12.3 |\n",
"| ep_rew_mean | 0.46 |\n",
"| time/ | |\n",
"| fps | 379 |\n",
"| iterations | 89 |\n",
"| time_elapsed | 480 |\n",
"| total_timesteps | 182272 |\n",
"| train/ | |\n",
"| approx_kl | 0.02418825 |\n",
"| clip_fraction | 0.262 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.744 |\n",
"| explained_variance | 0.75 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 3.08 |\n",
"| n_updates | 880 |\n",
"| policy_gradient_loss | -0.026 |\n",
"| value_loss | 16.2 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 11 |\n",
"| ep_rew_mean | 2.19 |\n",
"| time/ | |\n",
"| fps | 380 |\n",
"| iterations | 90 |\n",
"| time_elapsed | 484 |\n",
"| total_timesteps | 184320 |\n",
"| train/ | |\n",
"| approx_kl | 0.024347484 |\n",
"| clip_fraction | 0.271 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.669 |\n",
"| explained_variance | 0.675 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 4.76 |\n",
"| n_updates | 890 |\n",
"| policy_gradient_loss | -0.0243 |\n",
"| value_loss | 10.1 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 12.1 |\n",
"| ep_rew_mean | 1.35 |\n",
"| time/ | |\n",
"| fps | 381 |\n",
"| iterations | 91 |\n",
"| time_elapsed | 488 |\n",
"| total_timesteps | 186368 |\n",
"| train/ | |\n",
"| approx_kl | 0.02668532 |\n",
"| clip_fraction | 0.256 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.576 |\n",
"| explained_variance | 0.706 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.797 |\n",
"| n_updates | 900 |\n",
"| policy_gradient_loss | -0.0378 |\n",
"| value_loss | 1.6 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 11 |\n",
"| ep_rew_mean | 2.55 |\n",
"| time/ | |\n",
"| fps | 382 |\n",
"| iterations | 92 |\n",
"| time_elapsed | 492 |\n",
"| total_timesteps | 188416 |\n",
"| train/ | |\n",
"| approx_kl | 0.017341316 |\n",
"| clip_fraction | 0.0919 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.681 |\n",
"| explained_variance | 0.865 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 9.01 |\n",
"| n_updates | 910 |\n",
"| policy_gradient_loss | -0.0076 |\n",
"| value_loss | 25.5 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 10.2 |\n",
"| ep_rew_mean | 2.27 |\n",
"| time/ | |\n",
"| fps | 383 |\n",
"| iterations | 93 |\n",
"| time_elapsed | 496 |\n",
"| total_timesteps | 190464 |\n",
"| train/ | |\n",
"| approx_kl | 0.070169866 |\n",
"| clip_fraction | 0.224 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.476 |\n",
"| explained_variance | 0.791 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 13 |\n",
"| n_updates | 920 |\n",
"| policy_gradient_loss | -0.0203 |\n",
"| value_loss | 5.41 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.33 |\n",
"| ep_rew_mean | 3.17 |\n",
"| time/ | |\n",
"| fps | 384 |\n",
"| iterations | 94 |\n",
"| time_elapsed | 500 |\n",
"| total_timesteps | 192512 |\n",
"| train/ | |\n",
"| approx_kl | 0.062242664 |\n",
"| clip_fraction | 0.184 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.504 |\n",
"| explained_variance | 0.426 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.94 |\n",
"| n_updates | 930 |\n",
"| policy_gradient_loss | -0.0392 |\n",
"| value_loss | 6.22 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.85 |\n",
"| ep_rew_mean | 3.79 |\n",
"| time/ | |\n",
"| fps | 385 |\n",
"| iterations | 95 |\n",
"| time_elapsed | 504 |\n",
"| total_timesteps | 194560 |\n",
"| train/ | |\n",
"| approx_kl | 0.042221397 |\n",
"| clip_fraction | 0.0712 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.328 |\n",
"| explained_variance | 0.893 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.236 |\n",
"| n_updates | 940 |\n",
"| policy_gradient_loss | -0.0162 |\n",
"| value_loss | 0.692 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 8.17 |\n",
"| ep_rew_mean | 2.09 |\n",
"| time/ | |\n",
"| fps | 387 |\n",
"| iterations | 96 |\n",
"| time_elapsed | 507 |\n",
"| total_timesteps | 196608 |\n",
"| train/ | |\n",
"| approx_kl | 0.036962293 |\n",
"| clip_fraction | 0.154 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.374 |\n",
"| explained_variance | 0.981 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | -0.0447 |\n",
"| n_updates | 950 |\n",
"| policy_gradient_loss | -0.0142 |\n",
"| value_loss | 0.151 |\n",
"-----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 9.38 |\n",
"| ep_rew_mean | 3.36 |\n",
"| time/ | |\n",
"| fps | 387 |\n",
"| iterations | 97 |\n",
"| time_elapsed | 512 |\n",
"| total_timesteps | 198656 |\n",
"| train/ | |\n",
"| approx_kl | 0.06385146 |\n",
"| clip_fraction | 0.245 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.366 |\n",
"| explained_variance | 0.553 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 1.4 |\n",
"| n_updates | 960 |\n",
"| policy_gradient_loss | -0.0399 |\n",
"| value_loss | 2.79 |\n",
"----------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------\n",
"| rollout/ | |\n",
"| ep_len_mean | 24.8 |\n",
"| ep_rew_mean | -11.4 |\n",
"| time/ | |\n",
"| fps | 389 |\n",
"| iterations | 98 |\n",
"| time_elapsed | 515 |\n",
"| total_timesteps | 200704 |\n",
"| train/ | |\n",
"| approx_kl | 0.26313344 |\n",
"| clip_fraction | 0.178 |\n",
"| clip_range | 0.2 |\n",
"| entropy_loss | -0.363 |\n",
"| explained_variance | 0.898 |\n",
"| learning_rate | 0.0003 |\n",
"| loss | 0.247 |\n",
"| n_updates | 970 |\n",
"| policy_gradient_loss | -0.0285 |\n",
"| value_loss | 0.521 |\n",
"----------------------------------------\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
"</pre>\n"
],
"text/plain": [
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "528f9bb0103a402d83eb31e85dd97caf",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Label(value='0.438 MB of 0.438 MB uploaded\\r'), FloatProgress(value=1.0, max=1.0)))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n"
]
},
{
"data": {
"text/html": [
"<style>\n",
" table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
" .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
" .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
" </style>\n",
"<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>global_step</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>rollout/ep_len_mean</td><td>▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▄█▅▄▂▂▃▂▂▄▂▂▂▁▁▂▂▃▂▂▂▄</td></tr><tr><td>rollout/ep_rew_mean</td><td>▇▇▇▇▇▇▇▇▇█████████▅▁▅▆██▇██▆███▇▇██▇███▅</td></tr><tr><td>time/fps</td><td>█▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃</td></tr><tr><td>train/approx_kl</td><td>▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▁▂▃▂▁▁█▁▂▃▂▁▂▂▅</td></tr><tr><td>train/clip_fraction</td><td>█▇▇▅▅▆▅▄▆▄▂▂▂▂▃▂▂▂▁▂▄▄▅▅▆▃▂▄▂▂▂▄▂▅▃▅▅▅▂▄</td></tr><tr><td>train/clip_range</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train/entropy_loss</td><td>▁▁▂▃▃▄▅▅▆▆▆▇▇▇▇▇▇▇██▇▇▇▇▇▇█▇▇▇█▇▇▇▇▆▇▇█▇</td></tr><tr><td>train/explained_variance</td><td>▃▄▅▆▆▆▆▄▆▆▅▇█▇▆██▆▁▄▄▃▇▇▄▇█▇▆▆██▅▃▆▆▆▇▇▇</td></tr><tr><td>train/learning_rate</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train/loss</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆█▂▁▁▂▁▁▁▁▁▁▁▁▁▁▂▂▃▁▁</td></tr><tr><td>train/policy_gradient_loss</td><td>▁▂▂▄▄▄▄▅▃▄▆▇▇▆▅▆▇▄█▆▆▅▄▄▃▅▇▆▇▆█▅▇▄▆▅▆▆▆▅</td></tr><tr><td>train/value_loss</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃██▂▁▁▃▂▁▁▁▁▁▁▁▁▁▃▂▂▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>global_step</td><td>200704</td></tr><tr><td>rollout/ep_len_mean</td><td>24.77</td></tr><tr><td>rollout/ep_rew_mean</td><td>-11.45</td></tr><tr><td>time/fps</td><td>389.0</td></tr><tr><td>train/approx_kl</td><td>0.26313</td></tr><tr><td>train/clip_fraction</td><td>0.17793</td></tr><tr><td>train/clip_range</td><td>0.2</td></tr><tr><td>train/entropy_loss</td><td>-0.36315</td></tr><tr><td>train/explained_variance</td><td>0.89819</td></tr><tr><td>train/learning_rate</td><td>0.0003</td></tr><tr><td>train/loss</td><td>0.24744</td></tr><tr><td>train/policy_gradient_loss</td><td>-0.02851</td></tr><tr><td>train/value_loss</td><td>0.52073</td></tr></table><br/></div></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run <strong style=\"color:#cdcd00\">confused-meadow-3</strong> at: <a href='https://wandb.ai/fulltime/wordle/runs/ot2i0b8h' target=\"_blank\">https://wandb.ai/fulltime/wordle/runs/ot2i0b8h</a><br/>Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 2 other file(s)"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Find logs at: <code>./wandb/run-20240319_162920-ot2i0b8h/logs</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Build the PPO agent; its TensorBoard log directory is keyed by run.id.\n",
"model = PPO(\n",
"    policy=config[\"policy_type\"],\n",
"    env=env,\n",
"    verbose=0,\n",
"    tensorboard_log=f\"runs/{run.id}\",\n",
")\n",
2024-03-19 18:52:10 +00:00
"\n",
"# Train for a certain number of timesteps\n",
2024-03-19 23:49:01 +00:00
"# Build the logging callback first so the learn() call stays flat.\n",
"wandb_callback = WandbCallback(\n",
"    model_save_path=f\"models/{run.id}\",\n",
"    verbose=2,\n",
")\n",
"model.learn(\n",
"    total_timesteps=config[\"total_timesteps\"],\n",
"    callback=wandb_callback,\n",
"    progress_bar=True,\n",
")\n",
"\n",
"# Close the run once training has returned.\n",
"run.finish()\n",
2024-03-19 18:52:10 +00:00
"\n",
"# Write the trained model out under a fixed file name.\n",
"model_path = \"wordle_ppo_model\"\n",
"model.save(model_path)"
]
},
{
"cell_type": "code",
2024-03-19 23:49:01 +00:00
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Same target file as the save at the end of the training cell.\n",
"model_path = \"wordle_ppo_model\"\n",
"model.save(model_path)"
]
},
{
"cell_type": "code",
"execution_count": 8,
2024-03-19 18:52:10 +00:00
"metadata": {},
"outputs": [],
2024-03-19 23:49:01 +00:00
"source": [
"# Rebind `model` to the copy read back from the saved file.\n",
"model_path = \"wordle_ppo_model\"\n",
"model = PPO.load(model_path)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1000/1000 [00:20<00:00, 49.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-6.703\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Roll out the loaded policy for a fixed number of episodes and print the\n",
"# mean undiscounted return per episode.\n",
"n_eval_episodes = 1000\n",
"rewards = 0\n",
"for i in tqdm(range(n_eval_episodes)):\n",
"    obs, _ = env.reset()\n",
"    done = False\n",
"    while not done:\n",
"        action, _ = model.predict(obs)\n",
"        # step() returns (obs, reward, terminated, truncated, info). The episode\n",
"        # is over when either flag is set; ignoring `truncated` would keep\n",
"        # stepping a finished episode and can loop forever.\n",
"        obs, reward, terminated, truncated, info = env.step(action)\n",
"        done = terminated or truncated\n",
"        rewards += reward\n",
"print(rewards / n_eval_episodes)"
]
2024-03-19 18:52:10 +00:00
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-03-19 23:49:01 +00:00
"version": "3.8.10"
2024-03-19 18:52:10 +00:00
}
},
"nbformat": 4,
"nbformat_minor": 2
}