{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "def load_valid_words(file_path='wordle_words.txt'):\n", " \"\"\"\n", " Load valid five-letter words from a specified text file.\n", "\n", " Parameters:\n", " - file_path (str): The path to the text file containing valid words.\n", "\n", " Returns:\n", " - list[str]: A list of valid words loaded from the file.\n", " \"\"\"\n", " with open(file_path, 'r') as file:\n", " valid_words = [line.strip() for line in file if len(line.strip()) == 5]\n", " return valid_words" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from stable_baselines3 import PPO # Or any other suitable RL algorithm\n", "from stable_baselines3.common.env_checker import check_env\n", "from letter_guess import LetterGuessingEnv\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "env = LetterGuessingEnv(valid_words=load_valid_words()) # Make sure to load your valid words\n", "check_env(env) # Optional: Verify the environment is compatible with SB3" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import wandb\n", "from wandb.integration.sb3 import WandbCallback" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mltcptgeneral\u001b[0m (\u001b[33mfulltime\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.4" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/art/cse151b-final-project/wandb/run-20240319_162920-ot2i0b8h" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run confused-meadow-3 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/fulltime/wordle" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/fulltime/wordle/runs/ot2i0b8h" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "config = {\n", " \"policy_type\": \"MlpPolicy\",\n", " \"total_timesteps\": 200_000\n", "}\n", "run = wandb.init(\n", " project=\"wordle\",\n", " config=config,\n", " sync_tensorboard=True\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using cuda device\n", "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", "Logging to runs/ot2i0b8h/PPO_1\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "585e7545478a485aa91c487b8630840f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.48 |\n", "| ep_rew_mean | -3.7 |\n", "| time/ | |\n", "| fps | 465 |\n", "| iterations | 1 |\n", "| time_elapsed | 4 |\n", "| total_timesteps | 2048 |\n", "---------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.49 |\n", "| ep_rew_mean | -3.65 |\n", "| time/ | |\n", "| fps | 395 |\n", "| iterations | 2 |\n", "| time_elapsed | 10 |\n", "| total_timesteps | 4096 |\n", "| train/ | |\n", "| approx_kl | 0.04501068 |\n", "| clip_fraction | 0.427 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -3.23 |\n", "| explained_variance | 0.189 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.205 |\n", "| n_updates | 10 |\n", "| policy_gradient_loss | -0.0667 |\n", "| value_loss | 0.997 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.84 |\n", "| ep_rew_mean | -3.4 |\n", "| time/ | |\n", "| fps | 381 |\n", "| iterations | 3 |\n", "| time_elapsed | 16 |\n", "| total_timesteps | 6144 |\n", "| train/ | |\n", "| approx_kl | 0.01765968 |\n", "| clip_fraction | 0.319 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -3.17 |\n", "| explained_variance | 0.481 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.123 |\n", "| n_updates | 20 |\n", "| policy_gradient_loss | -0.0525 |\n", "| value_loss | 0.383 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.98 |\n", "| ep_rew_mean | -3.28 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 4 |\n", "| time_elapsed | 21 |\n", "| total_timesteps | 8192 |\n", "| train/ | |\n", "| approx_kl | 0.018652592 |\n", "| clip_fraction | 0.368 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -3.11 |\n", "| explained_variance | 0.428 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.181 |\n", "| n_updates | 30 |\n", "| policy_gradient_loss | -0.0572 |\n", "| value_loss | 0.51 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 3.1 |\n", "| ep_rew_mean | -3.24 |\n", "| time/ | |\n", "| fps | 369 |\n", "| iterations | 5 |\n", "| time_elapsed | 27 |\n", "| total_timesteps | 10240 |\n", "| train/ | |\n", "| approx_kl | 0.023806999 |\n", "| clip_fraction | 0.365 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -3.04 |\n", "| explained_variance | 0.46 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.118 |\n", "| n_updates | 40 |\n", "| policy_gradient_loss | -0.0609 |\n", "| value_loss | 0.499 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 3.15 |\n", "| ep_rew_mean | -3.09 |\n", "| time/ | |\n", "| fps | 366 |\n", "| iterations | 6 |\n", "| time_elapsed | 33 |\n", "| total_timesteps | 12288 |\n", "| train/ | |\n", "| approx_kl | 0.024716537 |\n", "| clip_fraction | 0.372 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -2.94 |\n", "| explained_variance | 0.495 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.266 |\n", "| n_updates | 50 |\n", "| policy_gradient_loss | -0.0578 |\n", "| value_loss | 0.503 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 3.46 |\n", "| ep_rew_mean | -2.8 |\n", "| time/ | |\n", "| fps | 365 |\n", "| iterations | 7 |\n", "| time_elapsed | 39 |\n", "| total_timesteps | 14336 |\n", "| train/ | |\n", "| approx_kl | 0.023435738 |\n", "| clip_fraction | 0.357 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -2.82 |\n", "| explained_variance | 0.556 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.105 |\n", "| n_updates | 60 |\n", "| policy_gradient_loss | -0.0537 |\n", "| value_loss | 0.491 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 3.54 |\n", "| ep_rew_mean | -2.74 |\n", "| time/ | |\n", "| fps | 363 |\n", "| iterations | 8 |\n", "| time_elapsed | 45 |\n", "| total_timesteps | 16384 |\n", "| train/ | |\n", "| approx_kl | 0.02574392 |\n", "| clip_fraction | 0.29 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -2.71 |\n", "| explained_variance | 0.608 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.125 |\n", "| n_updates | 70 |\n", "| policy_gradient_loss | -0.0445 |\n", "| value_loss | 0.464 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 3.71 |\n", "| ep_rew_mean | -2.63 |\n", "| time/ | |\n", "| fps | 362 |\n", "| iterations | 9 |\n", "| time_elapsed | 50 |\n", "| total_timesteps | 18432 |\n", "| train/ | |\n", "| approx_kl | 0.021754535 |\n", "| clip_fraction | 0.251 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -2.56 |\n", "| explained_variance | 0.673 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.152 |\n", "| n_updates | 80 |\n", "| policy_gradient_loss | -0.0385 |\n", "| value_loss | 0.4 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 3.8 |\n", "| ep_rew_mean | -2.5 |\n", "| time/ | |\n", "| fps | 362 |\n", "| iterations | 10 |\n", "| time_elapsed | 56 |\n", "| total_timesteps | 20480 |\n", "| train/ | |\n", "| approx_kl | 0.018548178 |\n", "| clip_fraction | 0.239 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -2.46 |\n", "| explained_variance | 0.702 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.218 |\n", "| n_updates | 90 |\n", "| policy_gradient_loss | -0.0361 |\n", "| value_loss | 0.396 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 3.8 |\n", "| ep_rew_mean | -2.34 |\n", "| time/ | |\n", "| fps | 362 |\n", "| iterations | 11 |\n", "| time_elapsed | 62 |\n", "| total_timesteps | 22528 |\n", "| train/ | |\n", "| approx_kl | 0.016667131 |\n", "| clip_fraction | 0.24 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -2.36 |\n", "| explained_variance | 0.698 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.226 |\n", "| n_updates | 100 |\n", "| policy_gradient_loss | -0.037 |\n", "| value_loss | 0.411 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 3.95 |\n", "| ep_rew_mean | -2.31 |\n", "| time/ | |\n", "| fps | 361 |\n", "| iterations | 12 |\n", "| time_elapsed | 67 |\n", "| total_timesteps | 24576 |\n", "| train/ | |\n", "| approx_kl | 0.020023255 |\n", "| clip_fraction | 0.257 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -2.24 |\n", "| explained_variance | 0.712 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0958 |\n", "| n_updates | 110 |\n", "| policy_gradient_loss | -0.0381 |\n", "| value_loss | 0.406 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 4.19 |\n", "| ep_rew_mean | -2.03 |\n", "| time/ | |\n", "| fps | 360 |\n", "| iterations | 13 |\n", "| time_elapsed | 73 |\n", "| total_timesteps | 26624 |\n", "| train/ | |\n", "| approx_kl | 0.019943349 |\n", "| clip_fraction | 0.266 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -2.13 |\n", "| explained_variance | 0.712 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0974 |\n", "| n_updates | 120 |\n", "| policy_gradient_loss | -0.0387 |\n", "| value_loss | 0.444 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 4.24 |\n", "| ep_rew_mean | -1.96 |\n", "| time/ | |\n", "| fps | 360 |\n", "| iterations | 14 |\n", "| time_elapsed | 79 |\n", "| total_timesteps | 28672 |\n", "| train/ | |\n", "| approx_kl | 0.022638176 |\n", "| clip_fraction | 0.298 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.98 |\n", "| explained_variance | 0.697 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.145 |\n", "| n_updates | 130 |\n", "| policy_gradient_loss | -0.0433 |\n", "| value_loss | 0.486 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 4.45 |\n", "| ep_rew_mean | -1.89 |\n", "| time/ | |\n", "| fps | 361 |\n", "| iterations | 15 |\n", "| time_elapsed | 84 |\n", "| total_timesteps | 30720 |\n", "| train/ | |\n", "| approx_kl | 0.02262218 |\n", "| clip_fraction | 0.354 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.77 |\n", "| explained_variance | 0.727 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0807 |\n", "| n_updates | 140 |\n", "| policy_gradient_loss | -0.0439 |\n", "| value_loss | 0.454 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 4.91 |\n", "| ep_rew_mean | -1.65 |\n", "| time/ | |\n", "| fps | 362 |\n", "| iterations | 16 |\n", "| time_elapsed | 90 |\n", "| total_timesteps | 32768 |\n", "| train/ | |\n", "| approx_kl | 0.023807548 |\n", "| clip_fraction | 0.251 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.62 |\n", "| explained_variance | 0.654 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.171 |\n", "| n_updates | 150 |\n", "| policy_gradient_loss | -0.0355 |\n", "| value_loss | 0.686 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 4.86 |\n", "| ep_rew_mean | -1.42 |\n", "| time/ | |\n", "| fps | 361 |\n", "| iterations | 17 |\n", "| time_elapsed | 96 |\n", "| total_timesteps | 34816 |\n", "| train/ | |\n", "| approx_kl | 0.018085614 |\n", "| clip_fraction | 0.198 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.49 |\n", "| explained_variance | 0.699 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.339 |\n", "| n_updates | 160 |\n", "| policy_gradient_loss | -0.0297 |\n", "| value_loss | 0.674 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 5.4 |\n", "| ep_rew_mean | -1.88 |\n", "| time/ | |\n", "| fps | 361 |\n", "| iterations | 18 |\n", "| time_elapsed | 102 |\n", "| total_timesteps | 36864 |\n", "| train/ | |\n", "| approx_kl | 0.015559142 |\n", "| clip_fraction | 0.222 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.35 |\n", "| explained_variance | 0.719 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.354 |\n", "| n_updates | 170 |\n", "| policy_gradient_loss | -0.0349 |\n", "| value_loss | 0.629 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 5.54 |\n", "| ep_rew_mean | -1.22 |\n", "| time/ | |\n", "| fps | 360 |\n", "| iterations | 19 |\n", "| time_elapsed | 108 |\n", "| total_timesteps | 38912 |\n", "| train/ | |\n", "| approx_kl | 0.014995611 |\n", "| clip_fraction | 0.17 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.32 |\n", "| explained_variance | 0.436 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.33 |\n", "| n_updates | 180 |\n", "| policy_gradient_loss | -0.031 |\n", "| value_loss | 2.79 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 5.28 |\n", "| ep_rew_mean | -1.2 |\n", "| time/ | |\n", "| fps | 360 |\n", "| iterations | 20 |\n", "| time_elapsed | 113 |\n", "| total_timesteps | 40960 |\n", "| train/ | |\n", "| approx_kl | 0.018023107 |\n", "| clip_fraction | 0.169 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.23 |\n", "| explained_variance | 0.559 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.743 |\n", "| n_updates | 190 |\n", "| policy_gradient_loss | -0.0327 |\n", "| value_loss | 1.24 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 6.22 |\n", "| ep_rew_mean | -0.38 |\n", "| time/ | |\n", "| fps | 361 |\n", "| iterations | 21 |\n", "| time_elapsed | 119 |\n", "| total_timesteps | 43008 |\n", "| train/ | |\n", "| approx_kl | 0.023376558 |\n", "| clip_fraction | 0.29 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.628 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.502 |\n", "| n_updates | 200 |\n", "| policy_gradient_loss | -0.0451 |\n", "| value_loss | 1.11 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 7.06 |\n", "| ep_rew_mean | 0.26 |\n", "| time/ | |\n", "| fps | 362 |\n", "| iterations | 22 |\n", "| time_elapsed | 124 |\n", "| total_timesteps | 45056 |\n", "| train/ | |\n", "| approx_kl | 0.023838695 |\n", "| clip_fraction | 0.269 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.01 |\n", "| explained_variance | 0.566 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.925 |\n", "| n_updates | 210 |\n", "| policy_gradient_loss | -0.0463 |\n", "| value_loss | 1.71 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 7.93 |\n", "| ep_rew_mean | 1.19 |\n", "| time/ | |\n", "| fps | 362 |\n", "| iterations | 23 |\n", "| time_elapsed | 129 |\n", "| total_timesteps | 47104 |\n", "| train/ | |\n", "| approx_kl | 0.021363221 |\n", "| clip_fraction | 0.229 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.906 |\n", "| explained_variance | 0.594 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.819 |\n", "| n_updates | 220 |\n", "| policy_gradient_loss | -0.0426 |\n", "| value_loss | 1.94 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 8.52 |\n", "| ep_rew_mean | 1.98 |\n", "| time/ | |\n", "| fps | 363 |\n", "| iterations | 24 |\n", "| time_elapsed | 135 |\n", "| total_timesteps | 49152 |\n", "| train/ | |\n", "| approx_kl | 0.022241611 |\n", "| clip_fraction | 0.167 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.882 |\n", "| explained_variance | 0.667 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.838 |\n", "| n_updates | 230 |\n", "| policy_gradient_loss | -0.0343 |\n", "| value_loss | 1.73 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 8.9 |\n", "| ep_rew_mean | 1.8 |\n", "| time/ | |\n", "| fps | 364 |\n", "| iterations | 25 |\n", "| time_elapsed | 140 |\n", "| total_timesteps | 51200 |\n", "| train/ | |\n", "| approx_kl | 0.011297604 |\n", "| clip_fraction | 0.111 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.859 |\n", "| explained_variance | 0.763 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.5 |\n", "| n_updates | 240 |\n", "| policy_gradient_loss | -0.024 |\n", "| value_loss | 1.35 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.01 |\n", "| ep_rew_mean | 2.23 |\n", "| time/ | |\n", "| fps | 364 |\n", "| iterations | 26 |\n", "| time_elapsed | 145 |\n", "| total_timesteps | 53248 |\n", "| train/ | |\n", "| approx_kl | 0.010706454 |\n", "| clip_fraction | 0.0958 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.84 |\n", "| explained_variance | 0.486 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.872 |\n", "| n_updates | 250 |\n", "| policy_gradient_loss | -0.0237 |\n", "| value_loss | 2.77 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.75 |\n", "| ep_rew_mean | 1.81 |\n", "| time/ | |\n", "| fps | 365 |\n", "| iterations | 27 |\n", "| time_elapsed | 151 |\n", "| total_timesteps | 55296 |\n", "| train/ | |\n", "| approx_kl | 0.011905432 |\n", "| clip_fraction | 0.12 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.785 |\n", "| explained_variance | 0.838 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.121 |\n", "| n_updates | 260 |\n", "| policy_gradient_loss | -0.0206 |\n", "| value_loss | 0.851 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.07 |\n", "| ep_rew_mean | 2.13 |\n", "| time/ | |\n", "| fps | 365 |\n", "| iterations | 28 |\n", "| time_elapsed | 156 |\n", "| total_timesteps | 57344 |\n", "| train/ | |\n", "| approx_kl | 0.009603689 |\n", "| clip_fraction | 0.0931 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.808 |\n", "| explained_variance | 0.684 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.391 |\n", "| n_updates | 270 |\n", "| policy_gradient_loss | -0.0184 |\n", "| value_loss | 1.63 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 8.87 |\n", "| ep_rew_mean | 2.51 |\n", "| time/ | |\n", "| fps | 366 |\n", "| iterations | 29 |\n", "| time_elapsed | 162 |\n", "| total_timesteps | 59392 |\n", "| train/ | |\n", "| approx_kl | 0.0071417904 |\n", "| clip_fraction | 0.0738 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.736 |\n", "| explained_variance | 0.826 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.83 |\n", "| n_updates | 280 |\n", "| policy_gradient_loss | -0.0131 |\n", "| value_loss | 1.72 |\n", "------------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.02 |\n", "| ep_rew_mean | 2.74 |\n", "| time/ | |\n", "| fps | 366 |\n", "| iterations | 30 |\n", "| time_elapsed | 167 |\n", "| total_timesteps | 61440 |\n", "| train/ | |\n", "| approx_kl | 0.009269893 |\n", "| clip_fraction | 0.0918 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.686 |\n", "| explained_variance | 0.912 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.163 |\n", "| n_updates | 290 |\n", "| policy_gradient_loss | -0.0166 |\n", "| value_loss | 0.537 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.51 |\n", "| ep_rew_mean | 2.29 |\n", "| time/ | |\n", "| fps | 367 |\n", "| iterations | 31 |\n", "| time_elapsed | 172 |\n", "| total_timesteps | 63488 |\n", "| train/ | |\n", "| approx_kl | 0.010982089 |\n", "| clip_fraction | 0.0863 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.657 |\n", "| explained_variance | 0.946 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.339 |\n", "| n_updates | 300 |\n", "| policy_gradient_loss | -0.0149 |\n", "| value_loss | 0.341 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 8.94 |\n", "| ep_rew_mean | 2.7 |\n", "| time/ | |\n", "| fps | 368 |\n", "| iterations | 32 |\n", "| time_elapsed | 177 |\n", "| total_timesteps | 65536 |\n", "| train/ | |\n", "| approx_kl | 0.01880536 |\n", "| clip_fraction | 0.0622 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.667 |\n", "| explained_variance | 0.778 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.13 |\n", "| n_updates | 310 |\n", "| policy_gradient_loss | -0.0107 |\n", "| value_loss | 2.3 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 8.82 |\n", "| ep_rew_mean | 2.6 |\n", "| time/ | |\n", "| fps | 369 |\n", "| iterations | 33 |\n", "| time_elapsed | 182 |\n", "| total_timesteps | 67584 |\n", "| train/ | |\n", "| approx_kl | 0.013803964 |\n", "| clip_fraction | 0.1 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.644 |\n", "| explained_variance | 0.952 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0438 |\n", "| n_updates | 320 |\n", "| policy_gradient_loss | -0.0186 |\n", "| value_loss | 0.31 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9 |\n", "| ep_rew_mean | 2.9 |\n", "| time/ | |\n", "| fps | 370 |\n", "| iterations | 34 |\n", "| time_elapsed | 188 |\n", "| total_timesteps | 69632 |\n", "| train/ | |\n", "| approx_kl | 0.011061303 |\n", "| clip_fraction | 0.0942 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.597 |\n", "| explained_variance | 0.905 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.293 |\n", "| n_updates | 330 |\n", "| policy_gradient_loss | -0.0158 |\n", "| value_loss | 0.61 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.96 |\n", "| ep_rew_mean | 2.6 |\n", "| time/ | |\n", "| fps | 370 |\n", "| iterations | 35 |\n", "| time_elapsed | 193 |\n", "| total_timesteps | 71680 |\n", "| train/ | |\n", "| approx_kl | 0.016911192 |\n", "| clip_fraction | 0.165 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.551 |\n", "| explained_variance | 0.926 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.229 |\n", "| n_updates | 340 |\n", "| policy_gradient_loss | -0.0229 |\n", "| value_loss | 0.519 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.5 |\n", "| ep_rew_mean | 3.32 |\n", "| time/ | |\n", "| fps | 370 |\n", "| iterations | 36 |\n", "| time_elapsed | 198 |\n", "| total_timesteps | 73728 |\n", "| train/ | |\n", "| approx_kl | 0.012311206 |\n", "| clip_fraction | 0.119 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.595 |\n", "| explained_variance | 0.709 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.68 |\n", "| n_updates | 350 |\n", "| policy_gradient_loss | -0.0275 |\n", "| value_loss | 1.95 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.6 |\n", "| ep_rew_mean | 3.3 |\n", "| time/ | |\n", "| fps | 370 |\n", "| iterations | 37 |\n", "| time_elapsed | 204 |\n", "| total_timesteps | 75776 |\n", "| train/ | |\n", "| approx_kl | 0.059752032 |\n", "| clip_fraction | 0.154 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.534 |\n", "| explained_variance | 0.785 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.835 |\n", "| n_updates | 360 |\n", "| policy_gradient_loss | -0.0234 |\n", "| value_loss | 1.46 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.8 |\n", "| ep_rew_mean | 2.7 |\n", "| time/ | |\n", "| fps | 370 |\n", "| iterations | 38 |\n", "| time_elapsed | 209 |\n", "| total_timesteps | 77824 |\n", "| train/ | |\n", "| approx_kl | 0.01475055 |\n", "| clip_fraction | 0.0968 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.455 |\n", "| explained_variance | 0.917 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.153 |\n", "| n_updates | 370 |\n", "| policy_gradient_loss | -0.019 |\n", "| value_loss | 0.428 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.2 |\n", "| ep_rew_mean | 3.01 |\n", "| time/ | |\n", "| fps | 370 |\n", "| iterations | 39 |\n", "| time_elapsed | 215 |\n", "| total_timesteps | 79872 |\n", "| train/ | |\n", "| approx_kl | 0.012021113 |\n", "| clip_fraction | 0.0851 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.558 |\n", "| explained_variance | 0.702 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.8 |\n", "| n_updates | 380 |\n", "| policy_gradient_loss | -0.0284 |\n", "| value_loss | 4.45 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.83 |\n", "| ep_rew_mean | 3.57 |\n", "| time/ | |\n", "| fps | 370 |\n", "| iterations | 40 |\n", "| time_elapsed | 220 |\n", "| total_timesteps | 81920 |\n", "| train/ | |\n", "| approx_kl | 0.010166377 |\n", "| clip_fraction | 0.0623 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.454 |\n", "| explained_variance | 0.728 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.43 |\n", "| n_updates | 390 |\n", "| policy_gradient_loss | -0.0201 |\n", "| value_loss | 2.21 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.3 |\n", "| ep_rew_mean | 3.14 |\n", "| time/ | |\n", "| fps | 371 |\n", "| iterations | 41 |\n", "| time_elapsed | 226 |\n", "| total_timesteps | 83968 |\n", "| train/ | |\n", "| approx_kl | 0.017603599 |\n", "| clip_fraction | 0.0748 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.384 |\n", "| explained_variance | 0.957 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.152 |\n", "| n_updates | 400 |\n", "| policy_gradient_loss | -0.0122 |\n", "| value_loss | 0.286 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.95 |\n", "| ep_rew_mean | 3.81 |\n", "| time/ | |\n", "| fps | 371 |\n", "| iterations | 42 |\n", "| time_elapsed | 231 |\n", "| total_timesteps | 86016 |\n", "| train/ | |\n", "| approx_kl | 0.028185518 |\n", "| clip_fraction | 0.0954 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.383 |\n", "| explained_variance | 0.811 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.412 |\n", "| n_updates | 410 |\n", "| policy_gradient_loss | -0.0224 |\n", "| value_loss | 2.05 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.5 |\n", "| ep_rew_mean | 3.57 |\n", "| time/ | |\n", "| fps | 371 |\n", "| iterations | 43 |\n", "| time_elapsed | 236 |\n", "| total_timesteps | 88064 |\n", "| train/ | |\n", "| approx_kl | 0.02107103 |\n", "| clip_fraction | 0.0702 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.375 |\n", "| explained_variance | 0.732 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.156 |\n", "| n_updates | 420 |\n", "| policy_gradient_loss | -0.0342 |\n", "| value_loss | 1.54 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.76 |\n", "| ep_rew_mean | 3.7 |\n", "| time/ | |\n", "| fps | 371 |\n", "| iterations | 44 |\n", "| time_elapsed | 242 |\n", "| total_timesteps | 90112 |\n", "| train/ | |\n", "| approx_kl | 0.007913441 |\n", "| clip_fraction | 0.0349 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.346 |\n", "| explained_variance | 0.827 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.92 |\n", "| n_updates | 430 |\n", "| policy_gradient_loss | -0.00355 |\n", "| value_loss | 2.2 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 25.8 |\n", "| ep_rew_mean | -12.4 |\n", "| time/ | |\n", "| fps | 371 |\n", "| iterations | 45 |\n", "| time_elapsed | 247 |\n", "| total_timesteps | 92160 |\n", "| train/ | |\n", "| approx_kl | 0.5432366 |\n", "| clip_fraction | 0.109 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.316 |\n", "| explained_variance | 0.958 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.231 |\n", "| n_updates | 440 |\n", "| policy_gradient_loss | -0.0068 |\n", "| value_loss | 0.313 |\n", "---------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 45.9 |\n", "| ep_rew_mean | -32.4 |\n", "| time/ | |\n", "| fps | 372 |\n", "| iterations | 46 |\n", "| time_elapsed | 252 |\n", "| total_timesteps | 94208 |\n", "| train/ | |\n", "| approx_kl | 0.008981178 |\n", "| clip_fraction | 0.0144 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.11 |\n", "| explained_variance | -0.0603 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.91 |\n", "| n_updates | 450 |\n", "| policy_gradient_loss | -0.00156 |\n", "| value_loss | 17.2 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 62.7 |\n", "| ep_rew_mean | -49.3 |\n", "| time/ | |\n", "| fps | 373 |\n", "| iterations | 47 |\n", "| time_elapsed | 257 |\n", "| total_timesteps | 96256 |\n", "| train/ | |\n", "| approx_kl | 0.0025778997 |\n", "| clip_fraction | 0.0184 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.165 |\n", "| explained_variance | 0.396 |\n", "| learning_rate | 0.0003 |\n", "| loss | 19.1 |\n", "| n_updates | 460 |\n", "| policy_gradient_loss | -0.00601 |\n", "| value_loss | 45.6 |\n", "------------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 49.3 |\n", "| ep_rew_mean | -35.6 |\n", "| time/ | |\n", "| fps | 373 |\n", "| iterations | 48 |\n", "| time_elapsed | 262 |\n", "| total_timesteps | 98304 |\n", "| train/ | |\n", "| approx_kl | 0.014914533 |\n", "| clip_fraction | 0.0816 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.272 |\n", "| explained_variance | 0.394 |\n", "| learning_rate | 0.0003 |\n", "| loss | 33.5 |\n", "| n_updates | 470 |\n", "| policy_gradient_loss | -0.0164 |\n", "| value_loss | 74.1 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 39.9 |\n", "| ep_rew_mean | -26.3 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 49 |\n", "| time_elapsed | 268 |\n", "| total_timesteps | 100352 |\n", "| train/ | |\n", "| approx_kl | 0.0012997694 |\n", "| clip_fraction | 0.026 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.24 |\n", "| explained_variance | 0.465 |\n", "| learning_rate | 0.0003 |\n", "| loss | 44.5 |\n", "| n_updates | 480 |\n", "| policy_gradient_loss | -0.00881 |\n", "| value_loss | 76.6 |\n", "------------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 27 |\n", "| ep_rew_mean | -13.3 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 50 |\n", "| time_elapsed | 273 |\n", "| total_timesteps | 102400 |\n", "| train/ | |\n", "| approx_kl | 0.0014951692 |\n", "| clip_fraction | 0.037 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.25 |\n", "| explained_variance | 0.594 |\n", "| learning_rate | 0.0003 |\n", "| loss | 30.2 |\n", "| n_updates | 490 |\n", "| policy_gradient_loss | -0.00958 |\n", "| value_loss | 77.4 |\n", "------------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 17.4 |\n", "| ep_rew_mean | -3.57 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 51 |\n", "| time_elapsed | 279 |\n", "| total_timesteps | 104448 |\n", "| train/ | |\n", "| approx_kl | 0.0443189 |\n", "| clip_fraction | 0.176 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.457 |\n", "| explained_variance | 0.338 |\n", "| learning_rate | 0.0003 |\n", "| loss | 44.4 |\n", "| n_updates | 500 |\n", "| policy_gradient_loss | -0.0247 |\n", "| value_loss | 70.8 |\n", "---------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 13.3 |\n", "| ep_rew_mean | 0.45 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 52 |\n", "| time_elapsed | 284 |\n", "| total_timesteps | 106496 |\n", "| train/ | |\n", "| approx_kl | 0.005663138 |\n", "| clip_fraction | 0.069 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.463 |\n", "| explained_variance | 0.558 |\n", "| learning_rate | 0.0003 |\n", "| loss | 14.7 |\n", "| n_updates | 510 |\n", "| policy_gradient_loss | -0.0117 |\n", "| value_loss | 36.8 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 19.8 |\n", "| ep_rew_mean | -5.92 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 53 |\n", "| time_elapsed | 289 |\n", "| total_timesteps | 108544 |\n", "| train/ | |\n", "| approx_kl | 0.020181399 |\n", "| clip_fraction | 0.176 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.499 |\n", "| explained_variance | 0.182 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.45 |\n", "| n_updates | 520 |\n", "| policy_gradient_loss | -0.0254 |\n", "| value_loss | 9.2 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 11.4 |\n", "| ep_rew_mean | 2.26 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 54 |\n", "| time_elapsed | 295 |\n", "| total_timesteps | 110592 |\n", "| train/ | |\n", "| approx_kl | 0.009738399 |\n", "| clip_fraction | 0.108 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.538 |\n", "| explained_variance | 0.891 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.88 |\n", "| n_updates | 530 |\n", "| policy_gradient_loss | -0.00573 |\n", "| value_loss | 22 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.4 |\n", "| ep_rew_mean | 3.06 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 55 |\n", "| time_elapsed | 300 |\n", "| total_timesteps | 112640 |\n", "| train/ | |\n", "| approx_kl | 0.01536967 |\n", "| clip_fraction | 0.191 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.49 |\n", "| explained_variance | 0.315 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.971 |\n", "| n_updates | 540 |\n", "| policy_gradient_loss | -0.0219 |\n", "| value_loss | 6.46 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.8 |\n", "| ep_rew_mean | 1.63 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 56 |\n", "| time_elapsed | 305 |\n", "| total_timesteps | 114688 |\n", "| train/ | |\n", "| approx_kl | 0.0688808 |\n", "| clip_fraction | 0.264 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.496 |\n", "| explained_variance | 0.838 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.205 |\n", "| n_updates | 550 |\n", "| policy_gradient_loss | -0.0406 |\n", "| value_loss | 0.836 |\n", "---------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.3 |\n", "| ep_rew_mean | 2.98 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 57 |\n", "| time_elapsed | 311 |\n", "| total_timesteps | 116736 |\n", "| train/ | |\n", "| approx_kl | 0.04210388 |\n", "| clip_fraction | 0.326 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.534 |\n", "| explained_variance | 0.734 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.907 |\n", "| n_updates | 560 |\n", "| policy_gradient_loss | -0.0421 |\n", "| value_loss | 1.91 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.3 |\n", "| ep_rew_mean | 3.28 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 58 |\n", "| time_elapsed | 316 |\n", "| total_timesteps | 118784 |\n", "| train/ | |\n", "| approx_kl | 0.051502623 |\n", "| clip_fraction | 0.224 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.434 |\n", "| explained_variance | 0.875 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.404 |\n", "| n_updates | 570 |\n", "| policy_gradient_loss | -0.0345 |\n", "| value_loss | 0.827 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.2 |\n", "| ep_rew_mean | 3.78 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 59 |\n", "| time_elapsed | 322 |\n", "| total_timesteps | 120832 |\n", "| train/ | |\n", "| approx_kl | 0.07014565 |\n", "| clip_fraction | 0.165 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.391 |\n", "| explained_variance | 0.903 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.145 |\n", "| n_updates | 580 |\n", "| policy_gradient_loss | -0.0254 |\n", "| value_loss | 0.681 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 16.4 |\n", "| ep_rew_mean | -3.11 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 60 |\n", "| time_elapsed | 327 |\n", "| total_timesteps | 122880 |\n", "| train/ | |\n", "| approx_kl | 0.118292876 |\n", "| clip_fraction | 0.151 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.466 |\n", "| explained_variance | 0.952 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0778 |\n", "| n_updates | 590 |\n", "| policy_gradient_loss | -0.0214 |\n", "| value_loss | 0.316 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 12.5 |\n", "| ep_rew_mean | 1.05 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 61 |\n", "| time_elapsed | 333 |\n", "| total_timesteps | 124928 |\n", "| train/ | |\n", "| approx_kl | 0.061776154 |\n", "| clip_fraction | 0.321 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.665 |\n", "| explained_variance | 0.401 |\n", "| learning_rate | 0.0003 |\n", "| loss | 9.22 |\n", "| n_updates | 600 |\n", "| policy_gradient_loss | -0.0454 |\n", "| value_loss | 17 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.8 |\n", "| ep_rew_mean | 2.8 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 62 |\n", "| time_elapsed | 338 |\n", "| total_timesteps | 126976 |\n", "| train/ | |\n", "| approx_kl | 0.01477613 |\n", "| clip_fraction | 0.153 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.579 |\n", "| explained_variance | 0.682 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.12 |\n", "| n_updates | 610 |\n", "| policy_gradient_loss | -0.027 |\n", "| value_loss | 10.5 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 11.4 |\n", "| ep_rew_mean | 2.68 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 63 |\n", "| time_elapsed | 344 |\n", "| total_timesteps | 129024 |\n", "| train/ | |\n", "| approx_kl | 0.015770137 |\n", "| clip_fraction | 0.128 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.507 |\n", "| explained_variance | 0.869 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.13 |\n", "| n_updates | 620 |\n", "| policy_gradient_loss | -0.0253 |\n", "| value_loss | 5.99 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.2 |\n", "| ep_rew_mean | 3.48 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 64 |\n", "| time_elapsed | 349 |\n", "| total_timesteps | 131072 |\n", "| train/ | |\n", "| approx_kl | 0.007838536 |\n", "| clip_fraction | 0.0755 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.446 |\n", "| explained_variance | 0.749 |\n", "| learning_rate | 0.0003 |\n", "| loss | 11.9 |\n", "| n_updates | 630 |\n", "| policy_gradient_loss | -0.0106 |\n", "| value_loss | 9.21 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10 |\n", "| ep_rew_mean | 3.8 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 65 |\n", "| time_elapsed | 354 |\n", "| total_timesteps | 133120 |\n", "| train/ | |\n", "| approx_kl | 0.035350725 |\n", "| clip_fraction | 0.101 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.368 |\n", "| explained_variance | 0.867 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.388 |\n", "| n_updates | 640 |\n", "| policy_gradient_loss | -0.0213 |\n", "| value_loss | 1.11 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 14.6 |\n", "| ep_rew_mean | -0.59 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 66 |\n", "| time_elapsed | 360 |\n", "| total_timesteps | 135168 |\n", "| train/ | |\n", "| approx_kl | 0.042963736 |\n", "| clip_fraction | 0.0817 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.321 |\n", "| explained_variance | 0.941 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00919 |\n", "| n_updates | 650 |\n", "| policy_gradient_loss | -0.0127 |\n", "| value_loss | 0.325 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.3 |\n", "| ep_rew_mean | 3.64 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 67 |\n", "| time_elapsed | 366 |\n", "| total_timesteps | 137216 |\n", "| train/ | |\n", "| approx_kl | 0.08396668 |\n", "| clip_fraction | 0.184 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.409 |\n", "| explained_variance | 0.803 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.09 |\n", "| n_updates | 660 |\n", "| policy_gradient_loss | -0.0203 |\n", "| value_loss | 8.99 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 19.8 |\n", "| ep_rew_mean | -9.15 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 68 |\n", "| time_elapsed | 371 |\n", "| total_timesteps | 139264 |\n", "| train/ | |\n", "| approx_kl | 0.15326424 |\n", "| clip_fraction | 0.199 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.39 |\n", "| explained_variance | 0.894 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.122 |\n", "| n_updates | 670 |\n", "| policy_gradient_loss | -0.0201 |\n", "| value_loss | 0.415 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.38 |\n", "| ep_rew_mean | 3.18 |\n", "| time/ | |\n", "| fps | 374 |\n", "| iterations | 69 |\n", "| time_elapsed | 376 |\n", "| total_timesteps | 141312 |\n", "| train/ | |\n", "| approx_kl | 0.21504487 |\n", "| clip_fraction | 0.411 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.483 |\n", "| explained_variance | 0.408 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.24 |\n", "| n_updates | 680 |\n", "| policy_gradient_loss | -0.0591 |\n", "| value_loss | 18.4 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.98 |\n", "| ep_rew_mean | 3.8 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 70 |\n", "| time_elapsed | 382 |\n", "| total_timesteps | 143360 |\n", "| train/ | |\n", "| approx_kl | 0.050312966 |\n", "| clip_fraction | 0.0584 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.341 |\n", "| explained_variance | 0.707 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0602 |\n", "| n_updates | 690 |\n", "| policy_gradient_loss | -0.00661 |\n", "| value_loss | 0.845 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 11.7 |\n", "| ep_rew_mean | 2.07 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 71 |\n", "| time_elapsed | 387 |\n", "| total_timesteps | 145408 |\n", "| train/ | |\n", "| approx_kl | 0.018565401 |\n", "| clip_fraction | 0.0643 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.322 |\n", "| explained_variance | 0.977 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.348 |\n", "| n_updates | 700 |\n", "| policy_gradient_loss | -0.0112 |\n", "| value_loss | 0.175 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.2 |\n", "| ep_rew_mean | 3.47 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 72 |\n", "| time_elapsed | 392 |\n", "| total_timesteps | 147456 |\n", "| train/ | |\n", "| approx_kl | 0.10186449 |\n", "| clip_fraction | 0.19 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.395 |\n", "| explained_variance | 0.47 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.66 |\n", "| n_updates | 710 |\n", "| policy_gradient_loss | -0.0419 |\n", "| value_loss | 4.71 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.2 |\n", "| ep_rew_mean | 3.72 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 73 |\n", "| time_elapsed | 398 |\n", "| total_timesteps | 149504 |\n", "| train/ | |\n", "| approx_kl | 0.01903234 |\n", "| clip_fraction | 0.0737 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.333 |\n", "| explained_variance | 0.723 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.71 |\n", "| n_updates | 720 |\n", "| policy_gradient_loss | -0.022 |\n", "| value_loss | 2.97 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.94 |\n", "| ep_rew_mean | 3.92 |\n", "| time/ | |\n", "| fps | 375 |\n", "| iterations | 74 |\n", "| time_elapsed | 403 |\n", "| total_timesteps | 151552 |\n", "| train/ | |\n", "| approx_kl | 0.026403807 |\n", "| clip_fraction | 0.065 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.322 |\n", "| explained_variance | 0.934 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.24 |\n", "| n_updates | 730 |\n", "| policy_gradient_loss | -0.00481 |\n", "| value_loss | 0.481 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.2 |\n", "| ep_rew_mean | 3.81 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 75 |\n", "| time_elapsed | 408 |\n", "| total_timesteps | 153600 |\n", "| train/ | |\n", "| approx_kl | 0.011051587 |\n", "| clip_fraction | 0.0593 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.324 |\n", "| explained_variance | 0.984 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00908 |\n", "| n_updates | 740 |\n", "| policy_gradient_loss | -0.00591 |\n", "| value_loss | 0.113 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.2 |\n", "| ep_rew_mean | 3.63 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 76 |\n", "| time_elapsed | 413 |\n", "| total_timesteps | 155648 |\n", "| train/ | |\n", "| approx_kl | 0.00972967 |\n", "| clip_fraction | 0.0544 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.339 |\n", "| explained_variance | 0.922 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0226 |\n", "| n_updates | 750 |\n", "| policy_gradient_loss | -0.00438 |\n", "| value_loss | 0.758 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.92 |\n", "| ep_rew_mean | 3.86 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 77 |\n", "| time_elapsed | 418 |\n", "| total_timesteps | 157696 |\n", "| train/ | |\n", "| approx_kl | 0.013084366 |\n", "| clip_fraction | 0.0481 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.348 |\n", "| explained_variance | 0.812 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0736 |\n", "| n_updates | 760 |\n", "| policy_gradient_loss | -0.0157 |\n", "| value_loss | 1.47 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 5.05 |\n", "| ep_rew_mean | -2.29 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 78 |\n", "| time_elapsed | 424 |\n", "| total_timesteps | 159744 |\n", "| train/ | |\n", "| approx_kl | 0.42402273 |\n", "| clip_fraction | 0.215 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.49 |\n", "| explained_variance | 0.983 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00926 |\n", "| n_updates | 770 |\n", "| policy_gradient_loss | -0.0284 |\n", "| value_loss | 0.109 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 4.53 |\n", "| ep_rew_mean | -2.17 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 79 |\n", "| time_elapsed | 429 |\n", "| total_timesteps | 161792 |\n", "| train/ | |\n", "| approx_kl | 0.27915305 |\n", "| clip_fraction | 0.138 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.376 |\n", "| explained_variance | 0.503 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.373 |\n", "| n_updates | 780 |\n", "| policy_gradient_loss | -0.00444 |\n", "| value_loss | 5.03 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 4.38 |\n", "| ep_rew_mean | -1.74 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 80 |\n", "| time_elapsed | 435 |\n", "| total_timesteps | 163840 |\n", "| train/ | |\n", "| approx_kl | 0.019562341 |\n", "| clip_fraction | 0.0921 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.457 |\n", "| explained_variance | 0.562 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.638 |\n", "| n_updates | 790 |\n", "| policy_gradient_loss | -0.0128 |\n", "| value_loss | 0.895 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 4.34 |\n", "| ep_rew_mean | -1.7 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 81 |\n", "| time_elapsed | 440 |\n", "| total_timesteps | 165888 |\n", "| train/ | |\n", "| approx_kl | 0.18844175 |\n", "| clip_fraction | 0.147 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.279 |\n", "| explained_variance | 0.102 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.03 |\n", "| n_updates | 800 |\n", "| policy_gradient_loss | 0.013 |\n", "| value_loss | 11.4 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 5.54 |\n", "| ep_rew_mean | -0.66 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 82 |\n", "| time_elapsed | 445 |\n", "| total_timesteps | 167936 |\n", "| train/ | |\n", "| approx_kl | 0.06374202 |\n", "| clip_fraction | 0.21 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.398 |\n", "| explained_variance | 0.566 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.599 |\n", "| n_updates | 810 |\n", "| policy_gradient_loss | -0.0207 |\n", "| value_loss | 1.59 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 7.65 |\n", "| ep_rew_mean | 1.61 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 83 |\n", "| time_elapsed | 451 |\n", "| total_timesteps | 169984 |\n", "| train/ | |\n", "| approx_kl | 0.06125373 |\n", "| clip_fraction | 0.251 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.434 |\n", "| explained_variance | 0.299 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.27 |\n", "| n_updates | 820 |\n", "| policy_gradient_loss | -0.0399 |\n", "| value_loss | 3.7 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 8.86 |\n", "| ep_rew_mean | 2.46 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 84 |\n", "| time_elapsed | 457 |\n", "| total_timesteps | 172032 |\n", "| train/ | |\n", "| approx_kl | 0.047695376 |\n", "| clip_fraction | 0.171 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.387 |\n", "| explained_variance | 0.336 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.72 |\n", "| n_updates | 830 |\n", "| policy_gradient_loss | -0.0407 |\n", "| value_loss | 3.91 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.84 |\n", "| ep_rew_mean | 3.66 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 85 |\n", "| time_elapsed | 462 |\n", "| total_timesteps | 174080 |\n", "| train/ | |\n", "| approx_kl | 0.13684572 |\n", "| clip_fraction | 0.133 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.346 |\n", "| explained_variance | 0.625 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.13 |\n", "| n_updates | 840 |\n", "| policy_gradient_loss | -0.0231 |\n", "| value_loss | 2.36 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 25.5 |\n", "| ep_rew_mean | -12.2 |\n", "| time/ | |\n", "| fps | 376 |\n", "| iterations | 86 |\n", "| time_elapsed | 467 |\n", "| total_timesteps | 176128 |\n", "| train/ | |\n", "| approx_kl | 0.36170986 |\n", "| clip_fraction | 0.332 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.473 |\n", "| explained_variance | 0.919 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.364 |\n", "| n_updates | 850 |\n", "| policy_gradient_loss | -0.0374 |\n", "| value_loss | 0.278 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 17.2 |\n", "| ep_rew_mean | -4.45 |\n", "| time/ | |\n", "| fps | 377 |\n", "| iterations | 87 |\n", "| time_elapsed | 472 |\n", "| total_timesteps | 178176 |\n", "| train/ | |\n", "| approx_kl | 0.102079734 |\n", "| clip_fraction | 0.34 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.613 |\n", "| explained_variance | 0.0593 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.95 |\n", "| n_updates | 860 |\n", "| policy_gradient_loss | -0.04 |\n", "| value_loss | 20.6 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 14.5 |\n", "| ep_rew_mean | -2.36 |\n", "| time/ | |\n", "| fps | 378 |\n", "| iterations | 88 |\n", "| time_elapsed | 476 |\n", "| total_timesteps | 180224 |\n", "| train/ | |\n", "| approx_kl | 0.042367905 |\n", "| clip_fraction | 0.264 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.799 |\n", "| explained_variance | 0.69 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.22 |\n", "| n_updates | 870 |\n", "| policy_gradient_loss | -0.0265 |\n", "| value_loss | 16.7 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 12.3 |\n", "| ep_rew_mean | 0.46 |\n", "| time/ | |\n", "| fps | 379 |\n", "| iterations | 89 |\n", "| time_elapsed | 480 |\n", "| total_timesteps | 182272 |\n", "| train/ | |\n", "| approx_kl | 0.02418825 |\n", "| clip_fraction | 0.262 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.744 |\n", "| explained_variance | 0.75 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.08 |\n", "| n_updates | 880 |\n", "| policy_gradient_loss | -0.026 |\n", "| value_loss | 16.2 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 11 |\n", "| ep_rew_mean | 2.19 |\n", "| time/ | |\n", "| fps | 380 |\n", "| iterations | 90 |\n", "| time_elapsed | 484 |\n", "| total_timesteps | 184320 |\n", "| train/ | |\n", "| approx_kl | 0.024347484 |\n", "| clip_fraction | 0.271 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.669 |\n", "| explained_variance | 0.675 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.76 |\n", "| n_updates | 890 |\n", "| policy_gradient_loss | -0.0243 |\n", "| value_loss | 10.1 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 12.1 |\n", "| ep_rew_mean | 1.35 |\n", "| time/ | |\n", "| fps | 381 |\n", "| iterations | 91 |\n", "| time_elapsed | 488 |\n", "| total_timesteps | 186368 |\n", "| train/ | |\n", "| approx_kl | 0.02668532 |\n", "| clip_fraction | 0.256 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.576 |\n", "| explained_variance | 0.706 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.797 |\n", "| n_updates | 900 |\n", "| policy_gradient_loss | -0.0378 |\n", "| value_loss | 1.6 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 11 |\n", "| ep_rew_mean | 2.55 |\n", "| time/ | |\n", "| fps | 382 |\n", "| iterations | 92 |\n", "| time_elapsed | 492 |\n", "| total_timesteps | 188416 |\n", "| train/ | |\n", "| approx_kl | 0.017341316 |\n", "| clip_fraction | 0.0919 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.681 |\n", "| explained_variance | 0.865 |\n", "| learning_rate | 0.0003 |\n", "| loss | 9.01 |\n", "| n_updates | 910 |\n", "| policy_gradient_loss | -0.0076 |\n", "| value_loss | 25.5 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 10.2 |\n", "| ep_rew_mean | 2.27 |\n", "| time/ | |\n", "| fps | 383 |\n", "| iterations | 93 |\n", "| time_elapsed | 496 |\n", "| total_timesteps | 190464 |\n", "| train/ | |\n", "| approx_kl | 0.070169866 |\n", "| clip_fraction | 0.224 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.476 |\n", "| explained_variance | 0.791 |\n", "| learning_rate | 0.0003 |\n", "| loss | 13 |\n", "| n_updates | 920 |\n", "| policy_gradient_loss | -0.0203 |\n", "| value_loss | 5.41 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.33 |\n", "| ep_rew_mean | 3.17 |\n", "| time/ | |\n", "| fps | 384 |\n", "| iterations | 94 |\n", "| time_elapsed | 500 |\n", "| total_timesteps | 192512 |\n", "| train/ | |\n", "| approx_kl | 0.062242664 |\n", "| clip_fraction | 0.184 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.504 |\n", "| explained_variance | 0.426 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.94 |\n", "| n_updates | 930 |\n", "| policy_gradient_loss | -0.0392 |\n", "| value_loss | 6.22 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.85 |\n", "| ep_rew_mean | 3.79 |\n", "| time/ | |\n", "| fps | 385 |\n", "| iterations | 95 |\n", "| time_elapsed | 504 |\n", "| total_timesteps | 194560 |\n", "| train/ | |\n", "| approx_kl | 0.042221397 |\n", "| clip_fraction | 0.0712 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.328 |\n", "| explained_variance | 0.893 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.236 |\n", "| n_updates | 940 |\n", "| policy_gradient_loss | -0.0162 |\n", "| value_loss | 0.692 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 8.17 |\n", "| ep_rew_mean | 2.09 |\n", "| time/ | |\n", "| fps | 387 |\n", "| iterations | 96 |\n", "| time_elapsed | 507 |\n", "| total_timesteps | 196608 |\n", "| train/ | |\n", "| approx_kl | 0.036962293 |\n", "| clip_fraction | 0.154 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.374 |\n", "| explained_variance | 0.981 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0447 |\n", "| n_updates | 950 |\n", "| policy_gradient_loss | -0.0142 |\n", "| value_loss | 0.151 |\n", "-----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 9.38 |\n", "| ep_rew_mean | 3.36 |\n", "| time/ | |\n", "| fps | 387 |\n", "| iterations | 97 |\n", "| time_elapsed | 512 |\n", "| total_timesteps | 198656 |\n", "| train/ | |\n", "| approx_kl | 0.06385146 |\n", "| clip_fraction | 0.245 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.366 |\n", "| explained_variance | 0.553 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.4 |\n", "| n_updates | 960 |\n", "| policy_gradient_loss | -0.0399 |\n", "| value_loss | 2.79 |\n", "----------------------------------------\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 24.8 |\n", "| ep_rew_mean | -11.4 |\n", "| time/ | |\n", "| fps | 389 |\n", "| iterations | 98 |\n", "| time_elapsed | 515 |\n", "| total_timesteps | 200704 |\n", "| train/ | |\n", "| approx_kl | 0.26313344 |\n", "| clip_fraction | 0.178 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.363 |\n", "| explained_variance | 0.898 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.247 |\n", "| n_updates | 970 |\n", "| policy_gradient_loss | -0.0285 |\n", "| value_loss | 0.521 |\n", "----------------------------------------\n" ] }, { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "
\n",
       "
\n" ], "text/plain": [ "\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "528f9bb0103a402d83eb31e85dd97caf", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(Label(value='0.438 MB of 0.438 MB uploaded\\r'), FloatProgress(value=1.0, max=1.0)))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n" ] }, { "data": { "text/html": [ "\n", "

Run history:


global_step▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
rollout/ep_len_mean▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▄█▅▄▂▂▃▂▂▄▂▂▂▁▁▂▂▃▂▂▂▄
rollout/ep_rew_mean▇▇▇▇▇▇▇▇▇█████████▅▁▅▆██▇██▆███▇▇██▇███▅
time/fps█▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃
train/approx_kl▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▁▂▃▂▁▁█▁▂▃▂▁▂▂▅
train/clip_fraction█▇▇▅▅▆▅▄▆▄▂▂▂▂▃▂▂▂▁▂▄▄▅▅▆▃▂▄▂▂▂▄▂▅▃▅▅▅▂▄
train/clip_range▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/entropy_loss▁▁▂▃▃▄▅▅▆▆▆▇▇▇▇▇▇▇██▇▇▇▇▇▇█▇▇▇█▇▇▇▇▆▇▇█▇
train/explained_variance▃▄▅▆▆▆▆▄▆▆▅▇█▇▆██▆▁▄▄▃▇▇▄▇█▇▆▆██▅▃▆▆▆▇▇▇
train/learning_rate▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆█▂▁▁▂▁▁▁▁▁▁▁▁▁▁▂▂▃▁▁
train/policy_gradient_loss▁▂▂▄▄▄▄▅▃▄▆▇▇▆▅▆▇▄█▆▆▅▄▄▃▅▇▆▇▆█▅▇▄▆▅▆▆▆▅
train/value_loss▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃██▂▁▁▃▂▁▁▁▁▁▁▁▁▁▃▂▂▁▁

Run summary:


global_step200704
rollout/ep_len_mean24.77
rollout/ep_rew_mean-11.45
time/fps389.0
train/approx_kl0.26313
train/clip_fraction0.17793
train/clip_range0.2
train/entropy_loss-0.36315
train/explained_variance0.89819
train/learning_rate0.0003
train/loss0.24744
train/policy_gradient_loss-0.02851
train/value_loss0.52073

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run confused-meadow-3 at: https://wandb.ai/fulltime/wordle/runs/ot2i0b8h
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 2 other file(s)" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Find logs at: ./wandb/run-20240319_162920-ot2i0b8h/logs" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "model = PPO(config[\"policy_type\"], env=env, verbose=0, tensorboard_log=f\"runs/{run.id}\")\n", "\n", "# Train for a certain number of timesteps\n", "model.learn(\n", " total_timesteps=config[\"total_timesteps\"],\n", " callback=WandbCallback(\n", " model_save_path=f\"models/{run.id}\",\n", " verbose=2,\n", " ),\n", "\tprogress_bar=True\n", ")\n", "\n", "run.finish()\n", "\n", "# Save the model\n", "model.save(\"wordle_ppo_model\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "model.save(\"wordle_ppo_model\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "model = PPO.load(\"wordle_ppo_model\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 1000/1000 [00:20<00:00, 49.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-6.703\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "rewards = 0\n", "for i in tqdm(range(1000)):\n", " obs, _ = env.reset()\n", " done = False\n", " while not done:\n", " action, _ = model.predict(obs)\n", " obs, reward, done, _, info = env.step(action)\n", " rewards += reward\n", "print(rewards / 1000)" ] } ], "metadata": { "kernelspec": { "display_name": "env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 2 }