This repository has been archived on 2023-12-08. You can view files and clone it, but cannot push or open issues or pull requests.
CSE-158-Assignment-2/preliminary.ipynb

252 lines
124 KiB
Plaintext
Raw Permalink Normal View History

2023-11-28 23:58:51 +00:00
{
"cells": [
{
"cell_type": "code",
2023-12-04 18:22:45 +00:00
"execution_count": 2,
2023-11-29 23:45:43 +00:00
"id": "9808cacf",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [],
"source": [
2023-11-29 23:45:43 +00:00
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from datetime import datetime\n",
"import re\n",
"import gzip"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-12-04 18:22:45 +00:00
"execution_count": 3,
2023-11-29 23:45:43 +00:00
"id": "494d6c25",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [],
"source": [
2023-11-29 23:45:43 +00:00
"def parseData(fname):\n",
"    \"\"\"Yield one parsed record per line of the gzip-compressed file `fname`.\n",
"\n",
"    Each line is passed to eval() and the resulting object is yielded.\n",
"    The file is closed when the generator is exhausted or closed.\n",
"    \"\"\"\n",
"    # NOTE(review): eval() executes arbitrary expressions, so only run this on a trusted file.\n",
"    # If every line is a plain literal, ast.literal_eval would be a safer drop-in - TODO confirm.\n",
"    with gzip.open(fname) as f:\n",
"        for l in f:\n",
"            yield eval(l)\n",
"\n",
"data = list(parseData(\"australian_user_reviews.json.gz\"))\n",
"\n",
"# dm[hasfunny][hashelpful] counts reviews by whether they carry any funny / helpful ratings.\n",
"dm = [[0,0],[0,0]]\n",
"\n",
"users = set()  # user_ids already processed; repeated user records are skipped\n",
"games = set()  # distinct item_ids appearing in the kept reviews\n",
"\n",
"nodate = 0  # reviews whose \"posted\" text could not be parsed as 'Posted <Month> <day>, <year>.'\n",
"\n",
"reviews = []  # flattened review dicts, cleaned in place and tagged with their user\n",
"\n",
"for user in data:\n",
"    if user[\"user_id\"] in users:\n",
"        # Duplicate user record: only the first occurrence is kept.\n",
"        continue\n",
"    users.add(user[\"user_id\"])\n",
"    for review in user[\"reviews\"]:\n",
"        games.add(review[\"item_id\"])\n",
"\n",
"        # \"funny\" is \"\" when absent; otherwise the first integer in the text is the count.\n",
"        funny = review[\"funny\"]\n",
"        hasfunny = int(funny != \"\")\n",
"        if funny == \"\":\n",
"            review[\"funny\"] = 0\n",
"        else:\n",
"            # Strip thousands separators first (as for \"helpful\" below) so \"1,234\" is not read as 1.\n",
"            review[\"funny\"] = int(re.findall(r\"\\d+\", funny.replace(\",\", \"\"))[0])\n",
"\n",
"        # \"helpful\" is either the sentinel \"No ratings yet\" or text whose first two integers\n",
"        # are stored as helpful_n and helpful_total; \"helpful\" becomes their ratio.\n",
"        helpful = review[\"helpful\"]\n",
"        hashelpful = int(helpful != \"No ratings yet\")\n",
"        if helpful == \"No ratings yet\":\n",
"            review[\"helpful_n\"] = 0\n",
"            review[\"helpful_total\"] = 0\n",
"            review[\"helpful\"] = 0\n",
"        else:\n",
"            nums = re.findall(r\"\\d+\", helpful.replace(\",\", \"\"))\n",
"            review[\"helpful\"] = float(nums[0]) / float(nums[1])\n",
"            review[\"helpful_n\"] = int(nums[0])\n",
"            review[\"helpful_total\"] = int(nums[1])\n",
"\n",
"        dm[hasfunny][hashelpful] += 1\n",
"\n",
"        try:\n",
"            review[\"posted\"] = datetime.strptime(review[\"posted\"],'Posted %B %d, %Y.')\n",
"        except (TypeError, ValueError):\n",
"            # Unparseable date: keep the original value and count it. Later cells skip\n",
"            # any review whose \"posted\" is not a datetime.\n",
"            nodate += 1\n",
"\n",
"        review[\"user_id\"] = user[\"user_id\"]\n",
"        review[\"user_url\"] = user[\"user_url\"]\n",
"        reviews.append(review)"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-30 18:16:47 +00:00
"execution_count": 3,
2023-11-29 23:45:43 +00:00
"id": "52f12059",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2023-11-29 23:45:43 +00:00
"\t\tno helpful\thas hepful\n",
"no funny\t28791\t\t21629\n",
"has funny\t940\t\t7070\n",
"num unique users: 25485\n",
"num unique games: 3682\n",
"num reviews: 58430\n",
"num reviews with bad date format: 9932\n"
2023-11-28 23:58:51 +00:00
]
}
],
"source": [
2023-11-29 23:45:43 +00:00
"# Dataset summary: funny x helpful contingency table (rows index dm by funny, columns by helpful),\n",
"# followed by the user / game / review counts and the number of unparseable dates.\n",
"print(f\"\\t\\tno helpful\\thas helpful\\nno funny\\t{dm[0][0]}\\t\\t{dm[0][1]}\\nhas funny\\t{dm[1][0]}\\t\\t{dm[1][1]}\")\n",
"print(f\"num unique users: {len(users)}\")\n",
"print(f\"num unique games: {len(games)}\")\n",
"print(f\"num reviews: {len(reviews)}\")\n",
"print(f\"num reviews with bad date format: {nodate}\")"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-30 18:16:47 +00:00
"execution_count": 4,
2023-11-29 23:45:43 +00:00
"id": "fadaaebb",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [
{
"data": {
2023-11-30 18:16:47 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABNcklEQVR4nO3deXQUdfb38U8nIQtLwp4QgQ7CACIwIBmYoA6ocdhEUZRFdkEMioq4TDPKqkDDuCMSdRzCKIwooCijKAKKSMTIpqIiogkMGLZIwg5J6vnDX/eTztqddKe39+ucOoeuqq66VQnU5X6XMhmGYQgAACCIhXg7AAAAAG8jIQIAAEGPhAgAAAQ9EiIAABD0SIgAAEDQIyECAABBj4QIAAAEPRIiAAAQ9EiIAABA0CMhAoAqmjFjhkwmk7fD8GmffPKJTCaTPvnkE2+HApSKhAhwUVpamkwmkyIjI3Xw4MES23v27Kn27dt7ITLXjB49WiaTqdRl7dq13g6vSmw/I9sSFhamSy65RKNHjy71Z+aMM2fOaMaMGTzQiyjvd6joMnr0aG+HClQozNsBAP7q/PnzslqtWrBggbdDqbSIiAj985//LLH+j3/8oxeicb9Zs2apRYsWOnfunL744gulpaVp8+bN+vbbbxUZGenSsc6cOaOZM2dK+j3pLeqxxx6TxWJxV9h+46677lJycrL98y+//KJp06Zp/Pjxuvrqq+3rW7ZsqW7duuns2bMKDw/3RqhAhUiIgErq1KmTXnnlFU2ZMkXx8fHeDqdSwsLCNHz4cG+H4TF9+vRRYmKiJGncuHFq2LCh5s2bp3fffVeDBg1y23nCwsIUFhZ8/5wmJSUpKSnJ/vmrr77StGnTlJSUVOrvlatJKFCdaDIDKunvf/+7CgoKZLVay90vMzNTJpNJaWlpJbaZTCbNmDHD/tnWF+XHH3/U8OHDFRMTo0aNGmnq1KkyDEMHDhzQTTfdpOjoaMXFxempp55y81X9f2X1+SjtekaPHq3atWvr4MGDGjBggGrXrq1GjRrpoYceUkFBQYnvPvnkk3r55ZfVsmVLRURE6E9/+pMyMjLs+y1evFgmk0k7duwoEdecOXMUGhpaqaYvW9Vi37599nUXLlzQtGnT1KVLF8XExKhWrVq6+uqrtXHjRoe4GzVqJEmaOXOmvSnI9rMrrQ+RyWTSxIkT9c4776h9+/aKiIjQ5ZdfXmpz5CeffKLExERFRkaqZcuWeumll0o95rp163TVVVepbt26ql27ttq0aaO///3v5V5z+/btdc0115RYX1hYqEsuuUS33nqrfd0bb7yhLl26qE6dOoqOjlaHDh303HPPlXt8Z5X2+2RrXv7666/Vo0cP1axZU61atdKKFSskSZ9++qm6deumqKgotWnTRh9//HGJ4x48eFB33HGHYmNj7ff4X//6l1tiRnAhIQIqqUWLFho5cqReeeUVHTp0yK3HHjx4sAoLC2W1WtWtWzc98cQTevbZZ3X99dfrkksu0bx589SqVSs99NBD2rRpU5XOdezYMYclNze3UscpKChQr1691KBBAz355JPq0aOHnnrqKb388ssl9l22bJn+8Y9/6K677tITTzyhzMxM3XLLLbp48aIk6dZbb1VUVJSWLl1a4rtLly5Vz549dckll7gcY2ZmpiSpXr169nV5eXn65z//qZ49e2revHmaMWOGjh49ql69emnnzp2SpEaNGmnRokWSpJtvvlmvvfaaXnvtNd1yyy3lnm/z5s26++67NWTIEM2fP1/nzp3TwIEDdfz4cfs+O3bsUO/evXX8+HHNnDlTY8eO1axZs/TOO+84HGv37t264YYbdP78ec2aNUtPPfWUbrzxRn3++eflxjB48GBt2rRJ2dnZJWI7dOiQhgwZIun3ZGvo0KGqV6+e5s2bJ6vVqp49e1Z4/Kr67bffdMMNN6hbt26aP3++IiIiNGTIEC1fvlxDhgxR3759ZbVadf
r0ad166606efKk/buHDx/Wn//8Z3388ceaOHGinnvuObVq1Upjx47Vs88+69G4EYAMAC5ZvHixIcnIyMgw9u3bZ4SFhRn33XeffXuPHj2Myy+/3P75l19+MSQZixcvLnEsScb06dPtn6dPn25IMsaPH29fl5+fbzRt2tQwmUyG1Wq1r//tt9+MqKgoY9SoUZW6jlGjRhmSSiw9evQwDMMwNm7caEgyNm7c6PC90q7HdqxZs2Y57Nu5c2ejS5cuJb7boEEDIycnx75+9erVhiTjvffes68bOnSoER8fbxQUFNjXbd++vcx7WZTtZ/Txxx8bR48eNQ4cOGCsWLHCaNSokREREWEcOHDAvm9+fr5x/vx5h+//9ttvRmxsrHHHHXfY1x09erTEz8vG9nMrSpIRHh5u/PTTT/Z1u3btMiQZCxYssK/r37+/UbNmTePgwYP2dXv37jXCwsIcjvnMM88YkoyjR4+We+3F7dmzp8Q5DcMw7r77bqN27drGmTNnDMMwjPvvv9+Ijo428vPzXTp+URkZGWX+fEr7ferRo4chyVi2bJl93Q8//GBIMkJCQowvvvjCvv7DDz8sceyxY8caTZo0MY4dO+ZwriFDhhgxMTH2awOcQYUIqIJLL71UI0aM0Msvv6xff/3VbccdN26c/c+hoaFKTEyUYRgaO3asfX3dunXVpk0b/fzzz5U+T2RkpNatW+ewVKUZLiUlxeHz1VdfXWp8gwcPdqjS2Jqyiu47cuRIHTp0yKHpaunSpYqKitLAgQOdiic5OVmNGjVSs2bNdOutt6pWrVp699131bRpU/s+oaGh9o6+hYWFysnJUX5+vhITE7V9+3anzlPe+Vu2bGn/3LFjR0VHR9uvs6CgQB9//LEGDBjg0A+tVatW6tOnj8Ox6tatK0lavXq1CgsLnY6hdevW6tSpk5YvX25fV1BQoBUrVqh///6KioqyH//06dNat26dy9dZFbVr17ZXqSSpTZs2qlu3ri677DJ169bNvt72Z9u9MwxDK1euVP/+/WUYhkOVs1evXsrNza3yzw/BhYQIqKLHHntM+fn5FfYlckXz5s0dPsfExCgyMlINGzYssf63336r9HlCQ0OVnJzssHTp0qVSx4qMjLT3s7GpV69eqfEVvz5bclR03+uvv15NmjSxN5sVFhbqP//5j2666SbVqVPHqZgWLlyodevWacWKFerbt6+OHTumiIiIEvstWbJEHTt2VGRkpBo0aKBGjRrpv//9b6WbD22KX6fkeE+OHDmis2fPqlWrViX2K75u8ODBuvLKKzVu3DjFxsZqyJAhevPNN51KjgYPHqzPP//c3u/qk08+0ZEjRzR48GD7Pnfffbdat26tPn36qGnTprrjjjuqZfqFpk2blugrFRMTo2bNmpVYJ/3/35GjR4/qxIkTevnll9WoUSOHZcyYMZJ+v7+As0iIgCq69NJLNXz48DKrRGVN2Fe0s3FxoaGhTq2Tfv+fsie4GndZ8bmyb9FrCQ0N1e23366VK1fq3Llz2rhxow4dOuTSqLiuXbsqOTlZAwcO1Lvvvqv27dvr9ttv16lTp+z7vP766xo9erRatmypV199VWvXrtW6det07bXXulSJKY07f2ZRUVHatGmTPv74Y40YMUJff/21Bg8erOuvv77c3yXp94TIMAy99dZbkqQ333xTMTEx6t27t32fxo0ba+fOnXr33Xd14403auPGjerTp49GjRrlcqyuKOseVXTvbD+b4cOHl6hy2pYrr7zSM0EjIJEQAW5gqxLNmzevxDZb9ePEiRMO67OysqojtErzhbhHjhypvLw8vffee1q6dKkaNWqkXr16VepYoaGhmjt3rg4dOqQXXnjBvn7FihW69NJLtWrVKo0YMUK9evVScnKyzp075/B9T8xE3bhxY0VGRuqnn34qsa20dSEhIbruuuv09NNP67vvvtPs2bO1YcMGh2bF0rRo0UJdu3bV8uXLlZ+fr1WrVmnAgAElqm
Xh4eHq37+/XnzxRe3bt0933XWX/v3vf5cai7c1atRIderUUUFBQYkqp21p3Lixt8OEHyEhAtygZcuWGj58uF566aU
2023-11-28 23:58:51 +00:00
"text/plain": [
2023-11-29 23:45:43 +00:00
"<Figure size 640x480 with 1 Axes>"
2023-11-28 23:58:51 +00:00
]
},
"metadata": {},
2023-11-29 23:45:43 +00:00
"output_type": "display_data"
2023-11-28 23:58:51 +00:00
}
],
"source": [
2023-11-29 23:45:43 +00:00
"%matplotlib inline\n",
"\n",
"SECONDS_PER_YEAR = 60*60*24*365  # 365-day year; good enough for an axis scale\n",
"\n",
"# Only reviews whose \"posted\" field was parsed into a datetime can be placed on the time axis.\n",
"dated_reviews = [review for review in reviews if isinstance(review[\"posted\"], datetime)]\n",
"\n",
"X = np.array([review[\"posted\"].timestamp() for review in dated_reviews])\n",
"Y = np.array([review[\"funny\"] for review in dated_reviews])\n",
"\n",
"# Express time in years relative to the most recent dated review, so every X is <= 0.\n",
"X = (X - np.max(X)) / SECONDS_PER_YEAR\n",
"\n",
"plt.scatter(X, Y, s=1, color=\"k\")\n",
"plt.xlabel(\"Time (Years)\")\n",
"plt.ylabel(\"Num. Funny Ratings\")\n",
"plt.title(\"Num. Funny Ratings vs Time\")\n",
"plt.show()\n"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-30 18:16:47 +00:00
"execution_count": 5,
2023-11-29 23:45:43 +00:00
"id": "eee4566a",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [
{
2023-11-29 23:45:43 +00:00
"data": {
2023-11-30 18:16:47 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAACbe0lEQVR4nO2deXxU1dn4n3snMwk3YclEBEFmArgiKMgk1qAgakXrRl1GEytIXciruLbqWFSoFomt1Spqom0F6U+LK6K1at14FUFEEbRYFZEE3BBBQkQBSZ7fH7zn9sydc9e52yTP9/O5n2TuPctznnOT88xznnOOhIgIBEEQBEEQnQQ5aAEIgiAIgiDchIwbgiAIgiA6FWTcEARBEATRqSDjhiAIgiCITgUZNwRBEARBdCrIuCEIgiAIolNBxg1BEARBEJ0KMm4IgiAIguhUkHFDEARBEESngowbgiB8pbKyEs477zzH+f/whz/AoEGDIBKJwPDhw23lnTNnDkiSBM3NzY7r94LzzjsPKisrgxYj1IS174hwQsYNERrYPy92lZSUwH777QdTpkyBDRs2BC1e3nzwwQcwffr0gvjnfN5550FZWZnuc0mSYMqUKT5KtJt//etfcM0118CoUaNg9uzZcMstt3hSz/Tp07PexWg0CpWVlXDZZZfBli1bHJX5xRdfwPTp02HFihWuylrIHHXUUVl61rumT58etKhEgVEUtAAEoeWmm26CgQMHwvbt22HRokXQ2NgI//znP+Hf//43KIoStHiO+eCDD+C3v/0tHHXUUfQt3SGvvPIKyLIMf/3rXyEWi3leX2NjI5SVlcG2bdvg5ZdfhlmzZsHy5cth0aJFtsv64osv4Le//S1UVlbmeJz+/Oc/Q0dHh0tSFw5Tp06FCy64QP28bNkyuOuuu+A3v/kNHHjgger9gw8+GA466CA4++yzobi4OAhRiQKDjBsidJxwwgmQSqUAAOCCCy6AiooKuP3222HBggVQW1ubV9nff/99QRtIXZ2vv/4aunXr5othAwBwxhlnwB577AEAAJMnT4azzz4bHnnkEXjrrbegurratXqi0ahrZRUSP/3pT7M+l5SUwF133QU//elP4aijjspJH4lEfJKMKHRoWooIPUcffTQAAKxdu1a99//+3/+DkSNHQrdu3SAej8PZZ58N69evz8p31FFHwdChQ+Gdd96B0aNHg6Io8Jvf/AYAALZv3w7Tp0+H/fbbD0pKSmCvvfaC0047DdasWaPm7+jogD/96U9w0EEHQUlJCfTp0wcmT54M3377bVY9lZWVcNJJJ8GiRYuguroaSkpKYNCgQTB37lw1zZw5c+DMM88EAICxY8eq7vaFCxcCAMCCBQvgxBNPhH79+kFxcTEMHjwYbr75Zmhvb8/Rxz333AODBg2Cbt26QXV1Nbz++utw1FFH5QwGO3bsgGnTpsE+++wDxcXFMGDAALjmmmtgx44dNnvAGk7rY9ORr732GkyePBkqKiqgR48eMGHChCxdS5IEs2fPhm3btqn6mzNnDjQ3N6u/a3F7SuPII48EAMh6TzZv3gy//vWvYdiwYVBWVgY9evSAE044AVauXKmmWbhwIVRVVQEAwKRJk7LkB8iNuWFtuu222+D++++HwYMHQ3FxMVRVVcGyZcty5HrsscdgyJAhUFJSAkOHDoX58+cL43jmzZsHI0eOhO7du0OPHj1g2LBhcOedd+q298cff4R4PA6TJk3KebZ161YoKSmBX//61+q9WbNmwUEHHQSKokB5eTmkUil4+OGH9RVqA1HMDfvbW7hwIaRSKejWrRsMGzZM/bt68sknYdiwYVBSUgIjR46Ed999N6fcDz/8EM444wyIx+NQUlICqVQKnn76aVdkJoKDPDdE6GEDSUVFBQAAzJgxA2644QZIp9NwwQUXwMaNG2HWrFkwevRoePfdd6FXr15q3k2bNsEJJ5wAZ599NvziF7+APn36QHt7O5
x00knw8ssvw9lnnw2XX345tLW1wYsvvgj//ve/YfDgwQCw+5v6nDlzYNKkSXDZZZfB2rVr4e6774Z3330X3njjjaxv25988gmcccYZcP7558PEiRPhgQcegPPOOw9GjhwJBx10EIwePRouu+yyHJc7+zlnzhwoKyuDq666CsrKyuCVV16BG2+8EbZu3Qp/+MMf1HoaGxthypQpcOSRR8KVV14Jzc3NMH78eCgvL4e9995bTdfR0QGnnHIKLFq0CC666CI48MAD4f3334c77rgDPv74Y3jqqacs6f6bb76xlM6N+qZMmQK9evWC6dOnw0cffQSNjY3Q0tICCxcuBEmS4G9/+xvcf//98NZbb8Ff/vIXAACoqamxJJ9bsIG1vLxcvffpp5/CU089BWeeeSYMHDgQNmzYAPfddx+MGTMGPvjgA+jXrx8ceOCBcNNNN8GNN94IF110kWokmcn/8MMPQ1tbG0yePBkkSYLf//73cNppp8Gnn36qvn/PPvssnHXWWTBs2DCYOXMmfPvtt3D++edD//79s8p68cUXoba2Fo455hi49dZbAQDgP//5D7zxxhtw+eWXC+uPRqPw85//HJ588km47777sjxmTz31FOzYsQPOPvtsANg9tXbZZZfBGWecAZdffjls374d3nvvPVi6dCnU1dXZ0LI9PvnkE6irq4PJkyfDL37xC7jtttvg5JNPhqamJvjNb34DF198MQAAzJw5E9LpNHz00Ucgy7u/169atQpGjRoF/fv3h0wmA6WlpfDoo4/C+PHj4YknnoCf//znnslNeAwSREiYPXs2AgC+9NJLuHHjRly/fj3OmzcPKyoqsFu3bvjZZ59hc3MzRiIRnDFjRlbe999/H4uKirLujxkzBgEAm5qastI+8MADCAB4++2358jQ0dGBiIivv/46AgA+9NBDWc+ff/75nPvJZBIBAF977TX13tdff43FxcX4q1/9Sr332GOPIQDgq6++mlPv999/n3Nv8uTJqCgKbt++HRERd+zYgRUVFVhVVYU//vijmm7OnDkIADhmzBj13t/+9jeUZRlff/31rDKbmpoQAPCNN97IqY9n4sSJCACG1yWXXOKovmQyiRMnTlQ/s34fOXIk7ty5U73/+9//HgEAFyxYkCVXaWlpVh1r165FAMDZs2fntAMAcNq0aTl1rV271rD906ZNQwDAjz76CDdu3IjNzc34wAMPYLdu3bB37964bds2Ne327duxvb09R6bi4mK86aab1HvLli3TlXPixImYTCZz2lRRUYGbN29W7y9YsAABAJ955hn13rBhw3DvvffGtrY29d7ChQsRALLKvPzyy7FHjx64a9cuw7ZreeGFF3LqRET82c9+hoMGDVI/n3rqqXjQQQfZKluL0d+IqO/Y397ixYtz5O3WrRu2tLSo9++7776cso855hgcNmyY+jeGuPt/QE1NDe677755tYUIFpqWIkLHscceC71794YBAwbA2WefDWVlZTB//nzo378/PPnkk9DR0QHpdBq++eYb9erbty/su+++8Oqrr2aVVVxcnONSf+KJJ2CPPfaASy+9NKduSZIAYLebv2fPnvDTn/40q56RI0dCWVlZTj1DhgxRv40DAPTu3Rv2339/+PTTTy21uVu3burvbW1t8M0338CRRx4J33//PXz44YcAAPD222/Dpk2b4MILL4Siov86Xc8555wsTwKT/8ADD4QDDjggS342xaeVX0RJSQm8+OKLwkuLG/VddNFFWd6w//mf/4GioiL45z//aZrXK/bff3/o3bs3VFZWwi9/+UvYZ5994LnnnsuK2youLlY9Ae3t7bBp0yYoKyuD/fffH5YvX55X/WeddVZW37J3jL1XX3zxBbz//vswYcKErNVtY8aMgWHDhmWV1atXL9i2bZuw/4w4+uijYY899oBHHnlEvfftt9/Ciy++CGeddVZW+Z999plw2sxLhgwZAocffrj6+bDDDgOA3XInEomc+0x3mz
dvhldeeQXS6bT6N/fNN9/Apk2bYNy4cbB69Wr4/PPPfWwJ4SY0LUWEjnvuuQf2228/KCoqgj59+sD++++vDh6rV68
2023-11-29 23:45:43 +00:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
2023-11-28 23:58:51 +00:00
}
],
"source": [
2023-11-29 23:45:43 +00:00
"%matplotlib inline\n",
"\n",
"SECONDS_PER_YEAR = 60*60*24*365  # 365-day year; good enough for an axis scale\n",
"\n",
"# Only reviews whose \"posted\" field was parsed into a datetime can be placed on the time axis.\n",
"dated_reviews = [review for review in reviews if isinstance(review[\"posted\"], datetime)]\n",
"\n",
"X = np.array([review[\"posted\"].timestamp() for review in dated_reviews])\n",
"# review[\"helpful\"] is a 0-1 ratio (helpful_n / helpful_total); scale it so the values\n",
"# match the \"Percentage\" wording of the axis label and title.\n",
"Y = np.array([review[\"helpful\"] for review in dated_reviews]) * 100\n",
"\n",
"# Express time in years relative to the most recent dated review, so every X is <= 0.\n",
"X = (X - np.max(X)) / SECONDS_PER_YEAR\n",
"\n",
"plt.scatter(X, Y, s=1, color=\"k\")\n",
"plt.xlabel(\"Time (Years)\")\n",
"plt.ylabel(\"Percentage Helpful Ratings\")\n",
"plt.title(\"Percentage Helpful Ratings vs Time\")\n",
"plt.show()\n"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-30 18:16:47 +00:00
"execution_count": 6,
2023-11-29 23:45:43 +00:00
"id": "d903e81a",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [
{
2023-11-29 23:45:43 +00:00
"data": {
"text/plain": [
2023-11-30 18:16:47 +00:00
"Text(0.5, 1.0, 'Num. Helpful Ratings vs Num. Total Ratings')"
2023-11-29 23:45:43 +00:00
]
},
2023-11-30 18:16:47 +00:00
"execution_count": 6,
2023-11-29 23:45:43 +00:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2023-11-30 18:16:47 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB2PElEQVR4nO3de3zO9f/H8ce1sc1pmzGb4+aUcohKyVnZ1yGSKMeEkIUvOsgqIb+vLH2jkizfCh0k5JywHBNFciYiQzFkbI5j2/v3x+e76+uyYZtru65tz/vtdt34fD7v6/N5fT6u7Xp5H23GGIOIiIhIPubh6gBEREREXE0JkYiIiOR7SohEREQk31NCJCIiIvmeEiIRERHJ95QQiYiISL6nhEhERETyPSVEIiIiku8pIRIREZF8TwmRSDYKDQ2lV69eWX7/22+/TaVKlfD09KROnTqZeu/06dOx2WzExMRk+frZoVevXoSGhro6DMkD1qxZg81mY82aNa4OxcHo0aOx2WyuDkMySQmRZFnqF66Pjw9//fVXmuPNmjWjZs2aLogsc3r16kXRokVveNxmszFo0KAcjMiyYsUKXn75ZRo2bMi0adN48803s+U6qb+8U18FCxYkNDSUwYMHc/bs2Syd89ixY4wePZpt27Y5NdbcLiYmxv6cv/nmmzTHU/8t/v77bxdElzHXflZu9spIkvLmm2+yYMGCbI859XdV6qtAgQKULVuWXr16pfu7KyMuXrzI6NGj3S4Zk6wr4OoAJPdLTEwkMjKSSZMmuTqUPGXVqlV4eHjwySef4OXlle3XmzJlCkWLFuXChQusXLmSSZMm8euvv7J+/fpMn+vYsWO88cYbhIaGpqnZ+s9//kNKSoqTos69xowZQ4cOHXJdTcLnn3/usP3ZZ58RHR2dZv9dd911y3O9+eabPPHEE7Rv396ZId7QmDFjqFixIpcvX+ann35i+vTprF+/nl27duHj45Opc128eJE33ngDsP7zd60RI0YQERHhrLAlhyghkttWp04d/vOf//DKK69QpkwZV4eTZ5w8eZJChQrlSDIE8MQTT1CyZEkA+vfvT5cuXfj666/ZtGkTDzzwgNOuU7BgQaedK7eqU6cO27ZtY/78+XTo0MHV4WTKU0895bD9008/ER0dnWa/O2rdujV169YFoG/fvpQsWZK33nqLRYsW0alTJ6ddp0CBAhQooK/X3EZNZnLbXn31VZKTk4mMjLxpudTmgunTp6c5ZrPZGD16tH07telg//79PPXUU/j5+REYGMjrr7+OMYajR4/y2GOP4evrS3BwMO+8846T7+rmEhMTGTVqFFWqVMHb25vy5cvz8ssvk5iYeNP3pVbdr1u3jv79+1OiRAl8fX15+umnOXPmjL2czWZj2rRpXLhwwV7NP3369Ew9w9vVuHFjAA4ePGjfFxcXx0svvUStWrUoWrQovr6+tG7dmu3bt9vLrFmzhvvvvx+A3r17O8QPafsQpd7Tv//9b6ZOnUrlypXx9vbm/vvvZ/PmzWnimjNnDtWrV8fHx4eaNWsyf/78dPslzZo1i/vuu49ixYrh6+tLrVq1eO+99254v1evXiUgIIDevXunOZaQkICPjw8vvfSSfd+kSZOoUaMGhQsXpnjx4tStW5eZM2fe+IFeo0uXLtxxxx2MGTMGY8xNy96oH1qzZs0caiZS+9PMnj2bN954g7Jly1KsWDGeeOIJ4uPjSUxMZOjQoZQqVYqiRYvSu3fvW35es+rChQu8+OKLlC9fHm9vb6pVq8a///1vh3u12WxcuHCBGTNm2D8jqfd5+PBhBgwYQLVq1ShUqBAlSpTgySefdHp/uPQ+41euXGHkyJHcd999+Pn5UaRIERo3bszq1avtZWJiYggMDATgjTfesMef+vOXXh+i1Kb3BQsWULNmTby9valRowbLli1LE9eaNWuoW7cuPj4+VK5cmY8++ijdc0ZHR9OoUSP8/f0pWrQo1a
pV49VXX3XKs8mPlMLKbatYsSJPP/00//nPf4iIiHBqLVHnzp256667iIyM5Ntvv+Vf//oXAQEBfPTRRzz88MO89dZbfPnll7z00kvcf//9NGnSJMvXymi/jZSUFNq1a8f69et59tlnueuuu9i5cycTJ05k//79GeoTMWjQIPz9/Rk9ejT79u1jypQpHD582P6l9vnnnzN16lQ2bdrExx9/DECDBg2yfG9ZkfrlU7x4cfu+P/74gwULFvDkk09SsWJFTpw4wUcffUTTpk3Zs2cPZcqU4a677mLMmDGMHDmSZ5991v6lc6v4Z86cyblz5+jfvz82m43x48fToUMH/vjjD3ut0rfffkvnzp2pVasW48aN48yZM/Tp04eyZcs6nCs6OpquXbvSvHlz3nrrLQD27t3Ljz/+yJAhQ9K9fsGCBXn88ceZN28eH330kUPN3IIFC0hMTKRLly6A1ew3ePBgnnjiCYYMGcLly5fZsWMHP//8M926dbvls/X09GTEiBE8/fTTTq8lGjduHIUKFSIiIoIDBw4wadIkChYsiIeHB2fOnGH06NH25qKKFSsycuRIp10bwBhDu3btWL16NX369KFOnTosX76cYcOG8ddffzFx4kTAanrr27cvDzzwAM8++ywAlStXBmDz5s1s2LCBLl26UK5cOWJiYpgyZQrNmjVjz549FC5c2CmxpvcZT0hI4OOPP6Zr167069ePc+fO8cknn9CyZUs2bdpEnTp1CAwMZMqUKTz33HM8/vjj9n+/u++++6bXW79+PfPmzWPAgAEUK1aM999/n44dO3LkyBFKlCgBwNatW2nVqhWlS5fmjTfeIDk5mTFjxtgTsFS7d++mbdu23H333YwZMwZvb28OHDjAjz/+6JRnky8ZkSyaNm2aAczmzZvNwYMHTYECBczgwYPtx5s2bWpq1Khh3z506JABzLRp09KcCzCjRo2yb48aNcoA5tlnn7XvS0pKMuXKlTM2m81ERkba9585c8YUKlTI9OzZM0v30bNnTwPc9DVw4EB7+c8//9x4eHiYH374weE8UVFRBjA//vijfV9ISIhDXKnP7L777jNXrlyx7x8/frwBzMKFCx3iKlKkiMM1MvMMU6916NChm95/6rPet2+fOXXqlImJiTGffvqpKVSokAkMDDQXLlywl718+bJJTk5OE5O3t7cZM2aMfd/mzZtvGGfPnj1NSEhImnsqUaKEiYuLs+9fuHChAczixYvt+2rVqmXKlStnzp07Z9+3Zs0aAzicc8iQIcbX19ckJSXd9N6vt3z58jTXNMaYRx55xFSqVMm+/dhjjzl8tjMq9V7ffvttk5SUZKpWrWpq165tUlJSjDH/+7c4deqU/T3Xf4ZSNW3a1DRt2tS+vXr1agOYmjVrOny2unbtamw2m2ndurXD++vXr+/wzLJq4MCB5tqvkgULFhjA/Otf/3Io98QTTxibzWYOHDhg31ekSJF07+3ixYtp9m3cuNEA5rPPPrPvS73n1atX3zTG1J+F77//3pw6dcocPXrUzJ071wQGBhpvb29z9OhRe9mkpCSTmJjo8P4zZ86YoKAg88wzz9j3nTp1Ks3PXKrUf8drAcbLy8vh/rdv324AM2nSJPu+Rx991BQuXNj89ddf9n2///67KVCggMM5J06cmOazIrdHTWbiFJUqVaJHjx5MnTqV48ePO+28ffv2tf/d09OTunXrYoyhT58+9v3+/v5Uq1aNP/74I8vX8fHxITo6Ot3X9ebMmcNdd93FnXfeyd9//21/PfzwwwAOVes38uyzzzr0pXnuuecoUKAAS5cuzfI93K5q1aoRGBhIaGgozzzzDFWqVOG7775z+N+4t7c3Hh7Wr43k5GROnz5tr6r/9ddfb+v6nTt3dvifemrNUuq/67Fjx9i5cydPP/20w6jApk2bUqtWLYdz+fv7c+HChXT//W7m4YcfpmTJknz99df2fWfOnCE6OprOnTs7nP/PP/9Mt0kvo1Jrib
Zv3+7UkVZPP/20w2erXr16GGN45plnHMrVq1ePo0ePkpSU5LRrAyxduhRPT08GDx7ssP/FF1/EGMN33313y3MUKlT
2023-11-29 23:45:43 +00:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
2023-11-28 23:58:51 +00:00
}
],
"source": [
2023-11-29 23:45:43 +00:00
"# Helpful ratings against total ratings, one point per review.\n",
"X = [review[\"helpful_total\"] for review in reviews]\n",
"Y = [review[\"helpful_n\"] for review in reviews]\n",
"\n",
"plt.scatter(X,Y,s=1,color=\"k\")\n",
"# Red y = x reference line: points on it are reviews where every rating was \"helpful\".\n",
"plt.axline((0, 0), slope=1, color=\"r\")\n",
"plt.ylabel(\"Num. Helpful Ratings\")\n",
"plt.xlabel(\"Num. Total Ratings\")\n",
"plt.title(\"Num. Helpful Ratings vs Num. Total Ratings\")\n",
"# Render explicitly so the cell does not emit the Text(...) repr of plt.title as its result.\n",
"plt.show()"
2023-11-28 23:58:51 +00:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}