This repository has been archived on 2023-12-08. You can view files and clone it, but cannot push or open issues or pull requests.
CSE-158-Assignment-2/preliminary.ipynb

252 lines
128 KiB
Plaintext
Raw Normal View History

2023-11-28 23:58:51 +00:00
{
"cells": [
{
"cell_type": "code",
2023-11-29 23:45:43 +00:00
"execution_count": 92,
"id": "9808cacf",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [],
"source": [
2023-11-29 23:45:43 +00:00
"import ast\n",
"import gzip\n",
"import re\n",
"from datetime import datetime\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-29 23:45:43 +00:00
"execution_count": 93,
"id": "494d6c25",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [],
"source": [
2023-11-29 23:45:43 +00:00
"def parseData(fname):\n",
"    \"\"\"Yield one parsed record per line of a gzip-compressed file.\n",
"\n",
"    Every line must contain a single Python literal (the rest of this notebook\n",
"    treats each record as a dict). Lines are parsed with ``ast.literal_eval``\n",
"    instead of ``eval`` so the data file can never execute code.\n",
"\n",
"    Args:\n",
"        fname: Path of the gzip-compressed input file.\n",
"\n",
"    Yields:\n",
"        The Python object described by each line, in file order.\n",
"\n",
"    Raises:\n",
"        ValueError, SyntaxError: If a line is not a valid Python literal.\n",
"    \"\"\"\n",
"    # The context manager closes the file once the generator is exhausted or closed.\n",
"    with gzip.open(fname) as lines:\n",
"        for l in lines:\n",
"            # Lines arrive as bytes; decode as UTF-8 before parsing.\n",
"            yield ast.literal_eval(l.decode(\"utf-8\"))\n",
"\n",
"data = list(parseData(\"australian_user_reviews.json.gz\"))\n",
"\n",
"# dm[hasfunny][hashelpful]: 2x2 table of review counts, indexed by whether the\n",
"# review has a non-empty \"funny\" field and whether it has any helpfulness ratings.\n",
"dm = [[0,0],[0,0]]\n",
"\n",
"users = set()  # user_ids already processed; later records with the same id are skipped\n",
"games = set()  # item_ids that appear in the kept reviews\n",
"\n",
"nodate = 0  # reviews whose \"posted\" text does not match the expected date format\n",
"\n",
"reviews = []  # flattened, cleaned review dicts of all unique users\n",
"\n",
"for user in data:\n",
"    if user[\"user_id\"] in users:\n",
"        # Duplicate user record: ignore it together with its reviews.\n",
"        continue\n",
"    users.add(user[\"user_id\"])\n",
"\n",
"    for review in user[\"reviews\"]:\n",
"        games.add(review[\"item_id\"])\n",
"\n",
"        # \"funny\": empty string means no votes, otherwise use the first integer in the text.\n",
"        # Commas are stripped (as for \"helpful\" below) so a thousands separator\n",
"        # cannot truncate the number.\n",
"        funny = review[\"funny\"]\n",
"        hasfunny = int(funny != \"\")\n",
"        if hasfunny:\n",
"            review[\"funny\"] = int(re.findall(r\"\\d+\", funny.replace(\",\", \"\"))[0])\n",
"        else:\n",
"            review[\"funny\"] = 0\n",
"\n",
"        # \"helpful\": the first two integers in the text are read as\n",
"        # (helpful ratings, total ratings); \"helpful\" becomes their ratio.\n",
"        helpful = review[\"helpful\"]\n",
"        hashelpful = int(helpful != \"No ratings yet\")\n",
"        if hashelpful:\n",
"            nums = re.findall(r\"\\d+\", helpful.replace(\",\", \"\"))\n",
"            review[\"helpful_n\"] = int(nums[0])\n",
"            review[\"helpful_total\"] = int(nums[1])\n",
"            review[\"helpful\"] = review[\"helpful_n\"] / review[\"helpful_total\"]\n",
"        else:\n",
"            review[\"helpful_n\"] = 0\n",
"            review[\"helpful_total\"] = 0\n",
"            review[\"helpful\"] = 0\n",
"\n",
"        dm[hasfunny][hashelpful] += 1\n",
"\n",
"        try:\n",
"            review[\"posted\"] = datetime.strptime(review[\"posted\"],'Posted %B %d, %Y.')\n",
"        except ValueError:\n",
"            # Date text in another format: keep the raw string (later cells skip\n",
"            # non-datetime values) and count it.\n",
"            nodate += 1\n",
"\n",
"        review[\"user_id\"] = user[\"user_id\"]\n",
"        review[\"user_url\"] = user[\"user_url\"]\n",
"        reviews.append(review)"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-29 23:45:43 +00:00
"execution_count": 94,
"id": "52f12059",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2023-11-29 23:45:43 +00:00
"\t\tno helpful\thas hepful\n",
"no funny\t28791\t\t21629\n",
"has funny\t940\t\t7070\n",
"num unique users: 25485\n",
"num unique games: 3682\n",
"num reviews: 58430\n",
"num reviews with bad date format: 9932\n"
2023-11-28 23:58:51 +00:00
]
}
],
"source": [
2023-11-29 23:45:43 +00:00
"# 2x2 table: rows = review has funny votes, columns = review has helpfulness ratings.\n",
"print(f\"\\t\\tno helpful\\thas helpful\\nno funny\\t{dm[0][0]}\\t\\t{dm[0][1]}\\nhas funny\\t{dm[1][0]}\\t\\t{dm[1][1]}\")\n",
"print(f\"num unique users: {len(users)}\")\n",
"print(f\"num unique games: {len(games)}\")\n",
"print(f\"num reviews: {len(reviews)}\")\n",
"print(f\"num reviews with bad date format: {nodate}\")"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-29 23:45:43 +00:00
"execution_count": 95,
"id": "fadaaebb",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [
{
"data": {
2023-11-29 23:45:43 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABRKklEQVR4nO3deVxU9f4/8NcwLIJsoiySOBCWihsk6UUz7UrikqVZqOGaS7hm2jKmgprLZFkppbTcK1aWmEsuv7QMt0wyXCtcUhNwCVARcAUdzu+PvnPuDAwwB2af1/PxOI9Hc86Zc97ngJ03789yZIIgCCAiIiJyYE6WDoCIiIjI0pgQERERkcNjQkREREQOjwkREREROTwmREREROTwmBARERGRw2NCRERERA6PCRERERE5PCZERERE5PCYEBERWZFRo0YhNDTU0mFYtR49eqBHjx6WDoPsDBMichhpaWmQyWRo0KABLl26VGV7jx490LZtWwtEJs2oUaMgk8n0Ljt27LB0ePWi+RkdOnRI8ndv376NuXPnYs+ePcYPzMguX76MuXPn4tixY5YOxSrk5ORU+ztdecnJybF0uGSnnC0dAJG5lZWVQaVSISUlxdKh1Jmbmxs+++yzKus7dOhggWisw+3btzFv3jwAsPrqweXLlzFv3jyEhoYiMjJSZ9unn36KiooKywRmIf7+/vjiiy901i1duhQXL17E+++/X2XfH374wZzhkYNgQkQOJzIyEp9++ilmzpyJ4OBgS4dTJ87Ozhg2bJilw3AIt27dQsOGDc12PhcXF7Ody1o0bNiwyu/z2rVrcf36df6ek9mwyYwczptvvgm1Wg2VSlXjfpoyflpaWpVtMpkMc+fOFT/PnTsXMpkMf/75J4YNGwYfHx/4+/tjzpw5EAQBFy5cwDPPPANvb28EBQVh6dKlRr6q/9mzZw9kMlmVpiN91zNq1Ch4enri0qVLGDBgADw9PeHv749XX30VarW6ynffffddfPLJJwgPD4ebmxseffRRZGVlifutWrUKMpkMR48erRLXokWLIJfL9TZX1sSQGHNycuDv7w8AmDdvnti8ov0zOnXqFJ577jn4+fmhQYMGiI6OxpYtW3TOpWmy27t3LyZOnIiAgAA0a9YMAJCbm4uJEyeiZcuWcHd3R+PGjfH888/rbcIpLi7GK6+8gtDQULi5uaFZs2YYMWIErl69ij179uDRRx8FAIwePVqMVfNz0deH6NatW5gxYwZCQkLg5uaGli1b4t1334UgCDr7yWQyTJ48Gd9++y3atm0LNzc3tGnTpkpT6o0bNzBt2jQxvoCAADz55JM4cuRItT+H9evXi/emso8//hgymQx//PEHACA/Px+jR49Gs2bN4ObmhqZNm+KZZ54xWnNX5T5Emt/5devWYd68eXjggQfg5eWF5557DiUlJSgrK8O0adMQEBAAT09PjB49GmVlZVWO++WXX6Jjx45wd3eHn58fhgwZggsXLhglZrJ+rBCRwwkLC8OIESPw6aefQqlUGrVKNHjwYLRu3RoqlQr/7//9PyxYsAB+fn74+OOP8e9//xtvv/021qxZg1dffRWPPvooHn/88Tqf6+rVqzqfXVxc4OPjI/k4arUacXFx6Ny5M9599138+OOPWLp0KcLDwzFhwgSdfb/66ivcuHEDL730EmQyGZYsWYJnn30Wf/31F1xcXPDcc89h0qRJWLNmDaKionS+u2bNGvTo0QMPPPCA0WP09/fHypUrMWHCBAwcOBDPPvssAKB9+/YAgOzsbHTt2hUPPPAAlEolGjZsiHXr1mHAgAHYsGEDBg4cqHO+iRMnwt/fH0lJSbh16xYAICsrCwcOHMCQIUPQrFkz5OTkYOXKlejRowdOnDgBDw8PAMDNmzfRrVs3nDx5Ei+++CIeeeQRXL16FVu2bMHFixfRunVrzJ8/H0lJSRg/fjy6desGAOjSpYveaxcEAU8//TR2796NMWPGID
IyEt9//z1ee+01XLp0qUqT0v79+7Fx40ZMnDgRXl5eWL58OQYNGoS8vDw0btwYAJCYmIj169dj8uTJiIiIwLVr17B//36cPHkSjzzyiN44+vXrB09PT6xbtw7du3fX2Zaeno42bdqIffAGDRqE7OxsTJkyBaGhoSgsLMTOnTuRl5dn0g7jixcvhru7O5RKJc6ePYuUlBS4uLjAyckJ169fx9y5c/HLL78gLS0NYWFhSEpKEr+7cOFCzJkzB/Hx8Rg7diyuXLmClJQUPP744zh69Ch8fX1NFjdZCYHIQaxatUoAIGRlZQnnzp0TnJ2dhalTp4rbu3fvLrRp00b8fP78eQGAsGrVqirHAiAkJyeLn5OTkwUAwvjx48V19+/fF5o1aybIZDJBpVKJ669fvy64u7sLI0eOrNN1jBw5UgBQZenevbsgCIKwe/duAYCwe/dune/pux7NsebPn6+zb1RUlNCxY8cq323cuLFQVFQkrt+8ebMAQNi6dau4bujQoUJwcLCgVqvFdUeOHKn2XmrT/hlJjfHKlStVfi4aPXv2FNq1ayfcvXtXXFdRUSF06dJFeOihh6qc/7HHHhPu37+vc4zbt29XOW5mZqYAQPj888/FdUlJSQIAYePGjVX2r6ioEARBELKysqq9HyNHjhQUCoX4+dtvvxUACAsWLNDZ77nnnhNkMplw9uxZcR0AwdXVVWfd8ePHBQBCSkqKuM7Hx0eYNGlSlXPXZujQoUJAQIDOvfn7778FJycn8edz/fp1AYDwzjvvSD6+tn79+uncB23du3cXf98F4X+/823bthXKy8t14pXJZEKfPn10vh8TE6Nz7JycHEEulwsLFy7U2e/3338XnJ2dq6wn+8QmM3JIDz74IIYPH45PPvkEf//9t9GOO3bsWPG/5XI5oqOjIQgCxowZI6739fVFy5Yt8ddff9X5PA0aNMDOnTt1lvo0wyUmJup87tatm974Bg8ejEaNGunsB0Bn3xEjRuDy5cvYvXu3uG7NmjVwd3fHoEGDTB5jZUVFRdi1axfi4+Nx48YNXL16FVevXsW1a9cQFxeHM2fOVGnGGzduHORyuc46d3d38b/v3buHa9euoUWLFvD19dVpatqwYQM6dOhQpeoE/NOkJdV3330HuVyOqVOn6qyfMWMGBEHA9u3bddbHxsYiPDxc/Ny+fXt4e3vr3CtfX18cPHgQly9flhTL4MGDUVhYqNMcu379elRUVGDw4MEA/rlPrq6u2LNnD65fvy7p+PU1YsQInT5YnTt3hiAIePHFF3X269y5My5cuID79+8DADZu3IiKigrEx8eLvx9Xr15FUFAQHnroIZ3fZbJfTIjIYc2ePRv379+vtS+RFM2bN9f57OPjgwYNGqBJkyZV1tfnYSGXyxEbG6uzdOzYsU7HatCggdj/RqNRo0Z646t8fZrkSHvfJ598Ek2bNsWaNWsAABUVFfj666/xzDPPwMvLy+QxVnb27FkIgoA5c+bA399fZ0lOTgYAFBYW6nwnLCysynHu3LmDpKQksR9PkyZN4O/vj+LiYpSUlIj7nTt3zqjTN+Tm5iI4OLjKvWvdurW4XVvlnxFQ9V4tWbIEf/zxB0JCQtCpUyfMnTvXoOSyd+/e8PHxQXp6urguPT0dkZGRePjhhwH8MwLy7bffxvbt2xEYGIjHH38cS5YsQX5+vuEXXUf6/v0BQEhISJX1FRUV4s/tzJkzEAQBDz30UJXfkZMnT1b5/SD7xD5E5LAefPBBDBs2DJ988gmUSmWV7dX9Na/d2biyylWF6tYBqNIh1likxl1dfFL21b4WuVyOF154AZ9++ilWrFiBn3/+GZcvX67XaCEpMVamGcL+6quvIi4uTu8+LVq00PmsXQ3SmDJlClatWoVp06YhJiYGPj4+kMlkGDJkiFUNkzfkZxQfH49u3bph06ZN+OGHH/DOO+/g7bffxsaNG9GnT59qj+3m5oYBAwZg06ZNWLFiBQoKCvDzzz9j0a
JFOvtNmzYN/fv3x7fffovvv/8ec+bMweLFi7Fr164qfcuMqbprr+2eVFRUQCaTYfv27Xr39fT0NF6QZLWYEJFDmz1
2023-11-28 23:58:51 +00:00
"text/plain": [
2023-11-29 23:45:43 +00:00
"<Figure size 640x480 with 1 Axes>"
2023-11-28 23:58:51 +00:00
]
},
"metadata": {},
2023-11-29 23:45:43 +00:00
"output_type": "display_data"
2023-11-28 23:58:51 +00:00
}
],
"source": [
2023-11-29 23:45:43 +00:00
"%matplotlib inline\n",
"\n",
"# Only reviews whose \"posted\" value was parsed into a datetime can be placed on a time axis.\n",
"dated_reviews = [review for review in reviews if isinstance(review[\"posted\"], datetime)]\n",
"\n",
"X = np.array([review[\"posted\"].timestamp() for review in dated_reviews])\n",
"Y = np.array([review[\"funny\"] for review in dated_reviews])\n",
"\n",
"# Time in years relative to the newest review (0 = newest, negative = older),\n",
"# using a 365-day year.\n",
"X = (X - np.max(X)) / (60*60*24*365)\n",
"\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(X, Y, s=1, color=\"k\")\n",
"ax.set_xlabel(\"Time (Years)\")\n",
"ax.set_ylabel(\"Num. Funny Ratings\")\n",
"ax.set_title(\"Num. Funny Ratings vs Time\")\n",
"plt.show()\n"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-29 23:45:43 +00:00
"execution_count": 96,
"id": "eee4566a",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [
{
2023-11-29 23:45:43 +00:00
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAACgfklEQVR4nO2de3wU1fn/PzOb3Q0TbtmIIMhuELVeW5FNrPECqBWx9VKtq4mVS1VMBalixVis4D2t9Va1yde2ira23lqlfmu9FOWriBcUUatVEUlArchFIIqCJM/vD39nenZ2rrszu7Pheb9e80p25lye85xJzrPPec45ChERGIZhGIZheglqqQVgGIZhGIbxEzZuGIZhGIbpVbBxwzAMwzBMr4KNG4ZhGIZhehVs3DAMwzAM06tg44ZhGIZhmF4FGzcMwzAMw/Qq2LhhGIZhGKZXwcYNwzAMwzC9CjZuGIZxpLa2FpMnT847/3XXXYfddtsNkUgEBxxwgKe88+bNg6Io6OjoyLv+HYWxY8di7NixpRYj1BT6LjPlARs3TEGIgUdclZWV2HPPPTF9+nSsWbOm1OIVzFtvvYW5c+eWxcA6efJk9O3b1/K5oiiYPn16ESX6mieeeAKzZs3CIYccgjvvvBPXXHNNIPXMnTsXiqJg3bp1nvN+9NFHmDt3LpYtW+a/YD5TTu9kMVi4cGHW/yC7i9lxqCi1AEzv4IorrsCIESPw5ZdfYtGiRWhra8Ojjz6Kf/3rX9A0rdTi5c1bb72Fyy+/HGPHjkVtbW2pxSlLnnrqKaiqit///veIxWKlFseUjz76CJdffjlqa2s9e5aKjd07+cQTT5RGqBKy99574w9/+EPWvUsuuQR9+/bF7Nmzc9K/8847UFX+Xt/bYeOG8YUJEyYgnU4DAM466yzU1NTghhtuwPz589HY2FhQ2Vu2bClrA2lH55NPPkGfPn1Ca9gEyeeff46qqqqi1bcj6njw4MH44Q9/mHWvtbUVO+20U859AIjH48USjSkhbL4ygXDEEUcAAFauXKnf++Mf/4jRo0ejT58+SCQSOO2007B69eqsfGPHjsV+++2HV155BYcffjg0TcPPfvYzAMCXX36JuXPnYs8990RlZSV22WUXnHTSSVixYoWev6enBzfddBP23XdfVFZWYvDgwTjnnHPw6aefZtVTW1uL733ve1i0aBHq6+tRWVmJ3XbbDXfffbeeZt68eTjllFMAAOPGjdNd2wsXLgQAzJ8/H9/97ncxdOhQxONxjBw5EldeeSW6u7tz9HHbbbdht912Q58+fVBfX49nn33WND5i69atmDNnDnbffXfE43EMHz4cs2bNwtatWz32gDvyrU9MRz7zzDM455xzUFNTg/79+2PixIlZulYUBXfeeSc+//xzXX/z5s1DR0eH/rsRRVEwd+5cX9on3qe33noL48aNg6ZpGDZsGH75y1/qaRYuXIi6ujoAwJQpU7LkFLz44os45phjMGDAAGiahjFjxuC5557LqktMi7311ltoampCdXU1Dj30UADA66+/jsmTJ2O33XZDZWUlhgwZgh/96EdYv359jswffvghzjzzTP29GjFiBH784x9j27Ztju+k2Tv1ySef4Mwzz8TgwYNRWVmJb33rW7jrrruy0oj++NWvfoXbb78dI0eORDweR11dHZYsWZKV9uOPP8aUKVOw6667Ih6PY5dddsEJJ5xgO032q1/9CoqioLOzM+fZJZdcglgspr83y5cvx8knn4whQ4agsrISu+66K0477TRs2rTJsnwvGGNuxLu8aNEizJgxA4MGDcLAgQNxzjnnYNu2bdi4cSMmTpyI6upqVFdXY9asWSCirDLd/t9higd7bphAEAZHTU0NAODqq6/Gz3/+c2QyGZx11llYu3YtbrnlFhx++OF49dVXMXDgQD3v+vXrMWHCBJx22mn44Q9/iMGDB6O7uxvf+973sGDBApx22mn4yU9+gq6uLj
z55JP417/+hZEjRwIAzjnnHMybNw9TpkzBjBkzsHLlStx666149dVX8dxzzyEajer1vPfee/jBD36AM888E5MmTcIdd9yByZMnY/To0dh3331x+OGHY8aMGfj1r3+Nn/3sZ9h7770BQP85b9489O3bFzNnzkTfvn3x1FNP4bLLLsPmzZtx3XXX6fW0tbVh+vTpOOyww3DBBRego6MDJ554Iqqrq7Hrrrvq6Xp6enD88cdj0aJFmDp1Kvbee2+88cYbuPHGG/Huu+/i4YcfdqV7tzEnftQ3ffp0DBw4EHPnzsU777yDtrY2dHZ26nEQf/jDH3D77bfjpZdewu9+9zsAQENDgyv5/OLTTz/FMcccg5NOOgmZTAYPPvggLr74Yuy///6YMGEC9t57b1xxxRW47LLLMHXqVBx22GFZcj711FOYMGECRo8ejTlz5kBVVdx555044ogj8Oyzz6K+vj6rvlNOOQV77LEHrrnmGn0QfPLJJ/H+++9jypQpGDJkCN58803cfvvtePPNN/HCCy/o8SAfffQR6uvrsXHjRkydOhV77bUXPvzwQzz44IPYsmWL4ztp5IsvvsDYsWPx3nvvYfr06RgxYgQeeOABTJ48GRs3bsRPfvKTrPR/+tOf0NXVhXPOOQeKouCXv/wlTjrpJLz//vv6387JJ5+MN998E+eddx5qa2vxySef4Mknn8SqVassp24zmQxmzZqF+++/HxdddFHWs/vvvx9HH300qqursW3bNowfPx5bt27FeeedhyFDhuDDDz/E//7v/2Ljxo0YMGCA2273jKjv8ssvxwsvvIDbb78dAwcOxOLFi5FMJnHNNdfg0UcfxXXXXYf99tsPEydO1PN6+b/DFAlimAK48847CQD985//pLVr19Lq1avp3nvvpZqaGurTpw998MEH1NHRQZFIhK6++uqsvG+88QZVVFRk3R8zZgwBoPb29qy0d9xxBwGgG264IUeGnp4eIiJ69tlnCQDdc889Wc8fe+yxnPupVIoA0DPPPKPf++STTygej9OFF16o33vggQcIAD399NM59W7ZsiXn3jnnnEOaptGXX35JRERbt26lmpoaqquro6+++kpPN2/ePAJAY8aM0e/94Q9/IFVV6dlnn80qs729nQDQc889l1OfzKRJkwiA7TVt2rS86kulUjRp0iT9s+j30aNH07Zt2/T7v/zlLwkAzZ8/P0uuqqqqrDpWrlxJAOjOO+/MaQcAmjNnTk5dK1eutG3/nDlzCACtXbtWvyfep7vvvlu/t3XrVhoyZAidfPLJ+r0lS5aYytPT00N77LEHjR8/Xn/PiL7u+xEjRtB3vvOdnPobGxtzZDN7V/785z/nvIMTJ04kVVVpyZIlOelF/Xbv5JgxY7LeqZtuuokA0B//+Ef93rZt2+jggw+mvn370ubNm4nov/1RU1NDGzZs0NPOnz+fANAjjzxCRESffvopAaDrrrsup24nDj74YBo9enTWvZdeeimrf1599VUCQA888IDn8mX23XffLD3IWL3Lxj4++OCDSVEUam5u1u9t376ddt1116yyvfzfYYoHT0sxvnDUUUdh0KBBGD58OE477TT07dsXDz30EIYNG4a//vWv6OnpQSaTwbp16/RryJAh2GOPPfD0009nlRWPxzFlypSse3/5y1+w00474bzzzsupW3zrfeCBBzBgwAB85zvfyapn9OjR6Nu3b049++yzj/4tHQAGDRqEb3zjG3j//fddtblPnz76711dXVi3bh0OO+wwbNmyBW+//TYA4OWXX8b69etx9tlno6Liv47S008/HdXV1VnlPfDAA9h7772x1157ZckvpviM8ptRWVmJJ5980vQy4kd9U6dOzfpW+uMf/xgVFRV49NFHHfMWi759+2bFXsRiMdTX17vq52XLlmH58uVoamrC+vXrdR19/vnnOPLII/HMM8+gp6cnK09zc3NOOfK78uWXX2LdunX49re/DQBYunQpgK89aQ8//D
COO+44PX5NJp/VPo8++iiGDBmSFfcWjUYxY8YMfPbZZ/i///u/rPSnnnpq1nsp/j6ErkTs1MKFCz1PuZx66ql45ZV
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
2023-11-28 23:58:51 +00:00
}
],
"source": [
2023-11-29 23:45:43 +00:00
"%matplotlib inline\n",
"\n",
"# Only reviews whose \"posted\" value was parsed into a datetime can be placed on a time axis.\n",
"dated_reviews = [review for review in reviews if isinstance(review[\"posted\"], datetime)]\n",
"\n",
"X = np.array([review[\"posted\"].timestamp() for review in dated_reviews])\n",
"# review[\"helpful\"] is a 0-1 ratio; scale it so the plotted values match the\n",
"# \"Percentage\" axis label. Reviews without ratings were stored as 0 and show up at 0%.\n",
"Y = np.array([100 * review[\"helpful\"] for review in dated_reviews])\n",
"\n",
"# Time in years relative to the newest review (0 = newest, negative = older),\n",
"# using a 365-day year.\n",
"X = (X - np.max(X)) / (60*60*24*365)\n",
"\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(X, Y, s=1, color=\"k\")\n",
"ax.set_xlabel(\"Time (Years)\")\n",
"ax.set_ylabel(\"Percentage Helpful Ratings\")\n",
"ax.set_title(\"Percentage Helpful Ratings vs Time\")\n",
"plt.show()\n"
2023-11-28 23:58:51 +00:00
]
},
{
"cell_type": "code",
2023-11-29 23:45:43 +00:00
"execution_count": 108,
"id": "d903e81a",
2023-11-28 23:58:51 +00:00
"metadata": {},
"outputs": [
{
2023-11-29 23:45:43 +00:00
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Num. Helpful Interactions vs Num. Total Interactions')"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB6RElEQVR4nO3dd1hT59sH8G9ACChLlCGKDLU46l7FbbWiVetqnbVuxWqttbXiaB39taKt2mot1FrHa+uuu4ri3rPiFhfDhQtZoqw87x+npERWAgknkO/nunLJOefJyX0OIbl9pkIIIUBERERkwszkDoCIiIhIbkyIiIiIyOQxISIiIiKTx4SIiIiITB4TIiIiIjJ5TIiIiIjI5DEhIiIiIpPHhIiIiIhMHhMiIiIiMnlMiKjE8PT0xODBgwv8/O+//x7e3t4wNzdHvXr1dHruihUroFAoEBkZWeDXNxVt2rRBmzZt5A6DSgC+l7QTGRkJhUKBFStWyB2KUWNCVExkfuFaWVnh/v372Y63adMGb775pgyR6Wbw4MGwsbHJ9bhCocDYsWOLMCLJnj178OWXX6J58+ZYvnw5vvvuO4O8zowZM6BQKPD06VOdn/vgwQPMmDEDYWFh+g9Mz65evYoZM2YwQXzN4MGDoVAoUKdOHeS0apJc739tZb5/83tok6QcP34cM2bMQFxcnMHj9vT0RJcuXQr03J07d2LGjBn6DchAVq9ejR9//FHuMIqtUnIHQLpJSUlBYGAgFi1aJHcoJcr+/fthZmaG33//HZaWlnKHk6MHDx5g5syZ8PT01LkGq6hdvXoVM2fORJs2beDp6alxbM+ePfIEZUQuXbqETZs2oVevXnKHopOePXuiatWq6u2kpCSMHj0aPXr0QM+ePdX7XVxc8j3X8ePHMXPmTAwePBgODg6GCFcvdu7cicWLFxeLpGj16tW4fPkyxo8fr7Hfw8MDL1++hIWFhTyBFRNMiIqZevXq4bfffsPkyZPh5uYmdzglxuPHj2FtbW20yZAhvXjxAmXKlCmy1zPFe5yVtbU13N3dMWvWLPTs2RMKhULukLRWp04d1KlTR7399OlTjB49GnXq1MGHH34oY2TFixACr169grW1dZG8XmbrAuWNTWbFzJQpU5CRkYHAwMA8y+XVZqxQKDT+t5NZDX7jxg18+OGHsLe3h5OTE7766isIIXD37l1069YNdnZ2cHV1xbx58/R8VXlLSUnB9OnTUbVqVSiVSri7u+PLL79ESkpKns/LbGY8fPgwRo0ahXLlysHOzg4fffQRnj9/ri6nUCiwfPlyvHjxQl3lv2LFCp3uYWFkNndevXoVbdu2RenSpVGxYkXMnTtXXebgwYNo3LgxAGDIkCEacWY6deoUOnbsCHt7e5QuXRqtW7fGsWPHNF4r83d99epV9O/fH2XLlkWLFi0AABcvXsTgwYPh7e0NKysruLq6YujQoXj27Fm2mO/fv49hw4bBzc0NSqUSXl5eGD16NFJTU7FixQp88MEHAIC2bduqYz148KD6el9vUnn8+DGGDRsGFxcXWFlZoW7duli5cqVGmczfxw8//IAlS5agSpUqUCqVaNy4Mc6cOaNRNiYmBkOGDEGlSpWgVCpRoUIFdOvWLc8mvB9++AEKhQJRUVHZjk2ePBmWlpbq983NmzfRq1cvuLq6wsrKCpUqVULfvn0RHx+f6/kzmZmZYdq0abh48SI2b96cZ9nc+qYdPHhQ454C/72PLl68iNatW6N06dKoWrUqNm7cCAA4dOgQmjZtCmtra/j4+GDv3r35xlpQ+/fvR8uWLVGmTBk4ODigW7duuHbtmvr4jBkzMHHiRACAl5eX+j2SeZ3Lly/H22+/DWdnZyiVStSsWRNBQUF6i0/b99LgwYOxePFiANBoEsykUqnw448/olatWrCysoKLiwtGjRql8fkC/Ndkt3v3bjRq1AjW1tb49ddfdb7WXbt2oXXr1rC1tYWdnR
0aN26M1atXA5B+/3///TeioqLUcWbWzub2WZbf7wn47zPj1q1b6to8e3t7DBkyBMnJyRplQ0ND0aJFCzg4OMDGxgY+Pj6YMmWKlr8V+bGGqJjx8vLCRx99hN9++w0BAQF6rSXq06cPatSogcDAQPz999/43//+B0dHR/z66694++23MWfOHPz555/44osv0LhxY7Rq1arAr6VtHxqVSoX33nsPR48exciRI1GjRg1cunQJCxYswI0bN7Bly5Z8zzF27Fg4ODhgxowZCA8PR1BQEKKiotRfKqtWrcKSJUtw+vRpLF26FADQrFmzAl9bQTx//hwdO3ZEz5490bt3b2zcuBGTJk1C7dq10alTJ9SoUQOzZs3C119/jZEjR6Jly5Yace7fvx+dOnVCw4YNMX36dJiZmak/aI8cOYImTZpovN4HH3yAatWq4bvvvlP3ZQkNDcWdO3cwZMgQuLq64sqVK1iyZAmuXLmCkydPqr8IHjx4gCZNmiAuLg4jR45E9erVcf/+fWzcuBHJyclo1aoVxo0bh4ULF2LKlCmoUaMGAKj/fd3Lly/Rpk0b3Lp1C2PHjoWXlxc2bNiAwYMHIy4uDp9++qlG+dWrVyMxMRGjRo2CQqHA3Llz0bNnT9y5c0fdJNCrVy9cuXIFn3zyCTw9PfH48WOEhoYiOjo6WxNept69e+PLL7/E+vXr1V/WmdavX48OHTqgbNmySE1NhZ+fH1JSUvDJJ5/A1dUV9+/fx44dOxAXFwd7e/t8f9/9+/fHN998g1mzZqFHjx56qyV6/vw5unTpgr59++KDDz5AUFAQ+vbtiz///BPjx4+Hv78/+vfvj++//x7vv/8+7t69C1tbW728dqa9e/eiU6dO8Pb2xowZM/Dy5UssWrQIzZs3xz///ANPT0/07NkTN27cwJo1a7BgwQKUL18eAODk5AQACAoKQq1atfDee++hVKlS2L59Oz7++GOoVCqMGTNGb7Hm914aNWoUHjx4gNDQUKxatSrb80eNGoUVK1ZgyJAhGDduHCIiIvDzzz/j/PnzOHbsmEYTVXh4OPr164dRo0ZhxIgR8PHx0elaV6xYgaFDh6JWrVqYPHkyHBwccP78eYSEhKB///6YOnUq4uPjce/ePSxYsAAA8uyvqc3vKavevXvDy8sLs2fPxj///IOlS5fC2dkZc+bMAQBcuXIFXbp0QZ06dTBr1iwolUrcunUr23/KjJqgYmH58uUCgDhz5oy4ffu2KFWqlBg3bpz6eOvWrUWtWrXU2xEREQKAWL58ebZzARDTp09Xb0+fPl0AECNHjlTvS09PF5UqVRIKhUIEBgaq9z9//lxYW1uLQYMGFeg6Bg0aJADk+RgzZoy6/KpVq4SZmZk4cuSIxnmCg4MFAHHs2DH1Pg8PD424Mu9Zw4YNRWpqqnr/3LlzBQCxdetWjbjKlCmj8Rq63MPM14qIiMjz+jPv9ZMnT9T7WrduLQCI//u//1PvS0lJEa6urqJXr17qfWfOnMkxHpVKJapVqyb8/PyESqVS709OThZeXl7inXfeyfb6/fr1yxZbcnJytn1r1qwRAMThw4fV+z766CNhZmYmzpw5k6185utv2LBBABAHDhzIVqZ169aidevW6u0ff/xRABB//PGHel9qaqrw9fUVNjY2IiEhQQjx3++jXLlyIjY2Vl1269atAoDYvn27EEJ6jwIQ33//fbbXzo+vr69o2LChxr7Tp09r/H7Onz8vAIgNGzbofP6s77OVK1cKAGLTpk3q46+//3N7Xx04cCDb/c18H61evVq97/r16wKAMDMzEydPnlTv3717d67vbV08efIk299CvXr1hLOzs3j27Jl634ULF4SZmZn46KOP1Pu+//77XP9mcnov+vn5CW9vb419r7+XcuPh4SE6d+6s3tb2vSSEEGPGjBE5fVUeOXJEABB//vmnxv6QkJBs+z08PAQAERISUqBrjYuLE7a2tqJp06bi5cuXGmWz/s137txZeH
h4ZDtfTp9l2v6eMj8zhg4dqnHOHj16iHLlyqm3FyxYkO2zrbhhk1kx5O3tjYEDB2LJkiV4+PCh3s47fPhw9c/m5uZ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
2023-11-28 23:58:51 +00:00
}
],
"source": [
2023-11-29 23:45:43 +00:00
"X = [review[\"helpful_total\"] for review in reviews]\n",
"Y = [review[\"helpful_n\"] for review in reviews]\n",
"\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(X, Y, s=1, color=\"k\")\n",
"# Reference line y = x: points on it are reviews whose ratings were all helpful.\n",
"ax.axline((0, 0), slope=1, color=\"r\")\n",
"ax.set_ylabel(\"Num. Helpful Ratings\")\n",
"ax.set_xlabel(\"Num. Total Ratings\")\n",
"ax.set_title(\"Num. Helpful Ratings vs Num. Total Ratings\")\n",
"# Render explicitly so the cell does not echo the Text(...) repr of the title call.\n",
"plt.show()"
2023-11-28 23:58:51 +00:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}