diff --git a/CompressedTreeParser.ipynb b/CompressedTreeParser.ipynb index 6435d62..3a84036 100644 --- a/CompressedTreeParser.ipynb +++ b/CompressedTreeParser.ipynb @@ -37,37 +37,71 @@ "execution_count": 3, "id": "12ad454d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "here1\n", + " protocl src dst classfication\n", + "0 6 40234 5228 other\n", + "1 6 40234 5228 other\n", + "2 6 443 46330 Dropcam\n", + "3 6 3063 443 other\n", + "4 1 0 0 Netatmo Camera\n", + "... ... ... ... ...\n", + "2419339 6 443 47940 Dropcam\n", + "2419340 6 47940 443 other\n", + "2419341 6 443 47940 Dropcam\n", + "2419342 0 0 0 iHome PowerPlug\n", + "2419343 0 0 0 other\n", + "\n", + "[2419344 rows x 4 columns]\n", + "{8, 20}\n", + "{13}\n", + "[6, 40234, 5228]\n", + "other\n" + ] + } + ], "source": [ "predict_Yt = []\n", + "index=0\n", "with open('compressed_tree.json', 'r') as file:\n", " data = json.load(file)\n", " classes = data[\"classes\"]\n", " for x in X:\n", " counter = 0\n", " class_set = []\n", + " paths_set = []\n", " for feature in reversed(data['layers']): #Have to reverse this list due to structure of the data.csv file and how it aligns with the compressed tree layers\n", " for node in data['layers'][feature]:\n", " if node['min'] is None:\n", - " if x[counter] < node['max']:\n", + " if x[counter] <= node['max']:\n", " class_set.append(node['classes'])\n", + " paths_set.append(node[\"paths\"])\n", " break #is this an issue?\n", " else:\n", " continue\n", " elif node['max'] is None:\n", " if node['min'] < x[counter]:\n", " class_set.append(node['classes'])\n", + " paths_set.append(node[\"paths\"])\n", " break #is this an issue?\n", " else:\n", " continue\n", - " elif node['min'] < x[counter] and x[counter] < node['max']:\n", + " elif node['min'] < x[counter] and x[counter] <= node['max']:\n", " class_set.append(node['classes'])\n", + " paths_set.append(node[\"paths\"])\n", " break #is this an issue?\n", "\n", " counter += 1\n", " result = set(class_set[0])\n", + " paths = set(paths_set[0])\n", " for s in class_set[1:]:\n", " result.intersection_update(s)\n", + " for s in paths_set[1:]:\n", + " paths.intersection_update(s)\n", "\n", " #predict_Yt.append(list(result))\n", " #print(result)\n", @@ -75,8 +109,26 @@ " prediction = list(result)[0]\n", " pred_class = classes[prediction]\n", " predict_Yt.append(pred_class)\n", + " elif len(paths) == 1:\n", + " print(\"here1\")\n", + " print(pd.read_csv('data.csv'))\n", + " print(result)\n", + " print(paths)\n", + " print(x)\n", + " print(Y[index])\n", + " break\n", + " predict_Yt.append(None)\n", " else:\n", - " predict_Yt.append(None)" + " print(\"here2\")\n", + " print(pd.read_csv('data.csv'))\n", + " print(result)\n", + " print(paths)\n", + " print(x)\n", + " print(Y[index])\n", + " break\n", + " predict_Yt.append(None)\n", + " \n", + " index += 1" ] }, { @@ -86,10 +138,14 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8448217242194891\n" + "ename": "IndexError", + "evalue": "list index out of range", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mIndexError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m correct = \u001b[32m0\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(Y)):\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m prediction = \u001b[43mpredict_Yt\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m prediction != \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m Y[i] == prediction:\n\u001b[32m 5\u001b[39m correct += \u001b[32m1\u001b[39m\n", + "\u001b[31mIndexError\u001b[39m: list index out of range" ] } ],