From 8301998da3228f84a11a79fb8c786ab9fb8db428 Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Thu, 12 Jun 2025 06:10:39 +0000 Subject: [PATCH] temp fix for issue with metadata --- CompressedTreeParser.ipynb | 73 +++++++++----------------------------- 1 file changed, 16 insertions(+), 57 deletions(-) diff --git a/CompressedTreeParser.ipynb b/CompressedTreeParser.ipynb index 3a84036..3034946 100644 --- a/CompressedTreeParser.ipynb +++ b/CompressedTreeParser.ipynb @@ -37,36 +37,16 @@ "execution_count": 3, "id": "12ad454d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "here1\n", - " protocl src dst classfication\n", - "0 6 40234 5228 other\n", - "1 6 40234 5228 other\n", - "2 6 443 46330 Dropcam\n", - "3 6 3063 443 other\n", - "4 1 0 0 Netatmo Camera\n", - "... ... ... ... ...\n", - "2419339 6 443 47940 Dropcam\n", - "2419340 6 47940 443 other\n", - "2419341 6 443 47940 Dropcam\n", - "2419342 0 0 0 iHome PowerPlug\n", - "2419343 0 0 0 other\n", - "\n", - "[2419344 rows x 4 columns]\n", - "{8, 20}\n", - "{13}\n", - "[6, 40234, 5228]\n", - "other\n" - ] - } - ], + "outputs": [], "source": [ "predict_Yt = []\n", "index=0\n", + "original_tree = open('tree.json', 'r')\n", + "original_tree = json.load(original_tree)\n", + "path_to_class = {}\n", + "for i in range(len(original_tree[\"paths\"])):\n", + " path = original_tree[\"paths\"][i]\n", + " path_to_class[path[\"id\"]] = path[\"classification\"]\n", "with open('compressed_tree.json', 'r') as file:\n", " data = json.load(file)\n", " classes = data[\"classes\"]\n", @@ -105,28 +85,11 @@ "\n", " #predict_Yt.append(list(result))\n", " #print(result)\n", - " if len(result) == 1:\n", - " prediction = list(result)[0]\n", - " pred_class = classes[prediction]\n", - " predict_Yt.append(pred_class)\n", - " elif len(paths) == 1:\n", - " print(\"here1\")\n", - " print(pd.read_csv('data.csv'))\n", - " print(result)\n", - " print(paths)\n", - " print(x)\n", - " print(Y[index])\n", - " break\n", - " predict_Yt.append(None)\n", - " else:\n", - " print(\"here2\")\n", - " print(pd.read_csv('data.csv'))\n", - " print(result)\n", - " print(paths)\n", - " print(x)\n", - " print(Y[index])\n", - " break\n", - " predict_Yt.append(None)\n", + " assert len(paths) == 1\n", + " path = list(paths)[0]\n", + " pred = path_to_class[path]\n", + " pred_class = classes[pred]\n", + " predict_Yt.append(pred_class)\n", " \n", " index += 1" ] @@ -138,14 +101,10 @@ "metadata": {}, "outputs": [ { - "ename": "IndexError", - "evalue": "list index out of range", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mIndexError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m correct = \u001b[32m0\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(Y)):\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m prediction = \u001b[43mpredict_Yt\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m prediction != \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m Y[i] == prediction:\n\u001b[32m 5\u001b[39m correct += \u001b[32m1\u001b[39m\n", - "\u001b[31mIndexError\u001b[39m: list index out of range" + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8451332670948943\n" ] } ],