From 8301998da3228f84a11a79fb8c786ab9fb8db428 Mon Sep 17 00:00:00 2001
From: Arthur Lu <root@tronnet.net>
Date: Thu, 12 Jun 2025 06:10:39 +0000
Subject: [PATCH] Temp fix for metadata issue: look up class via tree.json paths

---
 CompressedTreeParser.ipynb | 73 +++++++++-----------------------------
 1 file changed, 16 insertions(+), 57 deletions(-)

diff --git a/CompressedTreeParser.ipynb b/CompressedTreeParser.ipynb
index 3a84036..3034946 100644
--- a/CompressedTreeParser.ipynb
+++ b/CompressedTreeParser.ipynb
@@ -37,36 +37,16 @@
    "execution_count": 3,
    "id": "12ad454d",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "here1\n",
-      "         protocl    src    dst    classfication\n",
-      "0              6  40234   5228            other\n",
-      "1              6  40234   5228            other\n",
-      "2              6    443  46330          Dropcam\n",
-      "3              6   3063    443            other\n",
-      "4              1      0      0   Netatmo Camera\n",
-      "...          ...    ...    ...              ...\n",
-      "2419339        6    443  47940          Dropcam\n",
-      "2419340        6  47940    443            other\n",
-      "2419341        6    443  47940          Dropcam\n",
-      "2419342        0      0      0  iHome PowerPlug\n",
-      "2419343        0      0      0            other\n",
-      "\n",
-      "[2419344 rows x 4 columns]\n",
-      "{8, 20}\n",
-      "{13}\n",
-      "[6, 40234, 5228]\n",
-      "other\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "predict_Yt = []\n",
     "index=0\n",
+    "original_tree = open('tree.json', 'r')\n",
+    "original_tree = json.load(original_tree)\n",
+    "path_to_class = {}\n",
+    "for i in range(len(original_tree[\"paths\"])):\n",
+    "    path = original_tree[\"paths\"][i]\n",
+    "    path_to_class[path[\"id\"]] = path[\"classification\"]\n",
     "with open('compressed_tree.json', 'r') as file:\n",
     "    data = json.load(file)\n",
     "    classes = data[\"classes\"]\n",
@@ -105,28 +85,11 @@
     "\n",
     "        #predict_Yt.append(list(result))\n",
     "        #print(result)\n",
-    "        if len(result) == 1:\n",
-    "            prediction = list(result)[0]\n",
-    "            pred_class = classes[prediction]\n",
-    "            predict_Yt.append(pred_class)\n",
-    "        elif len(paths) == 1:\n",
-    "            print(\"here1\")\n",
-    "            print(pd.read_csv('data.csv'))\n",
-    "            print(result)\n",
-    "            print(paths)\n",
-    "            print(x)\n",
-    "            print(Y[index])\n",
-    "            break\n",
-    "            predict_Yt.append(None)\n",
-    "        else:\n",
-    "            print(\"here2\")\n",
-    "            print(pd.read_csv('data.csv'))\n",
-    "            print(result)\n",
-    "            print(paths)\n",
-    "            print(x)\n",
-    "            print(Y[index])\n",
-    "            break\n",
-    "            predict_Yt.append(None)\n",
+    "        assert len(paths) == 1\n",
+    "        path = list(paths)[0]\n",
+    "        pred = path_to_class[path]\n",
+    "        pred_class = classes[pred]\n",
+    "        predict_Yt.append(pred_class)\n",
     "        \n",
     "        index += 1"
    ]
@@ -138,14 +101,10 @@
    "metadata": {},
    "outputs": [
     {
-     "ename": "IndexError",
-     "evalue": "list index out of range",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mIndexError\u001b[39m                                Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m      1\u001b[39m correct = \u001b[32m0\u001b[39m\n\u001b[32m      2\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(Y)):\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m     prediction = \u001b[43mpredict_Yt\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m      4\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m prediction != \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m Y[i] == prediction:\n\u001b[32m      5\u001b[39m         correct += \u001b[32m1\u001b[39m\n",
-      "\u001b[31mIndexError\u001b[39m: list index out of range"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.8451332670948943\n"
      ]
     }
    ],