fix range bug in TreeCompress, update parser to autofail non-singular classifications

2026-07-16 19:43:06 +00:00 · 2025-06-11 19:10:19 +00:00
parent 7bee40ecf9
commit 6de3807fe2
4 changed files with 49 additions and 109 deletions
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 1,
   "id": "938dec51",
   "metadata": {},
   "outputs": [],
@@ -17,102 +17,37 @@
    "from matplotlib import pyplot as plt\n",
    "from labels import mac_to_label\n",
    "import json\n",
-    "import math\n"
+    "import math"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 2,
   "id": "442624c7",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[6, 40234, 5228], [6, 40234, 5228], [6, 443, 46330], [6, 3063, 443], [1, 0, 0], [17, 61725, 53], [6, 5228, 40234], [6, 443, 3063], [0, 0, 0], [0, 0, 0], [6, 40234, 5228], [17, 4500, 45966], [17, 53, 61725], [1, 0, 0], [6, 46330, 443], [6, 443, 46330], [0, 0, 0], [1, 0, 0], [6, 3063, 443], [6, 443, 3063]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "Set1 = pd.read_csv('data.csv').values.tolist()\n",
    "X = [i[0:3] for i in Set1]\n",
-    "Y =[i[3] for i in Set1]\n",
-    "\n",
-    "print(X[0:20])\n"
+    "Y =[i[3] for i in Set1]"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
-   "id": "f18850b1",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Amazon Echo', 'Belkin Motion Sensor', 'Belkin Switch', 'Blipcare BP Meter', 'Dropcam', 'HP Printer', 'NEST Smoke Sensor', 'Netatmo Camera', 'Netatmo Weather station', 'Pixstart photo frame', 'Samsung Smart Cam', 'Smart Things', 'TP-Link Camera', 'TP-Link Plug', 'Triby Speaker', 'Withings', 'Withings Scale', 'other']\n"
-     ]
-    }
-   ],
-   "source": [
-    "classes  = [\n",
-    "        \"Amazon Echo\",\n",
-    "        \"Belkin Motion Sensor\",\n",
-    "        \"Belkin Switch\",\n",
-    "        \"Blipcare BP Meter\",\n",
-    "        \"Dropcam\",\n",
-    "        \"HP Printer\",\n",
-    "        \"NEST Smoke Sensor\",\n",
-    "        \"Netatmo Camera\",\n",
-    "        \"Netatmo Weather station\",\n",
-    "        \"Pixstart photo frame\",\n",
-    "        \"Samsung Smart Cam\",\n",
-    "        \"Smart Things\",\n",
-    "        \"TP-Link Camera\",\n",
-    "        \"TP-Link Plug\",\n",
-    "        \"Triby Speaker\",\n",
-    "        \"Withings\",\n",
-    "        \"Withings Scale\",\n",
-    "        \"other\"\n",
-    "    ]\n",
-    "\n",
-    "print(classes)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "id": "12ad454d",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[1, 11, 17], [1, 11, 17], [1, 4, 11, 15, 17], [17], [8, 17], [], [1, 11, 17, 15], [11], [8, 17], [8, 17], [1, 11, 17], [], [], [8, 17], [17]]\n",
-      "['other', 'other', 'Dropcam', 'other', 'Netatmo Camera', 'other', 'Triby Speaker', 'Smart Things', 'other', 'Belkin Switch', 'other', 'Netatmo Camera', 'Netatmo Camera', 'other', 'other']\n",
-      "947072\n",
-      "947072\n",
-      "The accuracy was:  0.0\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "predict_Yt = []\n",
    "with open('compressed_tree.json', 'r') as file:\n",
    "    data = json.load(file)\n",
-    "    #print(data['layers']['dst'])\n",
+    "    classes = data[\"classes\"]\n",
    "    for x in X:\n",
    "        counter = 0\n",
    "        class_set = []\n",
    "        for feature in reversed(data['layers']): #Have to reverse this list due to structure of the data.csv file and how it aligns with the compressed tree layers\n",
-    "            #print(\"The feature is: \", feature)\n",
-    "\n",
    "            for node in data['layers'][feature]:\n",
-    "                #print(\"The node is: \", node)\n",
-    "                #print(f\"The min is: {node['min']}, the max is {node['max']}\")\n",
    "                if node['min'] is None:\n",
    "                    if x[counter] < node['max']:\n",
    "                        class_set.append(node['classes'])\n",
@@ -130,39 +65,42 @@
    "                    break #is this an issue?\n",
    "\n",
    "            counter += 1\n",
-    "        #print(\"The list of classes is: \", class_set)\n",
    "        result = set(class_set[0])\n",
    "        for s in class_set[1:]:\n",
    "            result.intersection_update(s)\n",
    "\n",
-    "        #print(\"The result was: \", result)\n",
-    "        predict_Yt.append(list(result))\n",
-    "        #print(predict_Yt)\n",
+    "        # Accept a prediction only when the intersection leaves exactly one class index;\n",
+    "        # an empty or ambiguous intersection is recorded as None.\n",
+    "        if len(result) == 1:\n",
+    "            prediction = list(result)[0]\n",
+    "            pred_class = classes[prediction]\n",
+    "            predict_Yt.append(pred_class)\n",
+    "        else:\n",
+    "            predict_Yt.append(None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "8b4c56b6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.8448217242194891\n"
+     ]
+    }
+   ],
+   "source": [
+    "correct = 0  # number of predictions equal to the corresponding entry of Y\n",
+    "for i in range(len(Y)):\n",
+    "    prediction = predict_Yt[i]\n",
+    "    if prediction is not None and Y[i] == prediction:  # None marks a non-singular (auto-failed) classification\n",
+    "        correct += 1\n",
    "\n",
-    "        \n",
-    "\n",
-    "print(predict_Yt[0:15])\n",
-    "print(Y[0:15])\n",
-    "\n",
-    "\n",
-    "print(len(predict_Yt))\n",
-    "print(len(Y))\n",
-    "\n",
-    "\n",
-    "counter = 0\n",
-    "for index in range(len(Y)):\n",
-    "    for possible_class_index in predict_Yt[index]:\n",
-    "        if Y[index] == classes[possible_class_index]:\n",
-    "            counter += 1\n",
-    "\n",
-    "\n",
-    "print(\"The accuracy was: \", counter / len(Y))\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n"
+    "print(correct / len(Y))"
   ]
  }
 ],
@@ -182,7 +120,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.12.7"
  }
 },
 "nbformat": 4,
@@ -109,13 +109,13 @@
    "\t\tvalue = condition[\"value\"]\n",
    "\n",
    "\t\t# move the min/max for the corresponding feature in compressed\n",
-    "\t\tif operation == \"<=\" and compressed[feature][\"min\"] is None:\n",
+    "\t\tif operation == \"<=\" and compressed[feature][\"max\"] is None:\n",
    "\t\t\tcompressed[feature][\"max\"] = value\n",
-    "\t\telif operation == \">\" and compressed[feature][\"max\"] is None:\n",
+    "\t\telif operation == \">\" and compressed[feature][\"min\"] is None:\n",
    "\t\t\tcompressed[feature][\"min\"] = value\n",
-    "\t\telif operation == \"<=\" and value < compressed[feature][\"min\"]:\n",
+    "\t\telif operation == \"<=\" and value < compressed[feature][\"max\"]:\n",
    "\t\t\tcompressed[feature][\"max\"] = value\n",
-    "\t\telif operation == \">\" and value > compressed[feature][\"max\"]:\n",
+    "\t\telif operation == \">\" and value > compressed[feature][\"min\"]:\n",
    "\t\t\tcompressed[feature][\"min\"] = value\n",
    "\n",
    "\tpath[\"compressed\"] = compressed"
@@ -263,7 +263,7 @@
      "[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
      "id mapping: \n",
      "[['dst_range', 'dst_meta'], ['src_range', 'src_meta'], ['protocl_range', 'protocl_meta'], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]\n",
-      "TCAM bits: 3584\n",
+      "TCAM bits: 3520\n",
      "RAM bits:  522\n"
     ]
    }