From e811171a73bc3ed72a0d0897c70edc2c3bb18cac Mon Sep 17 00:00:00 2001
From: krishpatel <krishpatel@ucla.edu>
Date: Wed, 11 Jun 2025 11:10:49 -0700
Subject: [PATCH] Implemented working compressed tree parser to get
 classification accuracy

---
 .DS_Store                  | Bin 0 -> 8196 bytes
 CompressedTreeParser.ipynb | 190 +++++++++++++++++++++++++++++++++++++
 TreeCompress.ipynb         |  37 ++++----
 TreeToRMT.ipynb            |   4 +-
 4 files changed, 209 insertions(+), 22 deletions(-)
 create mode 100644 .DS_Store
 create mode 100644 CompressedTreeParser.ipynb

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..475c46308b8b3447fe37e5ec4ac20716ba199d11
GIT binary patch
literal 8196
zcmeHMyKWOf6g^{7*ou^dI7J$y{Q$5CNJw;>;1m=z!M#~O6pZ}B*@Q>6J^z42K~2k#
z@G(?y?wygfv(AEO5D|B*nX{g~=gz%nW<4GofIGPy9RY^`nyi9*+ib2Wa$o93X@s8J
zL`OdG829i1579xsVQn3zfGJ=Km;$DNDex~Sz&%?^vu5A-cda!AOo9JW0r`ALSOwF7
zg-8A9V53I>;ugEL@S5iU<s<>qfQ3h;(x*Q?xIlH`h~Ye)_Y*NErU46&o(|{faA9N@
zPAE=CCofTRxYVPyrhqB1seqi_cX5PAJh|VG&hHskJjb8o{nP1UIG8R5w3LVIE9<8p
zT6hI)qtDk8?=VEh`VwR5iCOMDEkBl62ee#ZM&63{7krH|A}`nXV~+faeGANEKRD*2
zy;JhjBr78;BV$O$RczlluZ-I&*)KKXWz2AQ$@p9vVIN(_HpGP9ClV2Q>li1R_Z)-R
zheU}^$;Sas@m6yag>riIF-z7tBd_!GrQ)Ye%Mm#lrkt-8^~;G8*9@2J6R!)b;%MBR
zikFsJ4(Mk}Zbx#cFg{cmJIp%cC|Bh4!?-mwB#K8f)Ydr`<yQ2NbDi|*>kX^Se4UqX
z6)!EEQNv>@_l($KzN1Ravck%m@2UUS)McVqGmbXr_>yrhvk~)krIh(CT1kYzj4@i%
z`|FkQgxQwYcI^x|m}eCA?%GF>KF*kJkMY*64_RmBr>ad|Mdu5~-q+PFE9^HjCeLOx
zV|1!Tw%UYMFb!CElrHJlU6^S5zX)Q*6u3bO-14y=%K87a{`dbIl#zvM3YY?as({<-
zKJT88rMvnyY}w@#))%Z&#IEqD6gJTp0lr`VVaV%*rou@B79QEdVm}0^4Az(ee^r5>
Db#NQh

literal 0
HcmV?d00001

diff --git a/CompressedTreeParser.ipynb b/CompressedTreeParser.ipynb
new file mode 100644
index 0000000..e72603c
--- /dev/null
+++ b/CompressedTreeParser.ipynb
@@ -0,0 +1,190 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "938dec51",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import argparse\n",
+    "from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.tree import export_graphviz\n",
+    "import pydotplus\n",
+    "from matplotlib import pyplot as plt\n",
+    "from labels import mac_to_label\n",
+    "import json\n",
+    "import math\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "442624c7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[6, 40234, 5228], [6, 40234, 5228], [6, 443, 46330], [6, 3063, 443], [1, 0, 0], [17, 61725, 53], [6, 5228, 40234], [6, 443, 3063], [0, 0, 0], [0, 0, 0], [6, 40234, 5228], [17, 4500, 45966], [17, 53, 61725], [1, 0, 0], [6, 46330, 443], [6, 443, 46330], [0, 0, 0], [1, 0, 0], [6, 3063, 443], [6, 443, 3063]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "Set1 = pd.read_csv('data.csv').values.tolist()\n",
+    "X = [i[0:3] for i in Set1]\n",
+    "Y =[i[3] for i in Set1]\n",
+    "\n",
+    "print(X[0:20])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "f18850b1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['Amazon Echo', 'Belkin Motion Sensor', 'Belkin Switch', 'Blipcare BP Meter', 'Dropcam', 'HP Printer', 'NEST Smoke Sensor', 'Netatmo Camera', 'Netatmo Weather station', 'Pixstart photo frame', 'Samsung Smart Cam', 'Smart Things', 'TP-Link Camera', 'TP-Link Plug', 'Triby Speaker', 'Withings', 'Withings Scale', 'other']\n"
+     ]
+    }
+   ],
+   "source": [
+    "classes  = [\n",
+    "        \"Amazon Echo\",\n",
+    "        \"Belkin Motion Sensor\",\n",
+    "        \"Belkin Switch\",\n",
+    "        \"Blipcare BP Meter\",\n",
+    "        \"Dropcam\",\n",
+    "        \"HP Printer\",\n",
+    "        \"NEST Smoke Sensor\",\n",
+    "        \"Netatmo Camera\",\n",
+    "        \"Netatmo Weather station\",\n",
+    "        \"Pixstart photo frame\",\n",
+    "        \"Samsung Smart Cam\",\n",
+    "        \"Smart Things\",\n",
+    "        \"TP-Link Camera\",\n",
+    "        \"TP-Link Plug\",\n",
+    "        \"Triby Speaker\",\n",
+    "        \"Withings\",\n",
+    "        \"Withings Scale\",\n",
+    "        \"other\"\n",
+    "    ]\n",
+    "\n",
+    "print(classes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12ad454d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[1, 11, 17], [1, 11, 17], [1, 4, 11, 15, 17], [17], [8, 17], [], [1, 11, 17, 15], [11], [8, 17], [8, 17], [1, 11, 17], [], [], [8, 17], [17]]\n",
+      "['other', 'other', 'Dropcam', 'other', 'Netatmo Camera', 'other', 'Triby Speaker', 'Smart Things', 'other', 'Belkin Switch', 'other', 'Netatmo Camera', 'Netatmo Camera', 'other', 'other']\n",
+      "947072\n",
+      "947072\n",
+      "The accuracy was:  0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "predict_Yt = []\n",
+    "with open('compressed_tree.json', 'r') as file:\n",
+    "    data = json.load(file)\n",
+    "    #print(data['layers']['dst'])\n",
+    "    for x in X:\n",
+    "        counter = 0\n",
+    "        class_set = []\n",
+    "        for feature in reversed(data['layers']): # Visit the layers in reverse order so that each layer lines up with the matching feature column x[counter] of data.csv\n",
+    "            #print(\"The feature is: \", feature)\n",
+    "\n",
+    "            for node in data['layers'][feature]:\n",
+    "                #print(\"The node is: \", node)\n",
+    "                #print(f\"The min is: {node['min']}, the max is {node['max']}\")\n",
+    "                if node['min'] is None:\n",
+    "                    if x[counter] < node['max']:\n",
+    "                        class_set.append(node['classes'])\n",
+    "                        break # Stop at the first matching range. NOTE(review): assumes the ranges within a layer do not overlap; confirm\n",
+    "                    else:\n",
+    "                        continue\n",
+    "                elif node['max'] is None:\n",
+    "                    if node['min'] < x[counter]:\n",
+    "                        class_set.append(node['classes'])\n",
+    "                        break # Stop at the first matching range. NOTE(review): assumes the ranges within a layer do not overlap; confirm\n",
+    "                    else:\n",
+    "                        continue\n",
+    "                elif node['min'] < x[counter] and x[counter] < node['max']:\n",
+    "                    class_set.append(node['classes'])\n",
+    "                    break # Stop at the first matching range. NOTE(review): assumes the ranges within a layer do not overlap; confirm\n",
+    "\n",
+    "            counter += 1\n",
+    "        #print(\"The list of classes is: \", class_set)\n",
+    "        result = set(class_set[0])\n",
+    "        for s in class_set[1:]:\n",
+    "            result.intersection_update(s)\n",
+    "\n",
+    "        #print(\"The result was: \", result)\n",
+    "        predict_Yt.append(list(result))\n",
+    "        #print(predict_Yt)\n",
+    "\n",
+    "        \n",
+    "\n",
+    "print(predict_Yt[0:15])\n",
+    "print(Y[0:15])\n",
+    "\n",
+    "\n",
+    "print(len(predict_Yt))\n",
+    "print(len(Y))\n",
+    "\n",
+    "\n",
+    "counter = 0\n",
+    "for index in range(len(Y)):\n",
+    "    for possible_class_index in predict_Yt[index]:\n",
+    "        if Y[index] == classes[possible_class_index]:\n",
+    "            counter += 1\n",
+    "\n",
+    "\n",
+    "print(\"The accuracy was: \", counter / len(Y))\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/TreeCompress.ipynb b/TreeCompress.ipynb
index 59208a4..11ae0fc 100644
--- a/TreeCompress.ipynb
+++ b/TreeCompress.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 9,
    "id": "ec310f34",
    "metadata": {},
    "outputs": [],
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 10,
    "id": "5b54797e",
    "metadata": {},
    "outputs": [],
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 11,
    "id": "a38fdb8a",
    "metadata": {},
    "outputs": [],
@@ -38,14 +38,14 @@
     "i = 0\n",
     "\n",
     "path_ids = set()\n",
-    "path_classes = tree[\"classes\"]\n",
+    "path_classes = set()\n",
     "\n",
     "# for each path in the tree\n",
     "for path in paths:\n",
     "\t# assign a path id \n",
     "\tpath[\"id\"] = i\n",
     "\tpath_ids.add(i)\n",
-    "\t#path_classes.add(path[\"classification\"])\n",
+    "\tpath_classes.add(path[\"classification\"])\n",
     "\ti += 1\t\n",
     "\t# for each condition\n",
     "\tconditions = path[\"conditions\"]\n",
@@ -60,7 +60,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 12,
    "id": "2fd4f738",
    "metadata": {},
    "outputs": [],
@@ -83,7 +83,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 13,
    "id": "98cde024",
    "metadata": {},
    "outputs": [],
@@ -123,7 +123,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 14,
    "id": "b6fbadbf",
    "metadata": {},
    "outputs": [],
@@ -171,19 +171,16 @@
     "\t\t# for each bucket which encompases the condition's range, add this path's id to the sets \n",
     "\t\ti = 0\n",
     "\t\tfor bp in breakpoints[feature_name]:\n",
-    "\t\t\tif is_in_range(bp, lower, upper):\n",
+    "\t\t\tin_range = is_in_range(bp, lower, upper)\n",
+    "\t\t\tif in_range:\n",
     "\t\t\t\tbuckets_id[feature_name][i].add(ID)\n",
     "\t\t\t\tbuckets_class[feature_name][i].add(Class)\n",
-    "\t\t\ti += 1\n",
-    "\n",
-    "\t\tif is_in_range(bp+1, lower, upper):\n",
-    "\t\t\tbuckets_id[feature_name][i].add(ID)\n",
-    "\t\t\tbuckets_class[feature_name][i].add(Class)"
+    "\t\t\ti += 1"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 15,
    "id": "0a767971",
    "metadata": {},
    "outputs": [],
@@ -201,13 +198,13 @@
     "\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n",
     "\t\tlower = breakpoints[feature_name][i-1]\n",
     "\t\tupper = breakpoints[feature_name][i]\n",
-    "\t\tpaths = buckets_id[feature_name][i]\n",
+    "\t\tmembers = buckets_id[feature_name][i]\n",
     "\t\tclasses = buckets_class[feature_name][i]\n",
     "\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
     "\t\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
     "\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
     "\tupper = None\n",
-    "\tpaths = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
+    "\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
     "\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n",
     "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
     "\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
@@ -222,7 +219,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 16,
    "id": "561b0bc1",
    "metadata": {},
    "outputs": [],
@@ -241,7 +238,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "switch",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -255,7 +252,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,
diff --git a/TreeToRMT.ipynb b/TreeToRMT.ipynb
index d0def9e..8c8d546 100644
--- a/TreeToRMT.ipynb
+++ b/TreeToRMT.ipynb
@@ -382,7 +382,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "switch",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -396,7 +396,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,