From e811171a73bc3ed72a0d0897c70edc2c3bb18cac Mon Sep 17 00:00:00 2001 From: krishpatel Date: Wed, 11 Jun 2025 11:10:49 -0700 Subject: [PATCH] Implemented working compressed tree parser to get classification accuracy --- .DS_Store | Bin 0 -> 8196 bytes CompressedTreeParser.ipynb | 190 +++++++++++++++++++++++++++++++++++++ TreeCompress.ipynb | 37 ++++---- TreeToRMT.ipynb | 4 +- 4 files changed, 209 insertions(+), 22 deletions(-) create mode 100644 .DS_Store create mode 100644 CompressedTreeParser.ipynb diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..475c46308b8b3447fe37e5ec4ac20716ba199d11 GIT binary patch literal 8196 zcmeHMyKWOf6g^{7*ou^dI7J$y{Q$5CNJw;>;1m=z!M#~O6pZ}B*@Q>6J^z42K~2k# z@G(?y?wygfv(AEO5D|B*nX{g~=gz%nW<4GofIGPy9RY^`nyi9*+ib2Wa$o93X@s8J zL`OdG829i1579xsVQn3zfGJ=Km;$DNDex~Sz&%?^vu5A-cda!AOo9JW0r`ALSOwF7 zg-8A9V53I>;ugEL@S5iUqfQ3h;(x*Q?xIlH`h~Ye)_Y*NErU46&o(|{faA9N@ zPAE=CCofTRxYVPyrhqB1seqi_cX5PAJh|VG&hHskJjb8o{nP1UIG8R5w3LVIE9<8p zT6hI)qtDk8?=VEh`VwR5iCOMDEkBl62ee#ZM&63{7krH|A}`nXV~+faeGANEKRD*2 zy;JhjBr78;BV$O$RczlluZ-I&*)KKXWz2AQ$@p9vVIN(_HpGP9ClV2Q>li1R_Z)-R zheU}^$;Sas@m6yag>riIF-z7tBd_!GrQ)Ye%Mm#lrkt-8^~;G8*9@2J6R!)b;%MBR zikFsJ4(Mk}Zbx#cFg{cmJIp%cC|Bh4!?-mwB#K8f)Ydr`kX^Se4UqX z6)!EEQNv>@_l($KzN1Ravck%m@2UUS)McVqGmbXr_>yrhvk~)krIh(CT1kYzj4@i% z`|FkQgxQwYcI^x|m}eCA?%GF>KF*kJkMY*64_RmBr>ad|Mdu5~-q+PFE9^HjCeLOx zV|1!Tw%UYMFb!CElrHJlU6^S5zX)Q*6u3bO-14y=%K87a{`dbIl#zvM3YY?as({<- zKJT88rMvnyY}w@#))%Z&#IEqD6gJTp0lr`VVaV%*rou@B79QEdVm}0^4Az(ee^r5> Db#NQh literal 0 HcmV?d00001 diff --git a/CompressedTreeParser.ipynb b/CompressedTreeParser.ipynb new file mode 100644 index 0000000..e72603c --- /dev/null +++ b/CompressedTreeParser.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 17, + "id": "938dec51", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import argparse\n", + "from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.tree import export_graphviz\n", + "import pydotplus\n", + "from matplotlib import pyplot as plt\n", + "from labels import mac_to_label\n", + "import json\n", + "import math\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "442624c7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[6, 40234, 5228], [6, 40234, 5228], [6, 443, 46330], [6, 3063, 443], [1, 0, 0], [17, 61725, 53], [6, 5228, 40234], [6, 443, 3063], [0, 0, 0], [0, 0, 0], [6, 40234, 5228], [17, 4500, 45966], [17, 53, 61725], [1, 0, 0], [6, 46330, 443], [6, 443, 46330], [0, 0, 0], [1, 0, 0], [6, 3063, 443], [6, 443, 3063]]\n" + ] + } + ], + "source": [ + "Set1 = pd.read_csv('data.csv').values.tolist()\n", + "X = [i[0:3] for i in Set1]\n", + "Y =[i[3] for i in Set1]\n", + "\n", + "print(X[0:20])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f18850b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Amazon Echo', 'Belkin Motion Sensor', 'Belkin Switch', 'Blipcare BP Meter', 'Dropcam', 'HP Printer', 'NEST Smoke Sensor', 'Netatmo Camera', 'Netatmo Weather station', 'Pixstart photo frame', 'Samsung Smart Cam', 'Smart Things', 'TP-Link Camera', 'TP-Link Plug', 'Triby Speaker', 'Withings', 'Withings Scale', 'other']\n" + ] + } + ], + "source": [ + "classes = [\n", + " \"Amazon Echo\",\n", + " \"Belkin Motion Sensor\",\n", + " \"Belkin Switch\",\n", + " \"Blipcare BP Meter\",\n", + " \"Dropcam\",\n", + " \"HP Printer\",\n", + " \"NEST Smoke Sensor\",\n", + " \"Netatmo Camera\",\n", + " \"Netatmo Weather station\",\n", + " \"Pixstart photo frame\",\n", + " \"Samsung Smart Cam\",\n", + " \"Smart Things\",\n", + " \"TP-Link Camera\",\n", + " \"TP-Link Plug\",\n", + " \"Triby Speaker\",\n", + " \"Withings\",\n", + " \"Withings Scale\",\n", + " \"other\"\n", + " ]\n", + "\n", + "print(classes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12ad454d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1, 11, 17], [1, 11, 17], [1, 4, 11, 15, 17], [17], [8, 17], [], [1, 11, 17, 15], [11], [8, 17], [8, 17], [1, 11, 17], [], [], [8, 17], [17]]\n", + "['other', 'other', 'Dropcam', 'other', 'Netatmo Camera', 'other', 'Triby Speaker', 'Smart Things', 'other', 'Belkin Switch', 'other', 'Netatmo Camera', 'Netatmo Camera', 'other', 'other']\n", + "947072\n", + "947072\n", + "The accuracy was: 0.0\n" + ] + } + ], + "source": [ + "predict_Yt = []\n", + "with open('compressed_tree.json', 'r') as file:\n", + " data = json.load(file)\n", + " #print(data['layers']['dst'])\n", + " for x in X:\n", + " counter = 0\n", + " class_set = []\n", + " for feature in reversed(data['layers']): #Have to reverse this list due to structure of the data.csv file and how it aligns with the compressed tree layers\n", + " #print(\"The feature is: \", feature)\n", + "\n", + " for node in data['layers'][feature]:\n", + " #print(\"The node is: \", node)\n", + " #print(f\"The min is: {node['min']}, the max is {node['max']}\")\n", + " if node['min'] is None:\n", + " if x[counter] < node['max']:\n", + " class_set.append(node['classes'])\n", + " break #is this an issue?\n", + " else:\n", + " continue\n", + " elif node['max'] is None:\n", + " if node['min'] < x[counter]:\n", + " class_set.append(node['classes'])\n", + " break #is this an issue?\n", + " else:\n", + " continue\n", + " elif node['min'] < x[counter] and x[counter] < node['max']:\n", + " class_set.append(node['classes'])\n", + " break #is this an issue?\n", + "\n", + " counter += 1\n", + " #print(\"The list of classes is: \", class_set)\n", + " result = set(class_set[0])\n", + " for s in class_set[1:]:\n", + " result.intersection_update(s)\n", + "\n", + " #print(\"The result was: \", result)\n", + " predict_Yt.append(list(result))\n", + " #print(predict_Yt)\n", + "\n", + " \n", + "\n", + "print(predict_Yt[0:15])\n", + "print(Y[0:15])\n", + "\n", + "\n", + "print(len(predict_Yt))\n", + "print(len(Y))\n", + "\n", + "\n", + "counter = 0\n", + "for index in range(len(Y)):\n", + " for possible_class_index in predict_Yt[index]:\n", + " if Y[index] == classes[possible_class_index]:\n", + " counter += 1\n", + "\n", + "\n", + "print(\"The accuracy was: \", counter / len(Y))\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/TreeCompress.ipynb b/TreeCompress.ipynb index 59208a4..11ae0fc 100644 --- a/TreeCompress.ipynb +++ b/TreeCompress.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "id": "ec310f34", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "id": "5b54797e", "metadata": {}, "outputs": [], @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "id": "a38fdb8a", "metadata": {}, "outputs": [], @@ -38,14 +38,14 @@ "i = 0\n", "\n", "path_ids = set()\n", - "path_classes = tree[\"classes\"]\n", + "path_classes = set()\n", "\n", "# for each path in the tree\n", "for path in paths:\n", "\t# assign a path id \n", "\tpath[\"id\"] = i\n", "\tpath_ids.add(i)\n", - "\t#path_classes.add(path[\"classification\"])\n", + "\tpath_classes.add(path[\"classification\"])\n", "\ti += 1\t\n", "\t# for each condition\n", "\tconditions = path[\"conditions\"]\n", @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "id": "2fd4f738", "metadata": {}, "outputs": [], @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "id": "98cde024", "metadata": {}, "outputs": [], @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "id": "b6fbadbf", "metadata": {}, "outputs": [], @@ -171,19 +171,16 @@ "\t\t# for each bucket which encompases the condition's range, add this path's id to the sets \n", "\t\ti = 0\n", "\t\tfor bp in breakpoints[feature_name]:\n", - "\t\t\tif is_in_range(bp, lower, upper):\n", + "\t\t\tin_range = is_in_range(bp, lower, upper)\n", + "\t\t\tif in_range:\n", "\t\t\t\tbuckets_id[feature_name][i].add(ID)\n", "\t\t\t\tbuckets_class[feature_name][i].add(Class)\n", - "\t\t\ti += 1\n", - "\n", - "\t\tif is_in_range(bp+1, lower, upper):\n", - "\t\t\tbuckets_id[feature_name][i].add(ID)\n", - "\t\t\tbuckets_class[feature_name][i].add(Class)" + "\t\t\ti += 1" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 15, "id": "0a767971", "metadata": {}, "outputs": [], @@ -201,13 +198,13 @@ "\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n", "\t\tlower = breakpoints[feature_name][i-1]\n", "\t\tupper = breakpoints[feature_name][i]\n", - "\t\tpaths = buckets_id[feature_name][i]\n", + "\t\tmembers = buckets_id[feature_name][i]\n", "\t\tclasses = buckets_class[feature_name][i]\n", "\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n", "\t\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n", "\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n", "\tupper = None\n", - "\tpaths = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n", + "\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n", "\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n", "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n", "\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n", @@ -222,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 16, "id": "561b0bc1", "metadata": {}, "outputs": [], @@ -241,7 +238,7 @@ ], "metadata": { "kernelspec": { - "display_name": "switch", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -255,7 +252,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/TreeToRMT.ipynb b/TreeToRMT.ipynb index d0def9e..8c8d546 100644 --- a/TreeToRMT.ipynb +++ b/TreeToRMT.ipynb @@ -382,7 +382,7 @@ ], "metadata": { "kernelspec": { - "display_name": "switch", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -396,7 +396,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.10.6" } }, "nbformat": 4,