diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..475c463 Binary files /dev/null and b/.DS_Store differ diff --git a/CompressedTreeParser.ipynb b/CompressedTreeParser.ipynb new file mode 100644 index 0000000..e72603c --- /dev/null +++ b/CompressedTreeParser.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 17, + "id": "938dec51", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import argparse\n", + "from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.tree import export_graphviz\n", + "import pydotplus\n", + "from matplotlib import pyplot as plt\n", + "from labels import mac_to_label\n", + "import json\n", + "import math\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "442624c7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[6, 40234, 5228], [6, 40234, 5228], [6, 443, 46330], [6, 3063, 443], [1, 0, 0], [17, 61725, 53], [6, 5228, 40234], [6, 443, 3063], [0, 0, 0], [0, 0, 0], [6, 40234, 5228], [17, 4500, 45966], [17, 53, 61725], [1, 0, 0], [6, 46330, 443], [6, 443, 46330], [0, 0, 0], [1, 0, 0], [6, 3063, 443], [6, 443, 3063]]\n" + ] + } + ], + "source": [ + "Set1 = pd.read_csv('data.csv').values.tolist()\n", + "X = [i[0:3] for i in Set1]\n", + "Y =[i[3] for i in Set1]\n", + "\n", + "print(X[0:20])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f18850b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Amazon Echo', 'Belkin Motion Sensor', 'Belkin Switch', 'Blipcare BP Meter', 'Dropcam', 'HP Printer', 'NEST Smoke Sensor', 'Netatmo Camera', 'Netatmo Weather station', 'Pixstart photo frame', 'Samsung Smart Cam', 'Smart Things', 'TP-Link Camera', 'TP-Link Plug', 'Triby Speaker', 'Withings', 'Withings Scale', 'other']\n" + ] + } + ], + "source": [ + "classes = [\n", + " \"Amazon Echo\",\n", + " \"Belkin Motion Sensor\",\n", + " \"Belkin Switch\",\n", + " \"Blipcare BP Meter\",\n", + " \"Dropcam\",\n", + " \"HP Printer\",\n", + " \"NEST Smoke Sensor\",\n", + " \"Netatmo Camera\",\n", + " \"Netatmo Weather station\",\n", + " \"Pixstart photo frame\",\n", + " \"Samsung Smart Cam\",\n", + " \"Smart Things\",\n", + " \"TP-Link Camera\",\n", + " \"TP-Link Plug\",\n", + " \"Triby Speaker\",\n", + " \"Withings\",\n", + " \"Withings Scale\",\n", + " \"other\"\n", + " ]\n", + "\n", + "print(classes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12ad454d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1, 11, 17], [1, 11, 17], [1, 4, 11, 15, 17], [17], [8, 17], [], [1, 11, 17, 15], [11], [8, 17], [8, 17], [1, 11, 17], [], [], [8, 17], [17]]\n", + "['other', 'other', 'Dropcam', 'other', 'Netatmo Camera', 'other', 'Triby Speaker', 'Smart Things', 'other', 'Belkin Switch', 'other', 'Netatmo Camera', 'Netatmo Camera', 'other', 'other']\n", + "947072\n", + "947072\n", + "The accuracy was: 0.0\n" + ] + } + ], + "source": [ + "predict_Yt = []\n", + "with open('compressed_tree.json', 'r') as file:\n", + " data = json.load(file)\n", + " #print(data['layers']['dst'])\n", + " for x in X:\n", + " counter = 0\n", + " class_set = []\n", + " for feature in reversed(data['layers']): #Have to reverse this list due to structure of the data.csv file and how it aligns with the compressed tree layers\n", + " #print(\"The feature is: \", feature)\n", + "\n", + " for node in data['layers'][feature]:\n", + " #print(\"The node is: \", node)\n", + " #print(f\"The min is: {node['min']}, the max is {node['max']}\")\n", + " if node['min'] is None:\n", + " if x[counter] < node['max']:\n", + " class_set.append(node['classes'])\n", + " break #is this an issue?\n", + " else:\n", + " continue\n", + " elif node['max'] is None:\n", + " if node['min'] < x[counter]:\n", + " class_set.append(node['classes'])\n", + " break #is this an issue?\n", + " else:\n", + " continue\n", + " elif node['min'] < x[counter] and x[counter] < node['max']:\n", + " class_set.append(node['classes'])\n", + " break #is this an issue?\n", + "\n", + " counter += 1\n", + " #print(\"The list of classes is: \", class_set)\n", + " result = set(class_set[0])\n", + " for s in class_set[1:]:\n", + " result.intersection_update(s)\n", + "\n", + " #print(\"The result was: \", result)\n", + " predict_Yt.append(list(result))\n", + " #print(predict_Yt)\n", + "\n", + " \n", + "\n", + "print(predict_Yt[0:15])\n", + "print(Y[0:15])\n", + "\n", + "\n", + "print(len(predict_Yt))\n", + "print(len(Y))\n", + "\n", + "\n", + "counter = 0\n", + "for index in range(len(Y)):\n", + " for possible_class_index in predict_Yt[index]:\n", + " if Y[index] == classes[possible_class_index]:\n", + " counter += 1\n", + "\n", + "\n", + "print(\"The accuracy was: \", counter / len(Y))\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/TreeCompress.ipynb b/TreeCompress.ipynb index 59208a4..11ae0fc 100644 --- a/TreeCompress.ipynb +++ b/TreeCompress.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "id": "ec310f34", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "id": "5b54797e", "metadata": {}, "outputs": [], @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "id": "a38fdb8a", "metadata": {}, "outputs": [], @@ -38,14 +38,14 @@ "i = 0\n", "\n", "path_ids = set()\n", - "path_classes = tree[\"classes\"]\n", + "path_classes = set()\n", "\n", "# for each path in the tree\n", "for path in paths:\n", "\t# assign a path id \n", "\tpath[\"id\"] = i\n", "\tpath_ids.add(i)\n", - "\t#path_classes.add(path[\"classification\"])\n", + "\tpath_classes.add(path[\"classification\"])\n", "\ti += 1\t\n", "\t# for each condition\n", "\tconditions = path[\"conditions\"]\n", @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "id": "2fd4f738", "metadata": {}, "outputs": [], @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "id": "98cde024", "metadata": {}, "outputs": [], @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "id": "b6fbadbf", "metadata": {}, "outputs": [], @@ -171,19 +171,16 @@ "\t\t# for each bucket which encompases the condition's range, add this path's id to the sets \n", "\t\ti = 0\n", "\t\tfor bp in breakpoints[feature_name]:\n", - "\t\t\tif is_in_range(bp, lower, upper):\n", + "\t\t\tin_range = is_in_range(bp, lower, upper)\n", + "\t\t\tif in_range:\n", "\t\t\t\tbuckets_id[feature_name][i].add(ID)\n", "\t\t\t\tbuckets_class[feature_name][i].add(Class)\n", - "\t\t\ti += 1\n", - "\n", - "\t\tif is_in_range(bp+1, lower, upper):\n", - "\t\t\tbuckets_id[feature_name][i].add(ID)\n", - "\t\t\tbuckets_class[feature_name][i].add(Class)" + "\t\t\ti += 1" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 15, "id": "0a767971", "metadata": {}, "outputs": [], @@ -201,13 +198,13 @@ "\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n", "\t\tlower = breakpoints[feature_name][i-1]\n", "\t\tupper = breakpoints[feature_name][i]\n", - "\t\tpaths = buckets_id[feature_name][i]\n", + "\t\tmembers = buckets_id[feature_name][i]\n", "\t\tclasses = buckets_class[feature_name][i]\n", "\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n", "\t\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n", "\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n", "\tupper = None\n", - "\tpaths = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n", + "\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n", "\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n", "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n", "\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n", @@ -222,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 16, "id": "561b0bc1", "metadata": {}, "outputs": [], @@ -241,7 +238,7 @@ ], "metadata": { "kernelspec": { - "display_name": "switch", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -255,7 +252,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/TreeToRMT.ipynb b/TreeToRMT.ipynb index d0def9e..8c8d546 100644 --- a/TreeToRMT.ipynb +++ b/TreeToRMT.ipynb @@ -382,7 +382,7 @@ ], "metadata": { "kernelspec": { - "display_name": "switch", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -396,7 +396,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.10.6" } }, "nbformat": 4,