mirror of
https://github.com/ltcptgeneral/IdealRMT-DecisionTrees.git
synced 2025-09-04 14:27:23 +00:00
Implemented working compressed tree parser to get classification accuracy
This commit is contained in:
190
CompressedTreeParser.ipynb
Normal file
190
CompressedTreeParser.ipynb
Normal file
@@ -0,0 +1,190 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "938dec51",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import argparse\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"from sklearn.tree import export_graphviz\n",
|
||||
"import pydotplus\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from labels import mac_to_label\n",
|
||||
"import json\n",
|
||||
"import math\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "442624c7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[6, 40234, 5228], [6, 40234, 5228], [6, 443, 46330], [6, 3063, 443], [1, 0, 0], [17, 61725, 53], [6, 5228, 40234], [6, 443, 3063], [0, 0, 0], [0, 0, 0], [6, 40234, 5228], [17, 4500, 45966], [17, 53, 61725], [1, 0, 0], [6, 46330, 443], [6, 443, 46330], [0, 0, 0], [1, 0, 0], [6, 3063, 443], [6, 443, 3063]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"Set1 = pd.read_csv('data.csv').values.tolist()\n",
|
||||
"X = [i[0:3] for i in Set1]\n",
|
||||
"Y =[i[3] for i in Set1]\n",
|
||||
"\n",
|
||||
"print(X[0:20])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f18850b1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['Amazon Echo', 'Belkin Motion Sensor', 'Belkin Switch', 'Blipcare BP Meter', 'Dropcam', 'HP Printer', 'NEST Smoke Sensor', 'Netatmo Camera', 'Netatmo Weather station', 'Pixstart photo frame', 'Samsung Smart Cam', 'Smart Things', 'TP-Link Camera', 'TP-Link Plug', 'Triby Speaker', 'Withings', 'Withings Scale', 'other']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"classes = [\n",
|
||||
" \"Amazon Echo\",\n",
|
||||
" \"Belkin Motion Sensor\",\n",
|
||||
" \"Belkin Switch\",\n",
|
||||
" \"Blipcare BP Meter\",\n",
|
||||
" \"Dropcam\",\n",
|
||||
" \"HP Printer\",\n",
|
||||
" \"NEST Smoke Sensor\",\n",
|
||||
" \"Netatmo Camera\",\n",
|
||||
" \"Netatmo Weather station\",\n",
|
||||
" \"Pixstart photo frame\",\n",
|
||||
" \"Samsung Smart Cam\",\n",
|
||||
" \"Smart Things\",\n",
|
||||
" \"TP-Link Camera\",\n",
|
||||
" \"TP-Link Plug\",\n",
|
||||
" \"Triby Speaker\",\n",
|
||||
" \"Withings\",\n",
|
||||
" \"Withings Scale\",\n",
|
||||
" \"other\"\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"print(classes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "12ad454d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[1, 11, 17], [1, 11, 17], [1, 4, 11, 15, 17], [17], [8, 17], [], [1, 11, 17, 15], [11], [8, 17], [8, 17], [1, 11, 17], [], [], [8, 17], [17]]\n",
|
||||
"['other', 'other', 'Dropcam', 'other', 'Netatmo Camera', 'other', 'Triby Speaker', 'Smart Things', 'other', 'Belkin Switch', 'other', 'Netatmo Camera', 'Netatmo Camera', 'other', 'other']\n",
|
||||
"947072\n",
|
||||
"947072\n",
|
||||
"The accuracy was: 0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"predict_Yt = []\n",
|
||||
"with open('compressed_tree.json', 'r') as file:\n",
|
||||
" data = json.load(file)\n",
|
||||
" #print(data['layers']['dst'])\n",
|
||||
" for x in X:\n",
|
||||
" counter = 0\n",
|
||||
" class_set = []\n",
|
||||
" for feature in reversed(data['layers']): #Have to reverse this list due to structure of the data.csv file and how it aligns with the compressed tree layers\n",
|
||||
" #print(\"The feature is: \", feature)\n",
|
||||
"\n",
|
||||
" for node in data['layers'][feature]:\n",
|
||||
" #print(\"The node is: \", node)\n",
|
||||
" #print(f\"The min is: {node['min']}, the max is {node['max']}\")\n",
|
||||
" if node['min'] is None:\n",
|
||||
" if x[counter] < node['max']:\n",
|
||||
" class_set.append(node['classes'])\n",
|
||||
" break # stops at the first bucket whose range contains x[counter]; TODO confirm the buckets of a feature never overlap\n",
|
||||
" else:\n",
|
||||
" continue\n",
|
||||
" elif node['max'] is None:\n",
|
||||
" if node['min'] < x[counter]:\n",
|
||||
" class_set.append(node['classes'])\n",
|
||||
" break # stops at the first bucket whose range contains x[counter]; TODO confirm the buckets of a feature never overlap\n",
|
||||
" else:\n",
|
||||
" continue\n",
|
||||
" elif node['min'] < x[counter] and x[counter] < node['max']:\n",
|
||||
" class_set.append(node['classes'])\n",
|
||||
" break # stops at the first bucket whose range contains x[counter]; TODO confirm the buckets of a feature never overlap\n",
|
||||
"\n",
|
||||
" counter += 1\n",
|
||||
" #print(\"The list of classes is: \", class_set)\n",
|
||||
" result = set(class_set[0])\n",
|
||||
" for s in class_set[1:]:\n",
|
||||
" result.intersection_update(s)\n",
|
||||
"\n",
|
||||
" #print(\"The result was: \", result)\n",
|
||||
" predict_Yt.append(list(result))\n",
|
||||
" #print(predict_Yt)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"print(predict_Yt[0:15])\n",
|
||||
"print(Y[0:15])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(len(predict_Yt))\n",
|
||||
"print(len(Y))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"counter = 0\n",
|
||||
"for index in range(len(Y)):\n",
|
||||
" for possible_class_index in predict_Yt[index]:\n",
|
||||
" if Y[index] == classes[possible_class_index]:\n",
|
||||
" counter += 1\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"The accuracy was: \", counter / len(Y))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 9,
|
||||
"id": "ec310f34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 10,
|
||||
"id": "5b54797e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -28,7 +28,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 11,
|
||||
"id": "a38fdb8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -38,14 +38,14 @@
|
||||
"i = 0\n",
|
||||
"\n",
|
||||
"path_ids = set()\n",
|
||||
"path_classes = tree[\"classes\"]\n",
|
||||
"path_classes = set()\n",
|
||||
"\n",
|
||||
"# for each path in the tree\n",
|
||||
"for path in paths:\n",
|
||||
"\t# assign a path id \n",
|
||||
"\tpath[\"id\"] = i\n",
|
||||
"\tpath_ids.add(i)\n",
|
||||
"\t#path_classes.add(path[\"classification\"])\n",
|
||||
"\tpath_classes.add(path[\"classification\"])\n",
|
||||
"\ti += 1\t\n",
|
||||
"\t# for each condition\n",
|
||||
"\tconditions = path[\"conditions\"]\n",
|
||||
@@ -60,7 +60,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 12,
|
||||
"id": "2fd4f738",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -83,7 +83,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 13,
|
||||
"id": "98cde024",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -123,7 +123,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 14,
|
||||
"id": "b6fbadbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -171,19 +171,16 @@
|
||||
"\t\t# for each bucket which encompasses the condition's range, add this path's id to the sets \n",
|
||||
"\t\ti = 0\n",
|
||||
"\t\tfor bp in breakpoints[feature_name]:\n",
|
||||
"\t\t\tif is_in_range(bp, lower, upper):\n",
|
||||
"\t\t\tin_range = is_in_range(bp, lower, upper)\n",
|
||||
"\t\t\tif in_range:\n",
|
||||
"\t\t\t\tbuckets_id[feature_name][i].add(ID)\n",
|
||||
"\t\t\t\tbuckets_class[feature_name][i].add(Class)\n",
|
||||
"\t\t\ti += 1\n",
|
||||
"\n",
|
||||
"\t\tif is_in_range(bp+1, lower, upper):\n",
|
||||
"\t\t\tbuckets_id[feature_name][i].add(ID)\n",
|
||||
"\t\t\tbuckets_class[feature_name][i].add(Class)"
|
||||
"\t\t\ti += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 15,
|
||||
"id": "0a767971",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -201,13 +198,13 @@
|
||||
"\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n",
|
||||
"\t\tlower = breakpoints[feature_name][i-1]\n",
|
||||
"\t\tupper = breakpoints[feature_name][i]\n",
|
||||
"\t\tpaths = buckets_id[feature_name][i]\n",
|
||||
"\t\tmembers = buckets_id[feature_name][i]\n",
|
||||
"\t\tclasses = buckets_class[feature_name][i]\n",
|
||||
"\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
|
||||
"\t\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||
"\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
|
||||
"\tupper = None\n",
|
||||
"\tpaths = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
|
||||
"\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
|
||||
"\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n",
|
||||
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
|
||||
"\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||
@@ -222,7 +219,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 16,
|
||||
"id": "561b0bc1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -241,7 +238,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "switch",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -255,7 +252,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@@ -382,7 +382,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "switch",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -396,7 +396,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Reference in New Issue
Block a user