mirror of
https://github.com/ltcptgeneral/IdealRMT-DecisionTrees.git
synced 2025-09-04 14:27:23 +00:00
185 lines
6.5 KiB
Plaintext
185 lines
6.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "938dec51",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import argparse\n",
|
|
"from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n",
|
|
"from sklearn.metrics import accuracy_score\n",
|
|
"from sklearn.tree import export_graphviz\n",
|
|
"import pydotplus\n",
|
|
"from matplotlib import pyplot as plt\n",
|
|
"from labels import mac_to_label\n",
|
|
"import json\n",
|
|
"import math"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "442624c7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Read the labelled dataset into plain Python rows (one list per CSV record).\n",
"Set1 = pd.read_csv('data.csv').values.tolist()\n",
"\n",
"# The first three fields of every row are the features, the fourth is the label.\n",
"X = [row[:3] for row in Set1]\n",
"Y = [row[3] for row in Set1]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "12ad454d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"here1\n",
|
|
" protocl src dst classfication\n",
|
|
"0 6 40234 5228 other\n",
|
|
"1 6 40234 5228 other\n",
|
|
"2 6 443 46330 Dropcam\n",
|
|
"3 6 3063 443 other\n",
|
|
"4 1 0 0 Netatmo Camera\n",
|
|
"... ... ... ... ...\n",
|
|
"2419339 6 443 47940 Dropcam\n",
|
|
"2419340 6 47940 443 other\n",
|
|
"2419341 6 443 47940 Dropcam\n",
|
|
"2419342 0 0 0 iHome PowerPlug\n",
|
|
"2419343 0 0 0 other\n",
|
|
"\n",
|
|
"[2419344 rows x 4 columns]\n",
|
|
"{8, 20}\n",
|
|
"{13}\n",
|
|
"[6, 40234, 5228]\n",
|
|
"other\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"def _in_range(value, low, high):\n",
"    \"\"\"Return True when low < value <= high; a bound of None is treated as unbounded.\"\"\"\n",
"    return (low is None or low < value) and (high is None or value <= high)\n",
"\n",
"\n",
"# Load the compressed tree once; the file is closed before the prediction loop starts.\n",
"with open('compressed_tree.json', 'r') as file:\n",
"    data = json.load(file)\n",
"classes = data[\"classes\"]\n",
"\n",
"#Have to reverse this list due to structure of the data.csv file and how it aligns with the compressed tree layers\n",
"layer_order = list(reversed(data['layers']))\n",
"\n",
"predict_Yt = []  # one entry per row of X: a class label, or None when no unique class is found\n",
"unresolved = 0   # number of rows that did not resolve to exactly one class\n",
"\n",
"for index, x in enumerate(X):\n",
"    class_set = []\n",
"    paths_set = []\n",
"    for counter, feature in enumerate(layer_order):\n",
"        for node in data['layers'][feature]:\n",
"            if _in_range(x[counter], node['min'], node['max']):\n",
"                class_set.append(node['classes'])\n",
"                paths_set.append(node[\"paths\"])\n",
"                # Leaves only the node loop: the first matching range of this layer is used.\n",
"                # NOTE(review): assumes ranges within a layer do not overlap - confirm.\n",
"                break\n",
"\n",
"    # Candidates are whatever every matched layer agrees on (empty when no layer matched).\n",
"    result = set.intersection(*map(set, class_set)) if class_set else set()\n",
"    paths = set.intersection(*map(set, paths_set)) if paths_set else set()\n",
"\n",
"    if len(result) == 1:\n",
"        predict_Yt.append(classes[next(iter(result))])\n",
"        continue\n",
"\n",
"    # No unique class: record None and keep going so predict_Yt stays aligned with Y.\n",
"    # Diagnostics are printed for the first such sample only, to avoid flooding the output.\n",
"    if unresolved == 0:\n",
"        print(\"here1\" if len(paths) == 1 else \"here2\")\n",
"        print(result)\n",
"        print(paths)\n",
"        print(x)\n",
"        print(Y[index])\n",
"    unresolved += 1\n",
"    predict_Yt.append(None)\n",
"\n",
"assert len(predict_Yt) == len(X), \"every sample must have a prediction slot\"\n",
"print(f\"{unresolved} of {len(X)} samples had no unique class\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "8b4c56b6",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "IndexError",
|
|
"evalue": "list index out of range",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
"\u001b[31mIndexError\u001b[39m Traceback (most recent call last)",
|
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m correct = \u001b[32m0\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(Y)):\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m prediction = \u001b[43mpredict_Yt\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m prediction != \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m Y[i] == prediction:\n\u001b[32m 5\u001b[39m correct += \u001b[32m1\u001b[39m\n",
|
|
"\u001b[31mIndexError\u001b[39m: list index out of range"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Accuracy of the compressed-tree predictions against the ground-truth labels.\n",
"# A missing prediction (None, or no entry at all in predict_Yt) counts as incorrect,\n",
"# so the denominator is always the number of labelled samples.\n",
"missing = len(Y) - len(predict_Yt)\n",
"if missing > 0:\n",
"    print(f\"warning: {missing} samples have no prediction and are counted as incorrect\")\n",
"\n",
"correct = 0\n",
"for truth, prediction in zip(Y, predict_Yt):\n",
"    if prediction is not None and truth == prediction:\n",
"        correct += 1\n",
"\n",
"# Guard the empty-dataset case instead of dividing by zero.\n",
"print(correct / len(Y) if Y else 0.0)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|