{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c0ffee01",
   "metadata": {},
   "source": [
    "# Compressed decision-tree evaluation\n",
    "\n",
    "Replays every row of `data.csv` through the layer-wise tree stored in `compressed_tree.json` and reports the fraction of rows whose label is recovered exactly. Rows for which the tree does not narrow down to a single class are recorded as `None` and count as misses."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "938dec51",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import argparse\n",
    "from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.tree import export_graphviz\n",
    "import pydotplus\n",
    "from matplotlib import pyplot as plt\n",
    "from labels import mac_to_label\n",
    "import json\n",
    "import math"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0ffee02",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "442624c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first three columns of each row are the features; the fourth is the label.\n",
    "Set1 = pd.read_csv('data.csv').values.tolist()\n",
    "X = [row[0:3] for row in Set1]\n",
    "Y = [row[3] for row in Set1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0ffee03",
   "metadata": {},
   "source": [
    "## Predict with the compressed tree\n",
    "\n",
    "Each layer of the compressed tree holds interval nodes for one feature. A row picks the first node per layer whose interval `(min, max]` contains the feature value; the predicted class is whatever survives intersecting the `classes` of the picked nodes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0ffee04",
   "metadata": {},
   "outputs": [],
   "source": [
    "def node_matches(node, value):\n",
    "    \"\"\"Return True when value falls inside the node's interval (min, max].\n",
    "\n",
    "    A bound stored as None leaves that side of the interval open-ended.\n",
    "    \"\"\"\n",
    "    lower, upper = node['min'], node['max']\n",
    "    if lower is not None and not lower < value:\n",
    "        return False\n",
    "    if upper is not None and not value <= upper:\n",
    "        return False\n",
    "    return True\n",
    "\n",
    "\n",
    "def intersect_matches(x, layers, feature_order):\n",
    "    \"\"\"Intersect the class ids and path ids of the nodes that x lands in.\n",
    "\n",
    "    For the i-th feature name in feature_order, the first node of that layer\n",
    "    whose interval contains x[i] contributes its 'classes' and 'paths'.\n",
    "    A layer with no matching node contributes nothing.\n",
    "\n",
    "    Returns a (class_ids, path_ids) pair of sets; both are empty when no\n",
    "    layer matched at all.\n",
    "    \"\"\"\n",
    "    class_ids = None\n",
    "    path_ids = None\n",
    "    for position, feature in enumerate(feature_order):\n",
    "        value = x[position]\n",
    "        # Only the first matching node of a layer is used.\n",
    "        node = next((n for n in layers[feature] if node_matches(n, value)), None)\n",
    "        if node is None:\n",
    "            continue\n",
    "        if class_ids is None:\n",
    "            class_ids = set(node['classes'])\n",
    "            path_ids = set(node['paths'])\n",
    "        else:\n",
    "            class_ids.intersection_update(node['classes'])\n",
    "            path_ids.intersection_update(node['paths'])\n",
    "    if class_ids is None:\n",
    "        return set(), set()\n",
    "    return class_ids, path_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12ad454d",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('compressed_tree.json', 'r') as file:\n",
    "    data = json.load(file)\n",
    "\n",
    "classes = data['classes']\n",
    "layers = data['layers']\n",
    "# The layers are stored in the opposite order to the feature columns of\n",
    "# data.csv, so walk them in reverse to line them up with x[0], x[1], x[2].\n",
    "feature_order = list(reversed(layers))\n",
    "\n",
    "predict_Yt = []\n",
    "# Rows that do not resolve to exactly one class get a None prediction so that\n",
    "# predict_Yt stays aligned with Y. They are tallied by whether the surviving\n",
    "# paths were unique, to keep that diagnostic without printing per row.\n",
    "unresolved = {'unique_path': 0, 'no_unique_path': 0}\n",
    "for x in X:\n",
    "    class_ids, path_ids = intersect_matches(x, layers, feature_order)\n",
    "    if len(class_ids) == 1:\n",
    "        predict_Yt.append(classes[next(iter(class_ids))])\n",
    "    else:\n",
    "        predict_Yt.append(None)\n",
    "        unresolved['unique_path' if len(path_ids) == 1 else 'no_unique_path'] += 1\n",
    "\n",
    "assert len(predict_Yt) == len(Y), 'every row needs exactly one prediction slot'\n",
    "unresolved"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0ffee05",
   "metadata": {},
   "source": [
    "## Accuracy\n",
    "\n",
    "Fraction of all rows whose predicted class equals the label; unresolved rows (`None`) count as incorrect."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b4c56b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "assert len(predict_Yt) == len(Y), 'run the prediction cell first: one prediction per row is required'\n",
    "\n",
    "correct = sum(\n",
    "    1\n",
    "    for truth, prediction in zip(Y, predict_Yt)\n",
    "    if prediction is not None and truth == prediction\n",
    ")\n",
    "# Guard the division so an empty data.csv reports nan instead of raising.\n",
    "print(correct / len(Y) if Y else float('nan'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}