IdealRMT-DecisionTrees/CompressedTreeParser.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "938dec51",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import argparse\n",
    "from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.tree import export_graphviz\n",
    "import pydotplus\n",
    "from matplotlib import pyplot as plt\n",
    "from labels import mac_to_label\n",
    "import json\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "442624c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read every row of data.csv into a plain Python list of per-row lists.\n",
    "Set1 = pd.read_csv('data.csv').values.tolist()\n",
    "\n",
    "# The first three columns of a row are the features; the fourth is the label.\n",
    "X = [record[:3] for record in Set1]\n",
    "Y = [record[3] for record in Set1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "12ad454d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "here1\n",
      "         protocl    src    dst    classfication\n",
      "0              6  40234   5228            other\n",
      "1              6  40234   5228            other\n",
      "2              6    443  46330          Dropcam\n",
      "3              6   3063    443            other\n",
      "4              1      0      0   Netatmo Camera\n",
      "...          ...    ...    ...              ...\n",
      "2419339        6    443  47940          Dropcam\n",
      "2419340        6  47940    443            other\n",
      "2419341        6    443  47940          Dropcam\n",
      "2419342        0      0      0  iHome PowerPlug\n",
      "2419343        0      0      0            other\n",
      "\n",
      "[2419344 rows x 4 columns]\n",
      "{8, 20}\n",
      "{13}\n",
      "[6, 40234, 5228]\n",
      "other\n"
     ]
    }
   ],
   "source": [
    "def _node_covers(node, value):\n",
    "    \"\"\"Return True when node['min'] < value <= node['max'].\n",
    "\n",
    "    A bound stored as None is treated as unbounded on that side.\n",
    "    \"\"\"\n",
    "    lower, upper = node['min'], node['max']\n",
    "    return (lower is None or lower < value) and (upper is None or value <= upper)\n",
    "\n",
    "\n",
    "with open('compressed_tree.json', 'r') as file:\n",
    "    data = json.load(file)\n",
    "\n",
    "classes = data[\"classes\"]\n",
    "# The layers have to be walked in reverse so that they line up with the\n",
    "# feature columns of data.csv; reverse once here instead of once per sample.\n",
    "layers = [data['layers'][feature] for feature in reversed(data['layers'])]\n",
    "\n",
    "predict_Yt = []\n",
    "unresolved_single_path = 0  # exactly one path left, but not exactly one class\n",
    "unresolved_other = 0        # zero or several paths left\n",
    "\n",
    "for x in X:\n",
    "    class_set = []\n",
    "    paths_set = []\n",
    "    # zip() pairs each feature value with its layer and stops at the shorter\n",
    "    # of the two, so a length mismatch cannot raise IndexError.\n",
    "    for value, nodes in zip(x, layers):\n",
    "        # Only the first node whose range covers the value is used per layer;\n",
    "        # a layer with no covering node contributes no constraint.\n",
    "        node = next((n for n in nodes if _node_covers(n, value)), None)\n",
    "        if node is not None:\n",
    "            class_set.append(node['classes'])\n",
    "            paths_set.append(node[\"paths\"])\n",
    "\n",
    "    # A class (or path) remains a candidate only if every matched layer lists it.\n",
    "    result = set.intersection(*map(set, class_set)) if class_set else set()\n",
    "    paths = set.intersection(*map(set, paths_set)) if paths_set else set()\n",
    "\n",
    "    if len(result) == 1:\n",
    "        predict_Yt.append(classes[next(iter(result))])\n",
    "        continue\n",
    "\n",
    "    # No unique class: record None so predict_Yt keeps one entry per sample,\n",
    "    # and keep going instead of aborting the whole run on the first such sample.\n",
    "    predict_Yt.append(None)\n",
    "    if len(paths) == 1:\n",
    "        unresolved_single_path += 1\n",
    "    else:\n",
    "        unresolved_other += 1\n",
    "\n",
    "print(f'{len(predict_Yt)} samples processed; unresolved: '\n",
    "      f'{unresolved_single_path} with a single path, {unresolved_other} otherwise')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8b4c56b6",
   "metadata": {},
   "outputs": [
    {
     "ename": "IndexError",
     "evalue": "list index out of range",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mIndexError\u001b[39m                                Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m      1\u001b[39m correct = \u001b[32m0\u001b[39m\n\u001b[32m      2\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(Y)):\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m     prediction = \u001b[43mpredict_Yt\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m      4\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m prediction != \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m Y[i] == prediction:\n\u001b[32m      5\u001b[39m         correct += \u001b[32m1\u001b[39m\n",
      "\u001b[31mIndexError\u001b[39m: list index out of range"
     ]
    }
   ],
   "source": [
    "# Accuracy of predict_Yt against the true labels Y.\n",
    "# zip() pairs each label with its prediction and stops at the shorter list, so\n",
    "# a label without a prediction counts as incorrect instead of raising IndexError.\n",
    "if len(predict_Yt) != len(Y):\n",
    "    print(f'warning: {len(predict_Yt)} predictions for {len(Y)} labels; '\n",
    "          'labels without a prediction count as incorrect')\n",
    "\n",
    "correct = sum(\n",
    "    1\n",
    "    for truth, prediction in zip(Y, predict_Yt)\n",
    "    if prediction is not None and truth == prediction\n",
    ")\n",
    "\n",
    "# Report 0.0 for an empty data set rather than dividing by zero.\n",
    "print(correct / len(Y) if Y else 0.0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}