Fixed loop ordering and path_to_class in JSON

This commit is contained in:
Jai Parera
2025-06-11 23:37:33 -07:00
parent 8301998da3
commit 1585399c7d
3 changed files with 111 additions and 51 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 138,
"id": "938dec51",
"metadata": {},
"outputs": [],
@@ -22,31 +22,34 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 139,
"id": "442624c7",
"metadata": {},
"outputs": [],
"source": [
"Set1 = pd.read_csv('data.csv').values.tolist()\n",
"Set1 = pd.read_csv('data/combined/data.csv').values.tolist()\n",
"X = [i[0:3] for i in Set1]\n",
"Y =[i[3] for i in Set1]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 142,
"id": "12ad454d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'0': 20, '1': 20, '2': 9, '3': 20, '4': 0, '5': 13, '6': 20, '7': 0, '8': 12, '9': 4, '10': 20, '11': 4, '12': 1, '13': 16, '14': 20, '15': 2, '16': 20, '17': 0, '18': 20, '19': 20, '20': 20, '21': 20, '22': 20, '23': 1, '24': 2, '25': 20, '26': 13, '27': 11, '28': 20, '29': 20}\n"
]
}
],
"source": [
"predict_Yt = []\n",
"index=0\n",
"original_tree = open('tree.json', 'r')\n",
"original_tree = json.load(original_tree)\n",
"path_to_class = {}\n",
"for i in range(len(original_tree[\"paths\"])):\n",
" path = original_tree[\"paths\"][i]\n",
" path_to_class[path[\"id\"]] = path[\"classification\"]\n",
"\n",
"with open('compressed_tree.json', 'r') as file:\n",
" data = json.load(file)\n",
" classes = data[\"classes\"]\n",
@@ -54,26 +57,28 @@
" counter = 0\n",
" class_set = []\n",
" paths_set = []\n",
" for feature in reversed(data['layers']): #Have to reverse this list due to structure of the data.csv file and how it aligns with the compressed tree layers\n",
" for node in data['layers'][feature]:\n",
" if node['min'] is None:\n",
" if x[counter] <= node['max']:\n",
" features = [\"protocol\", \"src\", \"dst\"]\n",
" for feature in features:\n",
" if feature in data[\"layers\"]:\n",
" for node in data['layers'][feature]:\n",
" if node['min'] is None:\n",
" if x[counter] <= node['max']:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['max'] is None:\n",
" if node['min'] < x[counter]:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['min'] < x[counter] and x[counter] <= node['max']:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['max'] is None:\n",
" if node['min'] < x[counter]:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['min'] < x[counter] and x[counter] <= node['max']:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
"\n",
" counter += 1\n",
" result = set(class_set[0])\n",
@@ -85,9 +90,13 @@
"\n",
" #predict_Yt.append(list(result))\n",
" #print(result)\n",
" if len(paths) != 1:\n",
" print(paths)\n",
" print(x)\n",
" print(result)\n",
" assert len(paths) == 1\n",
" path = list(paths)[0]\n",
" pred = path_to_class[path]\n",
" pred = data[\"path_to_class\"][str(path)]\n",
" pred_class = classes[pred]\n",
" predict_Yt.append(pred_class)\n",
" \n",
@@ -96,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 143,
"id": "8b4c56b6",
"metadata": {},
"outputs": [
@@ -104,7 +113,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"0.8451332670948943\n"
"0.8410252791654538\n"
]
}
],