Fixed feature loop ordering (explicit protocol, src, dst order) and stored path_to_class in the compressed tree JSON

This commit is contained in:
Jai Parera
2025-06-11 23:37:33 -07:00
parent 8301998da3
commit 1585399c7d
3 changed files with 111 additions and 51 deletions

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 138,
"id": "938dec51", "id": "938dec51",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -22,31 +22,34 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 139,
"id": "442624c7", "id": "442624c7",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"Set1 = pd.read_csv('data.csv').values.tolist()\n", "Set1 = pd.read_csv('data/combined/data.csv').values.tolist()\n",
"X = [i[0:3] for i in Set1]\n", "X = [i[0:3] for i in Set1]\n",
"Y =[i[3] for i in Set1]" "Y =[i[3] for i in Set1]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 142,
"id": "12ad454d", "id": "12ad454d",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'0': 20, '1': 20, '2': 9, '3': 20, '4': 0, '5': 13, '6': 20, '7': 0, '8': 12, '9': 4, '10': 20, '11': 4, '12': 1, '13': 16, '14': 20, '15': 2, '16': 20, '17': 0, '18': 20, '19': 20, '20': 20, '21': 20, '22': 20, '23': 1, '24': 2, '25': 20, '26': 13, '27': 11, '28': 20, '29': 20}\n"
]
}
],
"source": [ "source": [
"predict_Yt = []\n", "predict_Yt = []\n",
"index=0\n", "index=0\n",
"original_tree = open('tree.json', 'r')\n", "\n",
"original_tree = json.load(original_tree)\n",
"path_to_class = {}\n",
"for i in range(len(original_tree[\"paths\"])):\n",
" path = original_tree[\"paths\"][i]\n",
" path_to_class[path[\"id\"]] = path[\"classification\"]\n",
"with open('compressed_tree.json', 'r') as file:\n", "with open('compressed_tree.json', 'r') as file:\n",
" data = json.load(file)\n", " data = json.load(file)\n",
" classes = data[\"classes\"]\n", " classes = data[\"classes\"]\n",
@@ -54,26 +57,28 @@
" counter = 0\n", " counter = 0\n",
" class_set = []\n", " class_set = []\n",
" paths_set = []\n", " paths_set = []\n",
" for feature in reversed(data['layers']): #Have to reverse this list due to structure of the data.csv file and how it aligns with the compressed tree layers\n", " features = [\"protocol\", \"src\", \"dst\"]\n",
" for node in data['layers'][feature]:\n", " for feature in features:\n",
" if node['min'] is None:\n", " if feature in data[\"layers\"]:\n",
" if x[counter] <= node['max']:\n", " for node in data['layers'][feature]:\n",
" if node['min'] is None:\n",
" if x[counter] <= node['max']:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['max'] is None:\n",
" if node['min'] < x[counter]:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['min'] < x[counter] and x[counter] <= node['max']:\n",
" class_set.append(node['classes'])\n", " class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n", " paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n", " break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['max'] is None:\n",
" if node['min'] < x[counter]:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['min'] < x[counter] and x[counter] <= node['max']:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
"\n", "\n",
" counter += 1\n", " counter += 1\n",
" result = set(class_set[0])\n", " result = set(class_set[0])\n",
@@ -85,9 +90,13 @@
"\n", "\n",
" #predict_Yt.append(list(result))\n", " #predict_Yt.append(list(result))\n",
" #print(result)\n", " #print(result)\n",
" if len(paths) != 1:\n",
" print(paths)\n",
" print(x)\n",
" print(result)\n",
" assert len(paths) == 1\n", " assert len(paths) == 1\n",
" path = list(paths)[0]\n", " path = list(paths)[0]\n",
" pred = path_to_class[path]\n", " pred = data[\"path_to_class\"][str(path)]\n",
" pred_class = classes[pred]\n", " pred_class = classes[pred]\n",
" predict_Yt.append(pred_class)\n", " predict_Yt.append(pred_class)\n",
" \n", " \n",
@@ -96,7 +105,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 143,
"id": "8b4c56b6", "id": "8b4c56b6",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -104,7 +113,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"0.8451332670948943\n" "0.8410252791654538\n"
] ]
} }
], ],

File diff suppressed because one or more lines are too long

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 73,
"id": "ec310f34", "id": "ec310f34",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -14,7 +14,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 74,
"id": "5b54797e", "id": "5b54797e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -28,7 +28,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 75,
"id": "a38fdb8a", "id": "a38fdb8a",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -60,7 +60,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 76,
"id": "2fd4f738", "id": "2fd4f738",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -83,7 +83,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 77,
"id": "98cde024", "id": "98cde024",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -123,7 +123,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 78,
"id": "b6fbadbf", "id": "b6fbadbf",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -183,7 +183,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 79,
"id": "0a767971", "id": "0a767971",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -213,16 +213,22 @@
"\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n", "\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\t#print(\"=\"*40)\n", "\t#print(\"=\"*40)\n",
"\n", "\n",
"path_to_class = {}\n",
"for i in range(len(tree[\"paths\"])):\n",
" path = tree[\"paths\"][i]\n",
" path_to_class[path[\"id\"]] = path[\"classification\"]\n",
"\n",
"compressed_tree = {\n", "compressed_tree = {\n",
"\t\"paths\": path_ids,\n", "\t\"paths\": path_ids,\n",
"\t\"classes\": path_classes,\n", "\t\"classes\": path_classes,\n",
"\t\"layers\": compressed_layers,\n", "\t\"layers\": compressed_layers,\n",
" \"path_to_class\": path_to_class,\n",
"}" "}"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 80,
"id": "561b0bc1", "id": "561b0bc1",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -241,7 +247,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "switch", "display_name": "cs216",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -255,7 +261,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.12.7" "version": "3.13.3"
} }
}, },
"nbformat": 4, "nbformat": 4,