25 Commits

Author SHA1 Message Date
51f920e2ba upload example tree based on 10-01 data 2025-06-20 03:18:09 +00:00
1136bd93ea update readme 2025-06-14 03:10:48 +00:00
2ad40946d1 update treetormt 2025-06-12 19:53:46 +00:00
50075b1acc Merge remote-tracking branch 'origin/decision-tree-nudge-boundaries' 2025-06-12 19:52:02 +00:00
Jai Parera
1585399c7d Fixed loop ordering and path_to_class in JSON 2025-06-11 23:37:33 -07:00
8301998da3 temp fix for issue with metadata 2025-06-12 06:10:39 +00:00
3b2d6b3186 fix eval bug in parser 2025-06-12 05:37:48 +00:00
Jai Parera
24fc2ed6f7 Add support for combined datasets and analysis 2025-06-11 20:38:37 -07:00
Nathan Huey
fda251f051 First try boundary nudging 2025-06-11 16:55:23 -07:00
541538fcfe update decision tree results 2025-06-11 22:37:38 +00:00
Arthur Lu
afc882a569 Merge pull request #5 from ltcptgeneral/Parser
Fix range bug in TreeCompress, update parser to autofail non-singular…
2025-06-11 12:11:39 -07:00
6de3807fe2 fix range bug in TreeCompress, update parser to autofail non-singular classifications 2025-06-11 19:10:19 +00:00
Arthur Lu
fc16d3c586 Merge pull request #4 from ltcptgeneral/Parser
Eval compressed tree accuracy
2025-06-11 11:28:38 -07:00
7bee40ecf9 restore TreeCompress and TreeToRMT from main 2025-06-11 18:28:18 +00:00
krishpatel
e811171a73 Implemented working compressed tree parser to get classification accuracy 2025-06-11 11:10:49 -07:00
61a451b82d fix counting issue in ram bits 2025-06-11 04:47:35 +00:00
c73de36c70 replace classes with class string instead of index 2025-06-11 04:41:32 +00:00
fadeab8a99 fix incorrect classes in TreeCompress,
closes #1
2025-06-08 18:34:39 +00:00
c208037ae9 implement priority aware algorithm,
add dataset size printout
2025-06-07 01:10:05 +00:00
ae3128f6e8 better output formatting for tcam/ram bits 2025-06-05 16:56:10 +00:00
25e5a86a43 implement correct prefix counting 2025-06-05 03:42:30 +00:00
d3fe6efd47 add worst_case converter to rmt 2025-06-04 22:56:17 +00:00
23867747cd rename RMTConvert to TreeCompress 2025-06-02 21:54:43 +00:00
eeebc17d56 add ideal-rmt-simulator as submodule 2025-06-02 21:52:10 +00:00
0d5e51f582 add list of paths and classes to compressed_tree output 2025-06-02 21:48:10 +00:00
23 changed files with 4643 additions and 54 deletions

2
.gitattributes vendored Normal file

@@ -0,0 +1,2 @@
# force LF for any shell script
*.sh text eol=lf

5
.gitignore vendored

@@ -1,4 +1,5 @@
data.*
__pycache__
tree.json
compressed_tree.json
data/*
.DS_Store
.ipynb_checkpoints/

3
.gitmodules vendored Normal file

@@ -0,0 +1,3 @@
[submodule "ideal-rmt-simulator"]
path = ideal-rmt-simulator
url = https://github.com/rfchang/ideal-rmt-simulator

152
CompressedTreeParser.ipynb Normal file

@@ -0,0 +1,152 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 138,
"id": "938dec51",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import argparse\n",
"from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.tree import export_graphviz\n",
"import pydotplus\n",
"from matplotlib import pyplot as plt\n",
"from labels import mac_to_label\n",
"import json\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 139,
"id": "442624c7",
"metadata": {},
"outputs": [],
"source": [
"Set1 = pd.read_csv('data/combined/data.csv').values.tolist()\n",
"X = [i[0:3] for i in Set1]\n",
"Y =[i[3] for i in Set1]"
]
},
{
"cell_type": "code",
"execution_count": 142,
"id": "12ad454d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'0': 20, '1': 20, '2': 9, '3': 20, '4': 0, '5': 13, '6': 20, '7': 0, '8': 12, '9': 4, '10': 20, '11': 4, '12': 1, '13': 16, '14': 20, '15': 2, '16': 20, '17': 0, '18': 20, '19': 20, '20': 20, '21': 20, '22': 20, '23': 1, '24': 2, '25': 20, '26': 13, '27': 11, '28': 20, '29': 20}\n"
]
}
],
"source": [
"predict_Yt = []\n",
"index=0\n",
"\n",
"with open('compressed_tree.json', 'r') as file:\n",
" data = json.load(file)\n",
" classes = data[\"classes\"]\n",
" for x in X:\n",
" counter = 0\n",
" class_set = []\n",
" paths_set = []\n",
" features = [\"protocol\", \"src\", \"dst\"]\n",
" for feature in features:\n",
" if feature in data[\"layers\"]:\n",
" for node in data['layers'][feature]:\n",
" if node['min'] is None:\n",
" if x[counter] <= node['max']:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['max'] is None:\n",
" if node['min'] < x[counter]:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
" else:\n",
" continue\n",
" elif node['min'] < x[counter] and x[counter] <= node['max']:\n",
" class_set.append(node['classes'])\n",
" paths_set.append(node[\"paths\"])\n",
" break #is this an issue?\n",
"\n",
" counter += 1\n",
" result = set(class_set[0])\n",
" paths = set(paths_set[0])\n",
" for s in class_set[1:]:\n",
" result.intersection_update(s)\n",
" for s in paths_set[1:]:\n",
" paths.intersection_update(s)\n",
"\n",
" #predict_Yt.append(list(result))\n",
" #print(result)\n",
" if len(paths) != 1:\n",
" print(paths)\n",
" print(x)\n",
" print(result)\n",
" assert len(paths) == 1\n",
" path = list(paths)[0]\n",
" pred = data[\"path_to_class\"][str(path)]\n",
" pred_class = classes[pred]\n",
" predict_Yt.append(pred_class)\n",
" \n",
" index += 1"
]
},
{
"cell_type": "code",
"execution_count": 143,
"id": "8b4c56b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8410252791654538\n"
]
}
],
"source": [
"correct = 0\n",
"for i in range(len(Y)):\n",
" prediction = predict_Yt[i]\n",
" if prediction != None and Y[i] == prediction:\n",
" correct += 1\n",
"\n",
"print(correct / len(Y))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
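For readers skimming this notebook, here is a compact standalone sketch (not part of the repo) of the lookup the parser cell performs against `compressed_tree.json`: each feature value is matched to a (min, max] bucket in that feature's layer, the per-feature path sets are intersected, and the single surviving path id is mapped to a class through `path_to_class`. The function name `classify` and the sample values are illustrative only.

```python
import json

def classify(sample, tree, features=("protocol", "src", "dst")):
    """Sketch of the notebook's lookup: `sample` holds values in `features` order.

    Each value is matched against its layer's (min, max] buckets (None = unbounded),
    the per-feature path sets are intersected, and the lone surviving path id is
    mapped to a class via path_to_class. Note the example tree spells its protocol
    layer "protocl", so that feature is skipped here, exactly as in the notebook.
    """
    surviving = None
    for value, feature in zip(sample, features):
        if feature not in tree["layers"]:
            continue
        for node in tree["layers"][feature]:
            lo, hi = node["min"], node["max"]
            if (lo is None or lo < value) and (hi is None or value <= hi):
                paths = set(node["paths"])
                surviving = paths if surviving is None else surviving & paths
                break
    assert surviving is not None and len(surviving) == 1, "non-singular classification"
    return tree["classes"][tree["path_to_class"][str(surviving.pop())]]

with open("compressed_tree.json") as f:   # file name as used by the notebook
    tree = json.load(f)
print(classify([0, 50, 2200], tree))      # e.g. "Smart Things" with the example tree
```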

File diff suppressed because one or more lines are too long

View File

@@ -89,7 +89,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "switch",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -103,7 +103,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.12.9"
}
},
"nbformat": 4,

View File

@@ -2,18 +2,30 @@
Run `pip install -r requirements.txt`
Run `setup.sh`
# Tree Generation
## Download Dataset
Download the *September 22 2016* dataset from: https://iotanalytics.unsw.edu.au/iottraces.html#bib18tmc
Download the *September 22 2016* dataset (or others) from: https://iotanalytics.unsw.edu.au/iottraces.html#bib18tmc
Rename the file as data.pcap
Place these into the `data/tar` folder.
Run `extract_tars.sh` which will extract and place the `.pcap` files at the corresponding location inside `data/pcap`.
## Preprocessing Dataset
Run `ExtractDataset.ipynb`, this will take a few minutes
Run `extract_all_datasets.py` which will extract the data from each file in `data/pcap` and turn it into the corresponding `.csv` file inside `data/processed`. This will take a few minutes per file. Combine the data under `data/csv` using `combine_csv.py`. This will overwrite `data/combined/data.csv` which you can use for the decision tree.
## Training
Run `DecisionTree.ipynb`, the tree should be output in `tree`
Run `DecisionTree.ipynb`, the tree should be output in `tree.json`
## Compression
Run `TreeCompress.ipynb`, the tree should be output in `compressed_tree.json`
## RMT
Run `TreeToRMT.ipynb`, it will report the TCAM and SRAM usage of the compressed tree

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 73,
"id": "ec310f34",
"metadata": {},
"outputs": [],
@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 74,
"id": "5b54797e",
"metadata": {},
"outputs": [],
@@ -28,22 +28,25 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 75,
"id": "a38fdb8a",
"metadata": {},
"outputs": [],
"source": [
"# First cleanup the tree by rounding the decision points to integer values\n",
"# We assume all features will use integer values. If this is not the case, then training data should be normalized so that integer values can be accurate enough\n",
"# we also enumerate all the paths for later use\n",
"\n",
"i = 0\n",
"\n",
"path_ids = set()\n",
"path_classes = tree[\"classes\"]\n",
"\n",
"# for each path in the tree\n",
"for path in paths:\n",
"\t# assign a path id \n",
"\tpath[\"id\"] = i\n",
"\ti += 1\n",
"\tpath_ids.add(i)\n",
"\t#path_classes.add(path[\"classification\"])\n",
"\ti += 1\t\n",
"\t# for each condition\n",
"\tconditions = path[\"conditions\"]\n",
"\tfor condition in conditions:\n",
@@ -57,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 76,
"id": "2fd4f738",
"metadata": {},
"outputs": [],
@@ -80,7 +83,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 77,
"id": "98cde024",
"metadata": {},
"outputs": [],
@@ -106,13 +109,13 @@
"\t\tvalue = condition[\"value\"]\n",
"\n",
"\t\t# move the min/max for the corresponding feature in compressed\n",
"\t\tif operation == \"<=\" and compressed[feature][\"min\"] is None:\n",
"\t\tif operation == \"<=\" and compressed[feature][\"max\"] is None:\n",
"\t\t\tcompressed[feature][\"max\"] = value\n",
"\t\telif operation == \">\" and compressed[feature][\"max\"] is None:\n",
"\t\telif operation == \">\" and compressed[feature][\"min\"] is None:\n",
"\t\t\tcompressed[feature][\"min\"] = value\n",
"\t\telif operation == \"<=\" and value < compressed[feature][\"min\"]:\n",
"\t\telif operation == \"<=\" and value < compressed[feature][\"max\"]:\n",
"\t\t\tcompressed[feature][\"max\"] = value\n",
"\t\telif operation == \">\" and value > compressed[feature][\"max\"]:\n",
"\t\telif operation == \">\" and value > compressed[feature][\"min\"]:\n",
"\t\t\tcompressed[feature][\"min\"] = value\n",
"\n",
"\tpath[\"compressed\"] = compressed"
@@ -120,7 +123,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 78,
"id": "b6fbadbf",
"metadata": {},
"outputs": [],
@@ -168,49 +171,64 @@
"\t\t# for each bucket which encompases the condition's range, add this path's id to the sets \n",
"\t\ti = 0\n",
"\t\tfor bp in breakpoints[feature_name]:\n",
"\t\t\tin_range = is_in_range(bp, lower, upper)\n",
"\t\t\tif in_range:\n",
"\t\t\tif is_in_range(bp, lower, upper):\n",
"\t\t\t\tbuckets_id[feature_name][i].add(ID)\n",
"\t\t\t\tbuckets_class[feature_name][i].add(Class)\n",
"\t\t\ti += 1"
"\t\t\ti += 1\n",
"\n",
"\t\tif is_in_range(bp+1, lower, upper):\n",
"\t\t\tbuckets_id[feature_name][i].add(ID)\n",
"\t\t\tbuckets_class[feature_name][i].add(Class)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 79,
"id": "0a767971",
"metadata": {},
"outputs": [],
"source": [
"# combine breakpoints and buckets to one representation\n",
"\n",
"compressed_tree = defaultdict(list)\n",
"compressed_layers = defaultdict(list)\n",
"for feature_name in buckets_id:\n",
"\tlower = None\n",
"\tupper = breakpoints[feature_name][0]\n",
"\tpaths = buckets_id[feature_name][0]\n",
"\tclasses = buckets_class[feature_name][0]\n",
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n",
"\t\tlower = breakpoints[feature_name][i-1]\n",
"\t\tupper = breakpoints[feature_name][i]\n",
"\t\tmembers = buckets_id[feature_name][i]\n",
"\t\tpaths = buckets_id[feature_name][i]\n",
"\t\tclasses = buckets_class[feature_name][i]\n",
"\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
"\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\t\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
"\tupper = None\n",
"\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
"\tpaths = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
"\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n",
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\t#print(\"=\"*40)"
"\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\t#print(\"=\"*40)\n",
"\n",
"path_to_class = {}\n",
"for i in range(len(tree[\"paths\"])):\n",
" path = tree[\"paths\"][i]\n",
" path_to_class[path[\"id\"]] = path[\"classification\"]\n",
"\n",
"compressed_tree = {\n",
"\t\"paths\": path_ids,\n",
"\t\"classes\": path_classes,\n",
"\t\"layers\": compressed_layers,\n",
" \"path_to_class\": path_to_class,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 80,
"id": "561b0bc1",
"metadata": {},
"outputs": [],
@@ -229,7 +247,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "switch",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -243,7 +261,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.12.9"
}
},
"nbformat": 4,

412
TreeToRMT.ipynb Normal file

@@ -0,0 +1,412 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "58fc6db9",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e07be4b3",
"metadata": {},
"outputs": [],
"source": [
"f = open(\"compressed_tree.json\")\n",
"tree = json.loads(f.read())\n",
"layers = tree[\"layers\"]\n",
"classes = tree[\"classes\"]\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1516ff91",
"metadata": {},
"outputs": [],
"source": [
"field_width = {\n",
"\t\"src\": 16,\n",
"\t\"dst\": 16,\n",
"\t\"protocl\": 8,\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "f9193827",
"metadata": {},
"source": [
"# Worst Case RMT"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5e37cfc5",
"metadata": {},
"outputs": [],
"source": [
"def worst_case_rmt(tree):\n",
"\trmt = []\n",
"\tstep = 0\n",
"\n",
"\ttcam_bits = 0\n",
"\tram_bits = 0\n",
"\n",
"\tfor layer in layers:\n",
"\t\tnum_ranges = len(layers[layer])\n",
"\t\t# assume that each range requires all of 2*k prefixes when performing prefix expansion\n",
"\t\t# therefore there are 2*k * R for R ranges and width k\n",
"\t\tnum_prefixes = 2 * field_width[layer] * num_ranges\n",
"\t\tprefix_width = field_width[layer]\n",
"\n",
"\t\ttcam = {\n",
"\t\t\t\"id\": f\"{layer}_range\",\n",
"\t\t\t\"step\": step,\n",
"\t\t\t\"match\": \"ternary\",\n",
"\t\t\t\"entries\": num_prefixes,\n",
"\t\t\t\"key_size\": prefix_width\n",
"\t\t}\n",
"\t\ttcam_bits += num_prefixes * prefix_width\n",
"\n",
"\t\t# assume basic pointer reuse for metadata storage\n",
"\t\tram = {\n",
"\t\t\t\"id\": f\"{layer}_meta\",\n",
"\t\t\t\"step\": step,\n",
"\t\t\t\"match\": \"exact\",\n",
"\t\t\t\"method\": \"index\",\n",
"\t\t\t\"key_size\": math.ceil(math.log2(num_ranges)),\n",
"\t\t\t\"data_size\": len(classes)\n",
"\t\t}\n",
"\t\tram_bits += num_ranges * len(classes)\n",
"\n",
"\t\trmt.append(tcam)\n",
"\t\trmt.append(ram)\n",
"\n",
"\t\tstep += 1\n",
"\n",
"\treturn rmt, tcam_bits, ram_bits\n",
"\n",
"x, tcam_bits, ram_bits = worst_case_rmt(tree)\n",
"f = open(\"worst_case_rmt.json\", \"w+\")\n",
"f.write(json.dumps(x, indent=4))\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0dc1d6d4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TCAM mapping: \n",
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"SRAM mapping: \n",
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"id mapping: \n",
"[['dst_range', 'dst_meta'], ['src_range', 'src_meta'], ['protocl_range', 'protocl_meta'], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]\n",
"TCAM bits: 13184\n",
"RAM bits: 504\n"
]
}
],
"source": [
"! command python3 ideal-rmt-simulator/sim.py naive_rmt.json\n",
"print(f\"TCAM bits: {tcam_bits}\")\n",
"print(f\"RAM bits: {ram_bits}\")"
]
},
{
"cell_type": "markdown",
"id": "2a628655",
"metadata": {},
"source": [
"# Naive Range Expansion "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "fb9febe9",
"metadata": {},
"outputs": [],
"source": [
"# shamelessly stolen from: https://github.com/autolyticus/range-to-prefix/blob/master/rangetoprefix.C\n",
"\n",
"def int_to_bin(i, width):\n",
"\treturn bin(i)[2:].zfill(width)\n",
"\n",
"def increment_dc(pfx):\n",
"\tidx = pfx.find(\"*\")\n",
"\tif idx == -1:\n",
"\t\tidx = len(pfx)\n",
"\tidx = idx - 1\n",
"\t#print(pfx, pfx[:idx])\n",
"\treturn pfx[:idx] + \"*\" + pfx[idx+1:]\n",
"\t\n",
"def can_merge(pfx_a, pfx_b):\n",
"\tpfx_a = pfx_a.replace(\"*\", \"\")\n",
"\tpfx_b = pfx_b.replace(\"*\", \"\")\n",
"\treturn pfx_a[:-1] == pfx_b[:-1] and pfx_a[-1] != pfx_b[-1]\n",
"\n",
"def merge(pfx_a, prefixes):\n",
"\tpfx_a = increment_dc(pfx_a)\n",
"\tprefixes[-1] = pfx_a\n",
"\n",
"\tfor i in range(len(prefixes) - 2, -1, -1):\n",
"\t\tif can_merge(prefixes[i], prefixes[i+1]):\n",
"\t\t\tprefixes.pop()\n",
"\t\t\tpfx = increment_dc(prefixes[i])\n",
"\t\t\tprefixes[i] = pfx\n",
"\n",
"def convert_range(lower, upper, width):\n",
"\tprefixes = []\n",
"\tprefix = int_to_bin(lower, width)\n",
"\tprefixes.append(prefix)\n",
"\tnorm_upper = min(upper, 2**width-1)\n",
"\tfor i in range(lower+1, norm_upper+1):\n",
"\t\tprefix = int_to_bin(i, width)\n",
"\t\tif can_merge(prefix, prefixes[-1]):\n",
"\t\t\tmerge(prefix, prefixes)\n",
"\t\telse:\n",
"\t\t\tprefixes.append(prefix)\n",
"\treturn prefixes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "55167c28",
"metadata": {},
"outputs": [],
"source": [
"def naive_rmt(tree):\n",
"\trmt = []\n",
"\tstep = 0\n",
"\n",
"\ttcam_bits = 0\n",
"\tram_bits = 0\n",
"\n",
"\tfor layer in layers:\n",
"\t\tnum_prefixes = 0\n",
"\t\tprefix_width = field_width[layer]\n",
"\t\t# for each range in the layer, convert the ranges to prefixes using naive range expansion\n",
"\t\tfor r in layers[layer]:\n",
"\t\t\tif r[\"min\"] == None:\n",
"\t\t\t\tr[\"min\"] = 0\n",
"\t\t\telif r[\"max\"] == None:\n",
"\t\t\t\tr[\"max\"] = 2 ** prefix_width\n",
"\t\t\tprefixes = convert_range(r[\"min\"], r[\"max\"], prefix_width)\n",
"\t\t\tr[\"prefixes\"] = prefixes\n",
"\t\t\tnum_prefixes += len(prefixes)\n",
"\t\t\ttcam_bits += len(prefixes) * prefix_width\n",
"\n",
"\t\ttcam = {\n",
"\t\t\t\"id\": f\"{layer}_range\",\n",
"\t\t\t\"step\": step,\n",
"\t\t\t\"match\": \"ternary\",\n",
"\t\t\t\"entries\": num_prefixes,\n",
"\t\t\t\"key_size\": prefix_width,\n",
"\t\t\t\"ranges\": layers[layer]\n",
"\t\t}\n",
"\n",
"\t\tnum_ranges = len(layers[layer])\n",
"\t\t# assume no pointer reuse for metadata storage\n",
"\t\tram = {\n",
"\t\t\t\"id\": f\"{layer}_meta\",\n",
"\t\t\t\"step\": step,\n",
"\t\t\t\"match\": \"exact\",\n",
"\t\t\t\"method\": \"index\",\n",
"\t\t\t\"key_size\": math.ceil(math.log2(num_ranges)),\n",
"\t\t\t\"data_size\": len(classes)\n",
"\t\t}\n",
"\t\tram_bits += num_ranges * len(classes)\n",
"\n",
"\t\trmt.append(tcam)\n",
"\t\trmt.append(ram)\n",
"\n",
"\t\tstep += 1\n",
"\n",
"\treturn rmt, tcam_bits, ram_bits\n",
"\n",
"x, tcam_bits, ram_bits = naive_rmt(tree)\n",
"f = open(\"naive_rmt.json\", \"w+\")\n",
"f.write(json.dumps(x, indent=4))\n",
"f.close()\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "48011528",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TCAM mapping: \n",
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"SRAM mapping: \n",
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"id mapping: \n",
"[['dst_range', 'dst_meta'], ['src_range', 'src_meta'], ['protocl_range', 'protocl_meta'], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]\n",
"TCAM bits: 3320\n",
"RAM bits: 504\n"
]
}
],
"source": [
"! command python3 ideal-rmt-simulator/sim.py naive_rmt.json\n",
"print(f\"TCAM bits: {tcam_bits}\")\n",
"print(f\"RAM bits: {ram_bits}\")"
]
},
{
"cell_type": "markdown",
"id": "2504b1ba",
"metadata": {},
"source": [
"# Priority Aware Prefix Expansion"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "64b7271e",
"metadata": {},
"outputs": [],
"source": [
"# for this technique, we note that given disjoint ranges [0,a][a,b],[b,c] ...\n",
"# then if using a TCAM that selects the first matching prefix, then [0,a],[0,b],[0,c] would be equivalent\n",
"# this is because if for some k<a, even though the range [0,b] could be selected, as long as the prefixes for [0,a] are before [0,b] then the correct prefix will still be selected\n",
"\n",
"def priority_aware(tree):\n",
"\trmt = []\n",
"\tstep = 0\n",
"\n",
"\ttcam_bits = 0\n",
"\tram_bits = 0\n",
"\n",
"\tfor layer in layers:\n",
"\t\tnum_prefixes = 0\n",
"\t\tprefix_width = field_width[layer]\n",
"\t\t# for each range, run the regular prefix expansion, and also the prefix expansion setting the minimum to 0\n",
"\t\t# then check which set of prefixes would be better\n",
"\t\t# we will assume the ranges are already disjoin and in the correct order\n",
"\t\tfor r in layers[layer]:\n",
"\t\t\tif r[\"min\"] == None:\n",
"\t\t\t\tr[\"min\"] = 0\n",
"\t\t\telif r[\"max\"] == None:\n",
"\t\t\t\tr[\"max\"] = 2 ** prefix_width\n",
"\t\t\tregular_prefixes = convert_range(r[\"min\"], r[\"max\"], prefix_width)\n",
"\t\t\tzero_start_prefixes = convert_range(0, r[\"max\"], prefix_width)\n",
"\n",
"\t\t\tif len(regular_prefixes) <= len(zero_start_prefixes):\n",
"\t\t\t\tpfx_type = \"exact\"\n",
"\t\t\t\tprefixes = regular_prefixes\n",
"\t\t\telse:\n",
"\t\t\t\tpfx_type = \"zero\"\n",
"\t\t\t\tprefixes = zero_start_prefixes\n",
"\n",
"\t\t\tr[\"prefixes\"] = prefixes\n",
"\t\t\tr[\"prefix_type\"] = pfx_type\n",
"\t\t\tnum_prefixes += len(prefixes)\n",
"\t\t\ttcam_bits += len(prefixes) * prefix_width\n",
"\n",
"\t\ttcam = {\n",
"\t\t\t\"id\": f\"{layer}_range\",\n",
"\t\t\t\"step\": step,\n",
"\t\t\t\"match\": \"ternary\",\n",
"\t\t\t\"entries\": num_prefixes,\n",
"\t\t\t\"key_size\": prefix_width,\n",
"\t\t\t\"ranges\": layers[layer]\n",
"\t\t}\n",
"\n",
"\t\tnum_ranges = len(layers[layer])\n",
"\t\t# assume no pointer reuse for metadata storage\n",
"\t\tram = {\n",
"\t\t\t\"id\": f\"{layer}_meta\",\n",
"\t\t\t\"step\": step,\n",
"\t\t\t\"match\": \"exact\",\n",
"\t\t\t\"method\": \"index\",\n",
"\t\t\t\"key_size\": math.ceil(math.log2(num_ranges)),\n",
"\t\t\t\"data_size\": len(classes)\n",
"\t\t}\n",
"\t\tram_bits += num_ranges * len(classes)\n",
"\n",
"\t\trmt.append(tcam)\n",
"\t\trmt.append(ram)\n",
"\n",
"\t\tstep += 1\n",
"\n",
"\treturn rmt, tcam_bits, ram_bits\n",
"\n",
"x, tcam_bits, ram_bits = priority_aware(tree)\n",
"f = open(\"priority_aware.json\", \"w+\")\n",
"f.write(json.dumps(x, indent=4))\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cd706e41",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TCAM mapping: \n",
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"SRAM mapping: \n",
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"id mapping: \n",
"[['dst_range', 'dst_meta'], ['src_range', 'src_meta'], ['protocl_range', 'protocl_meta'], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]\n",
"TCAM bits: 2152\n",
"RAM bits: 504\n"
]
}
],
"source": [
"! command python3 ideal-rmt-simulator/sim.py priority_aware.json\n",
"print(f\"TCAM bits: {tcam_bits}\")\n",
"print(f\"RAM bits: {ram_bits}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
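TreeToRMT.ipynb sizes the compressed tree three ways: a worst case that budgets 2·k prefixes per range of a k-bit field (R ranges therefore cost 2·k·R TCAM entries of k bits each, e.g. 2·16·9 = 288 entries for nine 16-bit dst ranges), a naive range-to-prefix expansion, and the priority-aware variant, which may rewrite a range's lower bound to 0 because a first-match TCAM has already caught every smaller value in an earlier entry. The sketch below uses a block-aligned range-to-prefix cover rather than the notebook's merge-based converter, but it reproduces the prefixes recorded in the example JSON files for the range shown; it is illustration, not repo code.

```python
def range_to_prefixes(lo, hi, width):
    """Cover the inclusive integer range [lo, hi] with ternary prefixes such as '101111**********'."""
    prefixes = []
    while lo <= hi:
        # largest aligned power-of-two block that starts at lo and still fits in [lo, hi]
        size = lo & -lo if lo else 1 << width
        while size > hi - lo + 1:
            size >>= 1
        k = size.bit_length() - 1                       # number of wildcarded low bits
        prefixes.append(format(lo, f"0{width}b")[: width - k] + "*" * k)
        lo += size
    return prefixes

# dst bucket (47944, 49152] of the example compressed tree, on a 16-bit field
exact = range_to_prefixes(47944, 49152, 16)   # expand the range as stored: 6 prefixes
zero_start = range_to_prefixes(0, 49152, 16)  # priority-aware rewrite [0, max]: 3 prefixes
print(len(exact), len(zero_start))            # the notebook keeps whichever set is smaller
```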

74
combine.py Normal file

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""combined.py
Concatenate every CSV that matches the pattern
data/processed/<name>/<name>.csv
into a single file:
data/combined/data.csv
The script streams each source CSV in 1Miorow chunks so memory stays low.
Typos in the historic column names (protocl/classfication) are fixed onthefly.
Usage
-----
python combined.py
You can optionally supply a different root directory:
python combined.py --root other/processed_dir --out other/combined/data.csv
"""
from __future__ import annotations
import argparse
from pathlib import Path
import os
import pandas as pd
CHUNK = 1_000_000 # rows per read_csv chunk
def fix_cols(df: pd.DataFrame) -> pd.DataFrame:
"""Rename legacy columns to canonical names."""
return df.rename(
columns={"protocl": "protocol", "classfication": "classification"}
)
def find_source_csvs(proc_root: Path):
"""Yield CSV paths that exactly match processed/<name>/<name>.csv."""
for sub in sorted(proc_root.iterdir()):
if not sub.is_dir():
continue
target = sub / f"{sub.name}.csv"
if target.exists():
yield target
def combine(proc_root: Path, out_path: Path):
out_path.parent.mkdir(parents=True, exist_ok=True)
first_write = True
for csv_path in find_source_csvs(proc_root):
print(f"→ adding {csv_path.relative_to(proc_root.parent)}")
for chunk in pd.read_csv(csv_path, chunksize=CHUNK):
chunk = fix_cols(chunk)
chunk.to_csv(
out_path,
mode="w" if first_write else "a",
header=first_write,
index=False,
)
first_write = False
print(f"✓ combined CSV written to {out_path}")
def main():
p = argparse.ArgumentParser(description="Combine processed CSVs into one.")
p.add_argument("--root", default="data/processed", help="processed dir root")
p.add_argument("--out", default="data/combined/data.csv", help="output CSV")
args = p.parse_args()
combine(Path(args.root).expanduser(), Path(args.out).expanduser())
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,560 @@
{
"paths": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22
],
"classes": [
"Amazon Echo",
"Belkin Motion Sensor",
"Belkin Switch",
"Dropcam",
"HP Printer",
"LiFX Bulb",
"NEST Smoke Sensor",
"Netatmo Camera",
"Netatmo Weather station",
"Pixstart photo frame",
"Samsung Smart Cam",
"Smart Things",
"TP-Link Camera",
"TP-Link Plug",
"Triby Speaker",
"Withings",
"Withings Scale",
"Withings sleep sensor",
"iHome PowerPlug",
"other"
],
"layers": {
"dst": [
{
"min": null,
"max": 2136,
"paths": [
0,
1,
2,
3,
4,
5,
6
],
"classes": [
8,
19,
4
]
},
{
"min": 2136,
"max": 2224,
"paths": [
7
],
"classes": [
11
]
},
{
"min": 2224,
"max": 5016,
"paths": [
8,
9
],
"classes": [
1,
19
]
},
{
"min": 5016,
"max": 25848,
"paths": [
10,
11,
12
],
"classes": [
19,
7
]
},
{
"min": 25848,
"max": 47936,
"paths": [
10,
11,
13
],
"classes": [
19,
7
]
},
{
"min": 47936,
"max": 47944,
"paths": [
14
],
"classes": [
3
]
},
{
"min": 47944,
"max": 49152,
"paths": [
16,
15
],
"classes": [
10,
7
]
},
{
"min": 49152,
"max": 49160,
"paths": [
17,
18
],
"classes": [
16,
2
]
},
{
"min": 49160,
"max": null,
"paths": [
19,
20,
21,
22
],
"classes": [
17,
19,
15
]
}
],
"src": [
{
"min": null,
"max": 64,
"paths": [
0,
1,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
7,
11,
15,
16,
19
]
},
{
"min": 64,
"max": 128,
"paths": [
3,
5,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
]
},
{
"min": 128,
"max": 280,
"paths": [
3,
6,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
]
},
{
"min": 280,
"max": 816,
"paths": [
3,
6,
7,
8,
11,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
]
},
{
"min": 816,
"max": 1576,
"paths": [
4,
6,
7,
8,
11,
14,
15,
17,
19
],
"classes": [
3,
7,
11,
15,
16,
19
]
},
{
"min": 1576,
"max": 2488,
"paths": [
4,
6,
7,
8,
11,
14,
15,
18,
19
],
"classes": [
2,
3,
7,
11,
15,
19
]
},
{
"min": 2488,
"max": 4776,
"paths": [
4,
6,
7,
8,
11,
14,
16,
18,
19
],
"classes": [
2,
3,
7,
10,
11,
15,
19
]
},
{
"min": 4776,
"max": 5224,
"paths": [
4,
6,
7,
8,
11,
14,
16,
18,
20
],
"classes": [
2,
3,
7,
10,
11,
17,
19
]
},
{
"min": 5224,
"max": 9048,
"paths": [
4,
6,
7,
8,
12,
13,
14,
16,
18,
20
],
"classes": [
2,
3,
10,
11,
17,
19
]
},
{
"min": 9048,
"max": 43008,
"paths": [
4,
6,
7,
8,
12,
13,
14,
16,
18,
21
],
"classes": [
2,
3,
10,
11,
19
]
},
{
"min": 43008,
"max": 50384,
"paths": [
4,
6,
7,
9,
12,
13,
14,
16,
18,
21
],
"classes": [
1,
2,
3,
10,
11,
19
]
},
{
"min": 50384,
"max": null,
"paths": [
4,
6,
7,
9,
12,
13,
14,
16,
18,
22
],
"classes": [
1,
2,
3,
10,
11,
19
]
}
],
"protocl": [
{
"min": null,
"max": 0,
"paths": [
0,
2,
3,
4,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22
],
"classes": [
1,
2,
3,
4,
7,
8,
10,
11,
15,
16,
17,
19
]
},
{
"min": 0,
"max": null,
"paths": [
1,
2,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22
],
"classes": [
1,
2,
3,
7,
8,
10,
11,
15,
16,
17,
19
]
}
]
},
"path_to_class": {
"0": 19,
"1": 19,
"2": 8,
"3": 4,
"4": 19,
"5": 19,
"6": 19,
"7": 11,
"8": 19,
"9": 1,
"10": 19,
"11": 7,
"12": 19,
"13": 19,
"14": 3,
"15": 7,
"16": 10,
"17": 16,
"18": 2,
"19": 15,
"20": 17,
"21": 19,
"22": 19
}
}

734
example/naive_rmt.json Normal file

@@ -0,0 +1,734 @@
[
{
"id": "dst_range",
"step": 0,
"match": "ternary",
"entries": 68,
"key_size": 16,
"ranges": [
{
"min": 0,
"max": 2136,
"paths": [
0,
1,
2,
3,
4,
5,
6
],
"classes": [
8,
19,
4
],
"prefixes": [
"00000***********",
"0000100000******",
"000010000100****",
"0000100001010***",
"0000100001011000"
]
},
{
"min": 2136,
"max": 2224,
"paths": [
7
],
"classes": [
11
],
"prefixes": [
"0000100001011***",
"00001000011*****",
"00001000100*****",
"000010001010****",
"0000100010110000"
]
},
{
"min": 2224,
"max": 5016,
"paths": [
8,
9
],
"classes": [
1,
19
],
"prefixes": [
"000010001011****",
"0000100011******",
"00001001********",
"0000101*********",
"000011**********",
"0001000*********",
"00010010********",
"000100110*******",
"000100111000****",
"0001001110010***",
"0001001110011000"
]
},
{
"min": 5016,
"max": 25848,
"paths": [
10,
11,
12
],
"classes": [
19,
7
],
"prefixes": [
"0001001110011***",
"00010011101*****",
"0001001111******",
"000101**********",
"00011***********",
"001*************",
"010*************",
"011000**********",
"011001000*******",
"0110010010******",
"01100100110*****",
"011001001110****",
"0110010011110***",
"0110010011111000"
]
},
{
"min": 25848,
"max": 47936,
"paths": [
10,
11,
13
],
"classes": [
19,
7
],
"prefixes": [
"0110010011111***",
"01100101********",
"0110011*********",
"01101***********",
"0111************",
"100*************",
"1010************",
"10110***********",
"1011100*********",
"10111010********",
"1011101100******",
"1011101101000000"
]
},
{
"min": 47936,
"max": 47944,
"paths": [
14
],
"classes": [
3
],
"prefixes": [
"1011101101000***",
"1011101101001000"
]
},
{
"min": 47944,
"max": 49152,
"paths": [
16,
15
],
"classes": [
10,
7
],
"prefixes": [
"1011101101001***",
"101110110101****",
"10111011011*****",
"101110111*******",
"101111**********",
"1100000000000000"
]
},
{
"min": 49152,
"max": 49160,
"paths": [
17,
18
],
"classes": [
16,
2
],
"prefixes": [
"1100000000000***",
"1100000000001000"
]
},
{
"min": 49160,
"max": 65536,
"paths": [
19,
20,
21,
22
],
"classes": [
17,
19,
15
],
"prefixes": [
"1100000000001***",
"110000000001****",
"11000000001*****",
"1100000001******",
"110000001*******",
"11000001********",
"1100001*********",
"110001**********",
"11001***********",
"1101************",
"111*************"
]
}
]
},
{
"id": "dst_meta",
"step": 0,
"match": "exact",
"method": "index",
"key_size": 4,
"data_size": 20
},
{
"id": "src_range",
"step": 1,
"match": "ternary",
"entries": 87,
"key_size": 16,
"ranges": [
{
"min": 0,
"max": 64,
"paths": [
0,
1,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
7,
11,
15,
16,
19
],
"prefixes": [
"0000000000******",
"0000000001000000"
]
},
{
"min": 64,
"max": 128,
"paths": [
3,
5,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
],
"prefixes": [
"0000000001******",
"0000000010000000"
]
},
{
"min": 128,
"max": 280,
"paths": [
3,
6,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
],
"prefixes": [
"000000001*******",
"000000010000****",
"0000000100010***",
"0000000100011000"
]
},
{
"min": 280,
"max": 816,
"paths": [
3,
6,
7,
8,
11,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
],
"prefixes": [
"0000000100011***",
"00000001001*****",
"0000000101******",
"000000011*******",
"00000010********",
"00000011000*****",
"000000110010****",
"0000001100110000"
]
},
{
"min": 816,
"max": 1576,
"paths": [
4,
6,
7,
8,
11,
14,
15,
17,
19
],
"classes": [
3,
7,
11,
15,
16,
19
],
"prefixes": [
"000000110011****",
"0000001101******",
"000000111*******",
"0000010*********",
"00000110000*****",
"0000011000100***",
"0000011000101000"
]
},
{
"min": 1576,
"max": 2488,
"paths": [
4,
6,
7,
8,
11,
14,
15,
18,
19
],
"classes": [
2,
3,
7,
11,
15,
19
],
"prefixes": [
"0000011000101***",
"000001100011****",
"0000011001******",
"000001101*******",
"00000111********",
"00001000********",
"000010010*******",
"00001001100*****",
"000010011010****",
"0000100110110***",
"0000100110111000"
]
},
{
"min": 2488,
"max": 4776,
"paths": [
4,
6,
7,
8,
11,
14,
16,
18,
19
],
"classes": [
2,
3,
7,
10,
11,
15,
19
],
"prefixes": [
"0000100110111***",
"0000100111******",
"0000101*********",
"000011**********",
"0001000*********",
"000100100*******",
"00010010100*****",
"0001001010100***",
"0001001010101000"
]
},
{
"min": 4776,
"max": 5224,
"paths": [
4,
6,
7,
8,
11,
14,
16,
18,
20
],
"classes": [
2,
3,
7,
10,
11,
17,
19
],
"prefixes": [
"0001001010101***",
"000100101011****",
"0001001011******",
"00010011********",
"0001010000******",
"00010100010*****",
"0001010001100***",
"0001010001101000"
]
},
{
"min": 5224,
"max": 9048,
"paths": [
4,
6,
7,
8,
12,
13,
14,
16,
18,
20
],
"classes": [
2,
3,
10,
11,
17,
19
],
"prefixes": [
"0001010001101***",
"000101000111****",
"000101001*******",
"00010101********",
"0001011*********",
"00011***********",
"0010000*********",
"00100010********",
"0010001100******",
"001000110100****",
"0010001101010***",
"0010001101011000"
]
},
{
"min": 9048,
"max": 43008,
"paths": [
4,
6,
7,
8,
12,
13,
14,
16,
18,
21
],
"classes": [
2,
3,
10,
11,
19
],
"prefixes": [
"0010001101011***",
"00100011011*****",
"001000111*******",
"001001**********",
"00101***********",
"0011************",
"01**************",
"100*************",
"10100***********",
"1010100000000000"
]
},
{
"min": 43008,
"max": 50384,
"paths": [
4,
6,
7,
9,
12,
13,
14,
16,
18,
21
],
"classes": [
1,
2,
3,
10,
11,
19
],
"prefixes": [
"10101***********",
"1011************",
"110000**********",
"110001000*******",
"1100010010******",
"110001001100****",
"1100010011010000"
]
},
{
"min": 50384,
"max": 65536,
"paths": [
4,
6,
7,
9,
12,
13,
14,
16,
18,
22
],
"classes": [
1,
2,
3,
10,
11,
19
],
"prefixes": [
"110001001101****",
"11000100111*****",
"11000101********",
"1100011*********",
"11001***********",
"1101************",
"111*************"
]
}
]
},
{
"id": "src_meta",
"step": 1,
"match": "exact",
"method": "index",
"key_size": 4,
"data_size": 20
},
{
"id": "protocl_range",
"step": 2,
"match": "ternary",
"entries": 2,
"key_size": 8,
"ranges": [
{
"min": 0,
"max": 0,
"paths": [
0,
2,
3,
4,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22
],
"classes": [
1,
2,
3,
4,
7,
8,
10,
11,
15,
16,
17,
19
],
"prefixes": [
"00000000"
]
},
{
"min": 0,
"max": 256,
"paths": [
1,
2,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22
],
"classes": [
1,
2,
3,
7,
8,
10,
11,
15,
16,
17,
19
],
"prefixes": [
"********"
]
}
]
},
{
"id": "protocl_meta",
"step": 2,
"match": "exact",
"method": "index",
"key_size": 1,
"data_size": 20
}
]

700
example/priority_aware.json Normal file

@@ -0,0 +1,700 @@
[
{
"id": "dst_range",
"step": 0,
"match": "ternary",
"entries": 42,
"key_size": 16,
"ranges": [
{
"min": 0,
"max": 2136,
"paths": [
0,
1,
2,
3,
4,
5,
6
],
"classes": [
8,
19,
4
],
"prefixes": [
"00000***********",
"0000100000******",
"000010000100****",
"0000100001010***",
"0000100001011000"
],
"prefix_type": "exact"
},
{
"min": 2136,
"max": 2224,
"paths": [
7
],
"classes": [
11
],
"prefixes": [
"0000100001011***",
"00001000011*****",
"00001000100*****",
"000010001010****",
"0000100010110000"
],
"prefix_type": "exact"
},
{
"min": 2224,
"max": 5016,
"paths": [
8,
9
],
"classes": [
1,
19
],
"prefixes": [
"0000************",
"0001000*********",
"00010010********",
"000100110*******",
"000100111000****",
"0001001110010***",
"0001001110011000"
],
"prefix_type": "zero"
},
{
"min": 5016,
"max": 25848,
"paths": [
10,
11,
12
],
"classes": [
19,
7
],
"prefixes": [
"00**************",
"010*************",
"011000**********",
"011001000*******",
"0110010010******",
"01100100110*****",
"011001001110****",
"0110010011110***",
"0110010011111000"
],
"prefix_type": "zero"
},
{
"min": 25848,
"max": 47936,
"paths": [
10,
11,
13
],
"classes": [
19,
7
],
"prefixes": [
"0***************",
"100*************",
"1010************",
"10110***********",
"1011100*********",
"10111010********",
"1011101100******",
"1011101101000000"
],
"prefix_type": "zero"
},
{
"min": 47936,
"max": 47944,
"paths": [
14
],
"classes": [
3
],
"prefixes": [
"1011101101000***",
"1011101101001000"
],
"prefix_type": "exact"
},
{
"min": 47944,
"max": 49152,
"paths": [
16,
15
],
"classes": [
10,
7
],
"prefixes": [
"0***************",
"10**************",
"1100000000000000"
],
"prefix_type": "zero"
},
{
"min": 49152,
"max": 49160,
"paths": [
17,
18
],
"classes": [
16,
2
],
"prefixes": [
"1100000000000***",
"1100000000001000"
],
"prefix_type": "exact"
},
{
"min": 49160,
"max": 65536,
"paths": [
19,
20,
21,
22
],
"classes": [
17,
19,
15
],
"prefixes": [
"****************"
],
"prefix_type": "zero"
}
]
},
{
"id": "dst_meta",
"step": 0,
"match": "exact",
"method": "index",
"key_size": 4,
"data_size": 20
},
{
"id": "src_range",
"step": 1,
"match": "ternary",
"entries": 56,
"key_size": 16,
"ranges": [
{
"min": 0,
"max": 64,
"paths": [
0,
1,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
7,
11,
15,
16,
19
],
"prefixes": [
"0000000000******",
"0000000001000000"
],
"prefix_type": "exact"
},
{
"min": 64,
"max": 128,
"paths": [
3,
5,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
],
"prefixes": [
"0000000001******",
"0000000010000000"
],
"prefix_type": "exact"
},
{
"min": 128,
"max": 280,
"paths": [
3,
6,
7,
8,
10,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
],
"prefixes": [
"000000001*******",
"000000010000****",
"0000000100010***",
"0000000100011000"
],
"prefix_type": "exact"
},
{
"min": 280,
"max": 816,
"paths": [
3,
6,
7,
8,
11,
14,
15,
17,
19
],
"classes": [
3,
4,
7,
11,
15,
16,
19
],
"prefixes": [
"0000000*********",
"00000010********",
"00000011000*****",
"000000110010****",
"0000001100110000"
],
"prefix_type": "zero"
},
{
"min": 816,
"max": 1576,
"paths": [
4,
6,
7,
8,
11,
14,
15,
17,
19
],
"classes": [
3,
7,
11,
15,
16,
19
],
"prefixes": [
"000000**********",
"0000010*********",
"00000110000*****",
"0000011000100***",
"0000011000101000"
],
"prefix_type": "zero"
},
{
"min": 1576,
"max": 2488,
"paths": [
4,
6,
7,
8,
11,
14,
15,
18,
19
],
"classes": [
2,
3,
7,
11,
15,
19
],
"prefixes": [
"00000***********",
"00001000********",
"000010010*******",
"00001001100*****",
"000010011010****",
"0000100110110***",
"0000100110111000"
],
"prefix_type": "zero"
},
{
"min": 2488,
"max": 4776,
"paths": [
4,
6,
7,
8,
11,
14,
16,
18,
19
],
"classes": [
2,
3,
7,
10,
11,
15,
19
],
"prefixes": [
"0000************",
"0001000*********",
"000100100*******",
"00010010100*****",
"0001001010100***",
"0001001010101000"
],
"prefix_type": "zero"
},
{
"min": 4776,
"max": 5224,
"paths": [
4,
6,
7,
8,
11,
14,
16,
18,
20
],
"classes": [
2,
3,
7,
10,
11,
17,
19
],
"prefixes": [
"0000************",
"000100**********",
"0001010000******",
"00010100010*****",
"0001010001100***",
"0001010001101000"
],
"prefix_type": "zero"
},
{
"min": 5224,
"max": 9048,
"paths": [
4,
6,
7,
8,
12,
13,
14,
16,
18,
20
],
"classes": [
2,
3,
10,
11,
17,
19
],
"prefixes": [
"000*************",
"0010000*********",
"00100010********",
"0010001100******",
"001000110100****",
"0010001101010***",
"0010001101011000"
],
"prefix_type": "zero"
},
{
"min": 9048,
"max": 43008,
"paths": [
4,
6,
7,
8,
12,
13,
14,
16,
18,
21
],
"classes": [
2,
3,
10,
11,
19
],
"prefixes": [
"0***************",
"100*************",
"10100***********",
"1010100000000000"
],
"prefix_type": "zero"
},
{
"min": 43008,
"max": 50384,
"paths": [
4,
6,
7,
9,
12,
13,
14,
16,
18,
21
],
"classes": [
1,
2,
3,
10,
11,
19
],
"prefixes": [
"10101***********",
"1011************",
"110000**********",
"110001000*******",
"1100010010******",
"110001001100****",
"1100010011010000"
],
"prefix_type": "exact"
},
{
"min": 50384,
"max": 65536,
"paths": [
4,
6,
7,
9,
12,
13,
14,
16,
18,
22
],
"classes": [
1,
2,
3,
10,
11,
19
],
"prefixes": [
"****************"
],
"prefix_type": "zero"
}
]
},
{
"id": "src_meta",
"step": 1,
"match": "exact",
"method": "index",
"key_size": 4,
"data_size": 20
},
{
"id": "protocl_range",
"step": 2,
"match": "ternary",
"entries": 2,
"key_size": 8,
"ranges": [
{
"min": 0,
"max": 0,
"paths": [
0,
2,
3,
4,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22
],
"classes": [
1,
2,
3,
4,
7,
8,
10,
11,
15,
16,
17,
19
],
"prefixes": [
"00000000"
],
"prefix_type": "exact"
},
{
"min": 0,
"max": 256,
"paths": [
1,
2,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22
],
"classes": [
1,
2,
3,
7,
8,
10,
11,
15,
16,
17,
19
],
"prefixes": [
"********"
],
"prefix_type": "exact"
}
]
},
{
"id": "protocl_meta",
"step": 2,
"match": "exact",
"method": "index",
"key_size": 1,
"data_size": 20
}
]

744
example/tree.json Normal file

@@ -0,0 +1,744 @@
{
"features": {
"dst": [
47936.0,
2128.0,
5024.0,
2224.0,
25856.0,
47936.0,
49168.0,
49152.0
],
"src": [
64.0,
64.0,
816.0,
128.0,
43008.0,
5232.0,
288.0,
2480.0,
1584.0,
9040.0,
4784.0,
50384.0
],
"protocl": [
0.0,
0.0
]
},
"paths": [
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 2128.0
},
{
"feature": "src",
"operation": "<=",
"value": 64.0
},
{
"feature": "src",
"operation": "<=",
"value": 64.0
},
{
"feature": "protocl",
"operation": "<=",
"value": 0.0
}
],
"classification": 19,
"id": 0
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 2128.0
},
{
"feature": "src",
"operation": "<=",
"value": 64.0
},
{
"feature": "src",
"operation": "<=",
"value": 64.0
},
{
"feature": "protocl",
"operation": ">",
"value": 0.0
}
],
"classification": 19,
"id": 1
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 2128.0
},
{
"feature": "src",
"operation": "<=",
"value": 64.0
},
{
"feature": "src",
"operation": ">",
"value": 64.0
}
],
"classification": 8,
"id": 2
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 2128.0
},
{
"feature": "src",
"operation": ">",
"value": 64.0
},
{
"feature": "protocl",
"operation": "<=",
"value": 0.0
},
{
"feature": "src",
"operation": "<=",
"value": 816.0
}
],
"classification": 4,
"id": 3
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 2128.0
},
{
"feature": "src",
"operation": ">",
"value": 64.0
},
{
"feature": "protocl",
"operation": "<=",
"value": 0.0
},
{
"feature": "src",
"operation": ">",
"value": 816.0
}
],
"classification": 19,
"id": 4
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 2128.0
},
{
"feature": "src",
"operation": ">",
"value": 64.0
},
{
"feature": "protocl",
"operation": ">",
"value": 0.0
},
{
"feature": "src",
"operation": "<=",
"value": 128.0
}
],
"classification": 19,
"id": 5
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 2128.0
},
{
"feature": "src",
"operation": ">",
"value": 64.0
},
{
"feature": "protocl",
"operation": ">",
"value": 0.0
},
{
"feature": "src",
"operation": ">",
"value": 128.0
}
],
"classification": 19,
"id": 6
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 2128.0
},
{
"feature": "dst",
"operation": "<=",
"value": 5024.0
},
{
"feature": "dst",
"operation": "<=",
"value": 2224.0
}
],
"classification": 11,
"id": 7
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 2128.0
},
{
"feature": "dst",
"operation": "<=",
"value": 5024.0
},
{
"feature": "dst",
"operation": ">",
"value": 2224.0
},
{
"feature": "src",
"operation": "<=",
"value": 43008.0
}
],
"classification": 19,
"id": 8
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 2128.0
},
{
"feature": "dst",
"operation": "<=",
"value": 5024.0
},
{
"feature": "dst",
"operation": ">",
"value": 2224.0
},
{
"feature": "src",
"operation": ">",
"value": 43008.0
}
],
"classification": 1,
"id": 9
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 2128.0
},
{
"feature": "dst",
"operation": ">",
"value": 5024.0
},
{
"feature": "src",
"operation": "<=",
"value": 5232.0
},
{
"feature": "src",
"operation": "<=",
"value": 288.0
}
],
"classification": 19,
"id": 10
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 2128.0
},
{
"feature": "dst",
"operation": ">",
"value": 5024.0
},
{
"feature": "src",
"operation": "<=",
"value": 5232.0
},
{
"feature": "src",
"operation": ">",
"value": 288.0
}
],
"classification": 7,
"id": 11
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 2128.0
},
{
"feature": "dst",
"operation": ">",
"value": 5024.0
},
{
"feature": "src",
"operation": ">",
"value": 5232.0
},
{
"feature": "dst",
"operation": "<=",
"value": 25856.0
}
],
"classification": 19,
"id": 12
},
{
"conditions": [
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 2128.0
},
{
"feature": "dst",
"operation": ">",
"value": 5024.0
},
{
"feature": "src",
"operation": ">",
"value": 5232.0
},
{
"feature": "dst",
"operation": ">",
"value": 25856.0
}
],
"classification": 19,
"id": 13
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 47936.0
}
],
"classification": 3,
"id": 14
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 49168.0
},
{
"feature": "dst",
"operation": "<=",
"value": 49152.0
},
{
"feature": "src",
"operation": "<=",
"value": 2480.0
}
],
"classification": 7,
"id": 15
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 49168.0
},
{
"feature": "dst",
"operation": "<=",
"value": 49152.0
},
{
"feature": "src",
"operation": ">",
"value": 2480.0
}
],
"classification": 10,
"id": 16
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 49168.0
},
{
"feature": "dst",
"operation": ">",
"value": 49152.0
},
{
"feature": "src",
"operation": "<=",
"value": 1584.0
}
],
"classification": 16,
"id": 17
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": "<=",
"value": 49168.0
},
{
"feature": "dst",
"operation": ">",
"value": 49152.0
},
{
"feature": "src",
"operation": ">",
"value": 1584.0
}
],
"classification": 2,
"id": 18
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 49168.0
},
{
"feature": "src",
"operation": "<=",
"value": 9040.0
},
{
"feature": "src",
"operation": "<=",
"value": 4784.0
}
],
"classification": 15,
"id": 19
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 49168.0
},
{
"feature": "src",
"operation": "<=",
"value": 9040.0
},
{
"feature": "src",
"operation": ">",
"value": 4784.0
}
],
"classification": 17,
"id": 20
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 49168.0
},
{
"feature": "src",
"operation": ">",
"value": 9040.0
},
{
"feature": "src",
"operation": "<=",
"value": 50384.0
}
],
"classification": 19,
"id": 21
},
{
"conditions": [
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 47936.0
},
{
"feature": "dst",
"operation": ">",
"value": 49168.0
},
{
"feature": "src",
"operation": ">",
"value": 9040.0
},
{
"feature": "src",
"operation": ">",
"value": 50384.0
}
],
"classification": 19,
"id": 22
}
],
"classes": [
"Amazon Echo",
"Belkin Motion Sensor",
"Belkin Switch",
"Dropcam",
"HP Printer",
"LiFX Bulb",
"NEST Smoke Sensor",
"Netatmo Camera",
"Netatmo Weather station",
"Pixstart photo frame",
"Samsung Smart Cam",
"Smart Things",
"TP-Link Camera",
"TP-Link Plug",
"Triby Speaker",
"Withings",
"Withings Scale",
"Withings sleep sensor",
"iHome PowerPlug",
"other"
]
}
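example/tree.json is the uncompressed decision tree: `features` lists the raw split thresholds per feature, and each entry in `paths` is one root-to-leaf path with its ordered `conditions` and a `classification` index into `classes`. A small sketch (not repo code) of classifying a sample straight from this representation; the sample values are made up.

```python
import json

OPS = {"<=": lambda v, t: v <= t, ">": lambda v, t: v > t}

def classify_by_path(sample, tree):
    """Find the unique root-to-leaf path whose conditions the sample satisfies."""
    for path in tree["paths"]:
        if all(OPS[c["operation"]](sample[c["feature"]], c["value"]) for c in path["conditions"]):
            return path["id"], tree["classes"][path["classification"]]
    raise ValueError("no path matched")

with open("example/tree.json") as f:
    tree = json.load(f)

# dst=2200, src=50, protocl=0 satisfies path 7 (dst <= 47936, dst > 2128, dst <= 5024, dst <= 2224)
print(classify_by_path({"dst": 2200, "src": 50, "protocl": 0}, tree))   # (7, 'Smart Things')
```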

View File

@@ -0,0 +1,47 @@
[
{
"id": "dst_range",
"step": 0,
"match": "ternary",
"entries": 288,
"key_size": 16
},
{
"id": "dst_meta",
"step": 0,
"match": "exact",
"method": "index",
"key_size": 4,
"data_size": 20
},
{
"id": "src_range",
"step": 1,
"match": "ternary",
"entries": 384,
"key_size": 16
},
{
"id": "src_meta",
"step": 1,
"match": "exact",
"method": "index",
"key_size": 4,
"data_size": 20
},
{
"id": "protocl_range",
"step": 2,
"match": "ternary",
"entries": 32,
"key_size": 8
},
{
"id": "protocl_meta",
"step": 2,
"match": "exact",
"method": "index",
"key_size": 1,
"data_size": 20
}
]

80
extract_all_datasets.py Normal file

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
from pathlib import Path
import numpy as np
import pandas as pd
from labels import mac_to_label
from tqdm import tqdm
import os
ROOT = Path(__file__).resolve().parent
PCAP_DIR = ROOT / "data" / "pcap"
CSV_DIR = ROOT / "data" / "processed"
CSV_DIR.mkdir(parents=True, exist_ok=True)
BATCH = 100_000 # packets per chunk
from scapy.all import rdpcap
def process_pcap(pcap_path: str, csv_path: str) -> None:
all_packets = rdpcap(pcap_path)
print("rdpcap done", flush=True)
results = []
for packet in tqdm(all_packets):
size = len(packet)
try:
proto = packet.proto
except AttributeError:
proto = 0
try:
sport = packet.sport
dport = packet.dport
except AttributeError:
sport = 0
dport = 0
proto = int(proto)
sport = int(sport)
dport = int(dport)
if "Ether" in packet:
eth_dst = packet["Ether"].dst
if eth_dst in mac_to_label:
classification = mac_to_label[eth_dst]
else:
classification = "other"
else:
classification = "other"
metric = [proto,sport,dport,classification]
results.append(metric)
results = (np.array(results)).T
# store the features in the dataframe
dataframe = pd.DataFrame({'protocl':results[0],'src':results[1],'dst':results[2],'classfication':results[3]})
columns = ['protocl','src','dst','classfication']
# save the dataframe to the csv file; if it does not exist, create one.
if os.path.exists(csv_path):
dataframe.to_csv(csv_path,index=False,sep=',',mode='a',columns = columns, header=False)
else:
dataframe.to_csv(csv_path,index=False,sep=',',columns = columns)
print("Done")
def main() -> None:
for pcap in sorted(PCAP_DIR.rglob("*.pcap")):
rel_csv = pcap.relative_to(PCAP_DIR).with_suffix(".csv")
csv_path = CSV_DIR / rel_csv
if csv_path.exists():
print(f"Skip {rel_csv} (CSV exists)")
continue
print(f"Processing {rel_csv}")
csv_path.parent.mkdir(parents=True, exist_ok=True)
process_pcap(str(pcap), str(csv_path))
if __name__ == "__main__":
main()

50
extract_tars.sh Normal file

@@ -0,0 +1,50 @@
#!/usr/bin/env bash
# Usage: extract_tars.sh SOURCE_DIR TARGET_DIR
# For every .tar, .tar.gz, .tgz, .tar.bz2, .tar.xz in SOURCE_DIR:
# 1. Create TARGET_DIR/<name>/
# 2. If TARGET_DIR/<name>/<name>.pcap already exists, skip the archive.
# 3. Otherwise, extract the archive into its own folder.
set -euo pipefail
if [[ $# -ne 2 ]]; then
echo "Usage: $0 SOURCE_DIR TARGET_DIR" >&2
exit 1
fi
src_dir="$1"
dst_dir="$2"
mkdir -p "$dst_dir"
# Strip common extensions to recover the base name
strip_ext() {
local n="$1"
n=${n%.tar.gz}; n=${n%.tgz}; n=${n%.tar.bz2}; n=${n%.tar.xz}; n=${n%.tar}
echo "$n"
}
shopt -s nullglob
for archive in "$src_dir"/*.tar{,.gz,.bz2,.xz} "$src_dir"/*.tgz; do
base=$(basename "$archive")
name=$(strip_ext "$base")
out_dir="$dst_dir/$name"
key_file="$out_dir/$name.pcap"
if [[ -f "$key_file" ]]; then
echo "Skipping $archive$key_file already present"
continue
fi
echo "Extracting $archive into $out_dir"
mkdir -p "$out_dir"
case "$archive" in
*.tar) tar -xf "$archive" -C "$out_dir" ;;
*.tar.gz|*.tgz) tar -xzf "$archive" -C "$out_dir" ;;
*.tar.bz2) tar -xjf "$archive" -C "$out_dir" ;;
*.tar.xz) tar -xJf "$archive" -C "$out_dir" ;;
*) echo "Unknown type: $archive" ;;
esac
done
echo "All archives processed."

1
ideal-rmt-simulator Submodule

Submodule ideal-rmt-simulator added at 852153f017

View File

@@ -3,4 +3,5 @@ numpy
pandas
scikit-learn
pydotplus
matplotlib
matplotlib
scipy

44
sanity_check/csvdiff.py Normal file

@@ -0,0 +1,44 @@
#!/usr/bin/env python3
"""
csvdiff.py file1.csv file2.csv
Streams both files; prints the first differing line or "No differences found".
Uses O(1) memory.
"""
import sys
from itertools import zip_longest
from pathlib import Path
def open_checked(p: str):
print(p)
path = Path(p)
try:
return path.open("r", newline=""), path
except FileNotFoundError:
sys.exit(f"Error: {path} not found")
def human(n: int) -> str:
return f"{n:,}"
def main(a_path: str, b_path: str) -> None:
fa, a = open_checked(a_path)
fb, b = open_checked(b_path)
with fa, fb:
for idx, (ra, rb) in enumerate(zip_longest(fa, fb), 1):
if ra != rb:
print(f"Files differ at line {human(idx)}")
if ra is None:
print(f"{a} ended early")
elif rb is None:
print(f"{b} ended early")
else:
print(f"{a}: {ra.rstrip()}")
print(f"{b}: {rb.rstrip()}")
return
print("No differences found")
if __name__ == "__main__":
if len(sys.argv) != 3:
sys.exit("Usage: csvdiff.py file1.csv file2.csv")
main(sys.argv[1], sys.argv[2])

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,206 @@
#!/usr/bin/env python3
"""diversity_metrics.py (fast version)
Estimate how much diversity each CSV adds without building a giant in-memory
DataFrame. Designed for IoT packet logs with millions of rows.
Quick summary printed as a GitHub-style table (requires *tabulate*; falls back
to pandas plain text).
Usage
-----
python diversity_metrics.py path/to/processed_dir [-r] [--sample 50000]
Metrics
-------
ΔEntropy : change in Shannon entropy of *classification* counts
ΔGini : change in Gini impurity of the same counts
χ² p : Pearson χ² p-value, old vs new classification counts
Jaccard : similarity of unique (src,dst) pairs (0 → new pairs, 1 → no new)
KS src p : Kolmogorov-Smirnov p-value, source-port dist (uses sampling)
KS dst p : Kolmogorov-Smirnov p-value, dest-port dist (uses sampling)
Speed tricks
------------
* No growing DataFrame; we keep Counters / sets / lists.
* Ports for KS are *sampled* (default 50 k) to bound cost.
* (src,dst) pairs are hashed to a 32-bit int to reduce set overhead.
* pandas reads via **pyarrow** engine when available.
"""
import argparse
from pathlib import Path
from collections import Counter
from typing import List, Set
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency, ks_2samp, entropy
try:
from tabulate import tabulate
_USE_TABULATE = True
except ImportError:
_USE_TABULATE = False
# -----------------------------------------------------------------------------
# Helper metrics
# -----------------------------------------------------------------------------
def shannon(counts: Counter) -> float:
total = sum(counts.values())
if total == 0:
return 0.0
p = np.fromiter(counts.values(), dtype=float)
p /= total
return entropy(p, base=2)
def gini(counts: Counter) -> float:
total = sum(counts.values())
if total == 0:
return 0.0
return 1.0 - sum((n / total) ** 2 for n in counts.values())
def jaccard(a: Set[int], b: Set[int]) -> float:
if not a and not b:
return 1.0
return len(a & b) / len(a | b)
# -----------------------------------------------------------------------------
# Core analysis
# -----------------------------------------------------------------------------
def analyse(csv_files: List[Path], sample_size: int):
"""Return list of dicts with diversity metrics for each added file."""
# cumulative state (no big DataFrame!)
class_counter: Counter = Counter()
pair_hashes: Set[int] = set()
src_list: List[int] = []
dst_list: List[int] = []
rows = []
for csv_path in csv_files:
df = pd.read_csv(
csv_path,
engine="pyarrow" if pd.__version__ >= "2" else "c", # fast parse
usecols=["protocl", "src", "dst", "classfication"],
dtype={
"protocl": "uint16",
"protocol": "uint16",
"src": "uint16",
"dst": "uint16",
},
)
# normalise column names
df.rename(columns={"protocl": "protocol", "classfication": "classification"}, inplace=True)
# snapshot previous state
prev_class = class_counter.copy()
prev_pairs = pair_hashes.copy()
prev_src = np.asarray(src_list, dtype=np.uint16)
prev_dst = np.asarray(dst_list, dtype=np.uint16)
# --- update cumulative structures ------------------------------------
class_counter.update(df["classification"].value_counts().to_dict())
# hash (src,dst) into a 32-bit int to save memory
pair_ids = (df["src"].to_numpy(dtype=np.uint32) << np.uint32(16)) | \
df["dst"].to_numpy(dtype=np.uint32)
pair_hashes.update(pair_ids.tolist())  # record the new pairs; otherwise the Jaccard below always compares identical sets
# extend port lists (keep small ints)
src_list.extend(df["src"].tolist())
dst_list.extend(df["dst"].tolist())
# --- metrics ----------------------------------------------------------
# χ² classification
chi_p = np.nan
if prev_class:
all_classes = list(set(prev_class) | set(df["classification"].unique()))
old = [prev_class.get(c, 0) for c in all_classes]
new = [df["classification"].value_counts().get(c, 0) for c in all_classes]
_, chi_p, _, _ = chi2_contingency([old, new])
# entropy & gini deltas
delta_entropy = shannon(class_counter) - shannon(prev_class)
delta_gini = gini(class_counter) - gini(prev_class)
# Jaccard on pair hashes
jc = jaccard(prev_pairs, pair_hashes)
# KS tests on sampled ports
ks_src_p = ks_dst_p = np.nan
if prev_src.size:
new_src = df["src"].to_numpy(dtype=np.uint16)
new_dst = df["dst"].to_numpy(dtype=np.uint16)
if prev_src.size > sample_size:
prev_src_sample = np.random.choice(prev_src, sample_size, replace=False)
else:
prev_src_sample = prev_src
if new_src.size > sample_size:
new_src_sample = np.random.choice(new_src, sample_size, replace=False)
else:
new_src_sample = new_src
if prev_dst.size > sample_size:
prev_dst_sample = np.random.choice(prev_dst, sample_size, replace=False)
else:
prev_dst_sample = prev_dst
if new_dst.size > sample_size:
new_dst_sample = np.random.choice(new_dst, sample_size, replace=False)
else:
new_dst_sample = new_dst
ks_src_p = ks_2samp(prev_src_sample, new_src_sample).pvalue
ks_dst_p = ks_2samp(prev_dst_sample, new_dst_sample).pvalue
rows.append(
{
"File": csv_path.name,
"Rows": len(df),
"ΔEntropy": round(delta_entropy, 4),
"ΔGini": round(delta_gini, 4),
"χ² p": f"{chi_p:.3g}" if not np.isnan(chi_p) else "NA",
"Jaccard": round(jc, 3),
"KS src p": f"{ks_src_p:.3g}" if not np.isnan(ks_src_p) else "NA",
"KS dst p": f"{ks_dst_p:.3g}" if not np.isnan(ks_dst_p) else "NA",
}
)
return rows
# -----------------------------------------------------------------------------
# CLI
# -----------------------------------------------------------------------------
def main():
ap = argparse.ArgumentParser(description="Evaluate diversity contribution of each CSV (fast version).")
ap.add_argument("csv_dir", help="Directory containing CSV files")
ap.add_argument("-r", "--recursive", action="store_true", help="Recursively search csv_dir")
ap.add_argument("--sample", type=int, default=50_000, help="Sample size for KS tests (default 50k)")
args = ap.parse_args()
root = Path(args.csv_dir)
pattern = "**/*.csv" if args.recursive else "*.csv"
csv_files = sorted(root.glob(pattern))
if not csv_files:
print("No CSV files found.")
return
table_rows = analyse(csv_files, args.sample)
if _USE_TABULATE:
print(tabulate(table_rows, headers="keys", tablefmt="github", floatfmt=".4f"))
else:
print(pd.DataFrame(table_rows).to_string(index=False))
print(
"\nLegend:\n • p-values (χ², KS) < 0.05 → new file significantly shifts distribution (GOOD)"
"\n • Positive ΔEntropy or ΔGini → richer mix; near 0 → little new info"
"\n • Jaccard close to 0 → many unseen (src,dst) pairs; close to 1 → redundant."
)
if __name__ == "__main__":
main()

14
setup.sh Normal file

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Creates the directory layout:
# data/
# tar/
# pcap/
# processed/
set -euo pipefail
root="$(cd -- "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
mkdir -p "$root"/data/{tar,pcap,processed,combined}
echo "Directory structure ready under $root/data/"