# diff --git a/run/decision_tree.py b/run/decision_tree.py
# new file mode 100644
# index 0000000..384ac44
# --- /dev/null
# +++ b/run/decision_tree.py
# @@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
Train a decision tree, optionally “nudge” its split thresholds, and
export the result as JSON.

Usage examples
--------------
# plain training, no nudging
python decision_tree.py --input data/combined/data.csv --output tree.json

# nudge every internal threshold, rounding away its 2 lowest bits
python decision_tree.py --input data/combined/data.csv --output tree.json \
    --nudge --bits 2
"""
import argparse
import copy
import json
import math
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier, _tree


# ----------------------------------------------------------------------
# 1. command-line arguments
# ----------------------------------------------------------------------
def _non_negative_int(text: str) -> int:
    """argparse ``type=`` callback: parse *text* as an int, rejecting negatives.

    A negative bit count would otherwise only surface as a ``ValueError``
    ("negative shift count") once training has already finished.
    """
    value = int(text)
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
    return value


parser = argparse.ArgumentParser()
parser.add_argument("--input", "-i", help="CSV file with protocol,src,dst,label", default="../data/combined/data.csv")
parser.add_argument("--output", "-o", help="Path for the exported JSON tree", default="tree.json")
parser.add_argument("--depth", "-d", type=int, default=5,
                    help="Max depth of the decision tree (default: 5)")
parser.add_argument("--nudge", action="store_true",
                    help="Enable threshold nudging")
# The nudging code clears the N lowest bits of each threshold (and rounds),
# so the help text describes bits removed, not bits kept.
parser.add_argument("--bits", type=_non_negative_int, default=2,
                    help="Number of low-order bits to round away from each "
                         "threshold when nudging (default: 2)")
args = parser.parse_args()

# ----------------------------------------------------------------------
# 2.
# helper functions
# ----------------------------------------------------------------------
def nudge_threshold_max_n_bits(threshold: float, n_bits: int) -> int:
    """Round *threshold* so that its ``n_bits`` lowest bits become zero.

    The threshold is floored to an integer first.  With ``n_bits == 0`` that
    floored value is returned unchanged.  Otherwise the ``n_bits`` lowest bits
    are cleared and, if the highest of the cleared bits was set, the result is
    bumped up by ``2 ** n_bits`` (round half up to a multiple of
    ``2 ** n_bits``).

    The clearing mask is 32 bits wide, so any bits above bit 31 of the floored
    threshold are dropped as well; rounding up can still produce ``2 ** 32``.

    Args:
        threshold: Split threshold to nudge.
        n_bits: Number of low-order bits to round away.

    Returns:
        The nudged threshold as an integer.

    Raises:
        ValueError: If ``n_bits`` is negative (negative shift count).
    """
    threshold = math.floor(threshold)
    if n_bits == 0:
        return threshold

    step = 1 << n_bits                 # granularity after nudging
    mask = 0xFFFFFFFF ^ (step - 1)     # 32-bit mask with the low n_bits cleared
    nudged_value = threshold & mask
    if threshold & (step >> 1):        # top discarded bit set -> round up
        nudged_value += step

    return nudged_value


def apply_nudging(tree: _tree.Tree, node_idx: int, n_bits: int) -> None:
    """Nudge, in place, every internal node's threshold below *node_idx*.

    Walks the subtree rooted at ``node_idx`` (inclusive) with an explicit
    stack, so the traversal depth is not bounded by the interpreter's
    recursion limit.  Leaves (both children ``-1``) are left untouched.

    Args:
        tree: Tree whose ``threshold`` array is modified in place.
        node_idx: Index of the subtree root to start from.
        n_bits: Number of low-order bits to round away from each threshold.
    """
    # Read each array once; writes to ``thresholds`` go to the same array the
    # original ``tree.threshold[node] = ...`` assignment targeted.
    left = tree.children_left
    right = tree.children_right
    thresholds = tree.threshold

    pending = [node_idx]
    while pending:
        node = pending.pop()
        children = [child for child in (left[node], right[node]) if child != -1]
        if not children:               # leaf: nothing to nudge
            continue
        thresholds[node] = nudge_threshold_max_n_bits(thresholds[node], n_bits)
        pending.extend(children)


# output the tree
def get_lineage(tree, feature_names):
    """Describe a fitted tree as one root-to-leaf rule per leaf.

    Args:
        tree: Fitted classifier exposing ``classes_`` and a ``tree_`` with the
            ``threshold``, ``feature``, ``children_left``, ``children_right``
            and ``value`` arrays.
        feature_names: Indexable mapping a feature index to its name.

    Returns:
        A dict with three keys:

        * ``"features"``: feature name -> list of thresholds used on it,
          in node order.
        * ``"paths"``: one entry per leaf with ``"conditions"`` (root-first
          list of ``{"feature", "operation", "value"}``), ``"classification"``
          (index of the majority class at the leaf) and ``"id"``.
        * ``"classes"``: the class labels as plain Python values.
    """
    thresholds = tree.tree_.threshold
    node_features = tree.tree_.feature
    left = tree.tree_.children_left
    right = tree.tree_.children_right
    value = tree.tree_.value

    # ``tolist()`` yields native Python values so integer labels stay
    # JSON-serialisable (``list(classes_)`` would keep NumPy scalars).
    data = {
        "features": {},
        "paths": [],
        "classes": np.asarray(tree.classes_).tolist(),
    }

    # child index -> (parent index, "l" | "r"); built once instead of
    # searching the child arrays again for every step up the tree.
    parents = {}
    child_pairs = zip(np.asarray(left).tolist(), np.asarray(right).tolist())
    for node, (l_child, r_child) in enumerate(child_pairs):
        if l_child != -1:
            parents[l_child] = (node, "l")
        if r_child != -1:
            parents[r_child] = (node, "r")

    leaf_ids = np.where(left == -1)[0]   # indices of all leaves
    for path_id, leaf in enumerate(leaf_ids):
        clause = []

        # climb from the leaf to the root, then flip to root-first order
        node = int(leaf)
        while node in parents:
            parent, side = parents[node]
            clause.append(
                {
                    # only internal nodes are looked up, so the feature index
                    # is always a real column, never the leaf sentinel
                    "feature": feature_names[node_features[parent]],
                    "operation": "<=" if side == "l" else ">",
                    "value": float(thresholds[parent]),
                }
            )
            node = parent
        clause.reverse()

        class_idx = int(np.argmax(value[leaf][0]))  # use the leaf itself
        data["paths"].append(
            {"conditions": clause, "classification": class_idx, "id": path_id}
        )

    # collect all thresholds per feature (internal nodes only)
    for node, feat_idx in enumerate(node_features):
        if feat_idx != _tree.TREE_UNDEFINED:
            data["features"].setdefault(feature_names[feat_idx], []).append(
                float(thresholds[node])
            )

    return data


class SetEncoder(json.JSONEncoder):
    """JSON encoder that also accepts sets and NumPy scalars/arrays."""

    def default(self, obj):
        """Convert *obj* to a JSON-serialisable value or defer to the base class."""
        if isinstance(obj, set):
            return list(obj)
        if isinstance(obj, np.generic):      # e.g. np.int64 class labels
            return obj.item()
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# ----------------------------------------------------------------------
# 3. load data
# ----------------------------------------------------------------------
df = pd.read_csv(args.input)
X = df.iloc[:, :3].to_numpy()
Y = df.iloc[:, 3].to_numpy()

print(f"dataset size: {len(X)}")

# ----------------------------------------------------------------------
# 4. train the tree
# ----------------------------------------------------------------------
# Fix the seed: without it sklearn breaks ties between equally good splits
# randomly, so runs that differ only in --bits would not nudge the same tree.
dt = DecisionTreeClassifier(max_depth=args.depth, random_state=0)
dt.fit(X, Y)
print("train accuracy (before nudging):",
      accuracy_score(Y, dt.predict(X)))

if args.nudge:
    # nudge a copy, then swap it in, so the fitted tree is never half-modified
    nudged_tree = copy.deepcopy(dt.tree_)
    apply_nudging(nudged_tree, 0, args.bits)
    dt.tree_ = nudged_tree
    print(f"nudging enabled, removed bottom {args.bits} bit(s) per threshold")

    print("train accuracy (after nudging):",
          accuracy_score(Y, dt.predict(X)))

# ----------------------------------------------------------------------
# 5.
export +# ---------------------------------------------------------------------- +lineage = get_lineage(dt, df.columns[:3]) + +output_path = Path(args.output) +output_path.write_text(json.dumps(lineage, indent=4, cls=SetEncoder)) +print(f"Wrote tree to {output_path.resolve()}") diff --git a/run/print.py b/run/print.py new file mode 100644 index 0000000..2bb4340 --- /dev/null +++ b/run/print.py @@ -0,0 +1,7 @@ +import json +from pathlib import Path + +for file in Path("results/compressed_tree/").glob("*.json"): + with open(file, "r") as f: + s = json.load(f) + print(max(s["paths"])+1) \ No newline at end of file diff --git a/run/rmt.bat b/run/rmt.bat new file mode 100644 index 0000000..c36560f --- /dev/null +++ b/run/rmt.bat @@ -0,0 +1,36 @@ +@echo off +REM ------------------------------------------------------------- +REM Batch-script to evaluate all compressed trees with every mode +REM ------------------------------------------------------------- +setlocal EnableDelayedExpansion + +REM --- where the trees live and where to store results ---------- +set TREEDIR=results\compressed_tree +set OUTDIR=results\rmt + +REM --- python executable (adjust if needed) --------------------- +set PY=python + +REM --- which modes to run -------------------------------------- +set MODELIST=naive priority +REM ------------------------------------------------------------- + +if not exist "%OUTDIR%" mkdir "%OUTDIR%" + +for %%F in ("%TREEDIR%\*.json") do ( + REM strip path → get file name without extension + set BASE=%%~nF + + for %%M in (%MODELIST%) do ( + echo Processing %%~nxF with mode %%M + + "%PY%" tree_to_rmt.py ^ + --mode %%M ^ + --input "%%F" ^ + --output "%OUTDIR%\!BASE!_%%M.json" + + ) +) + +echo All runs complete. 
+pause diff --git a/run/rmt.txt b/run/rmt.txt new file mode 100644 index 0000000..f3f26fc --- /dev/null +++ b/run/rmt.txt @@ -0,0 +1,362 @@ +Processing compressed_tree_d10_b0.json with mode naive +Output written to results\rmt\compressed_tree_d10_b0_naive.json +TCAM bits: 30336 +RAM bits: 6888 +Processing compressed_tree_d10_b0.json with mode priority +Output written to results\rmt\compressed_tree_d10_b0_priority.json +TCAM bits: 26648 +RAM bits: 6888 +Processing compressed_tree_d10_b1.json with mode naive +Output written to results\rmt\compressed_tree_d10_b1_naive.json +TCAM bits: 29936 +RAM bits: 6531 +Processing compressed_tree_d10_b1.json with mode priority +Output written to results\rmt\compressed_tree_d10_b1_priority.json +TCAM bits: 27120 +RAM bits: 6531 +Processing compressed_tree_d10_b3.json with mode naive +Output written to results\rmt\compressed_tree_d10_b3_naive.json +TCAM bits: 21712 +RAM bits: 5649 +Processing compressed_tree_d10_b3.json with mode priority +Output written to results\rmt\compressed_tree_d10_b3_priority.json +TCAM bits: 20048 +RAM bits: 5649 +Processing compressed_tree_d11_b0.json with mode naive +Output written to results\rmt\compressed_tree_d11_b0_naive.json +TCAM bits: 41248 +RAM bits: 10332 +Processing compressed_tree_d11_b0.json with mode priority +Output written to results\rmt\compressed_tree_d11_b0_priority.json +TCAM bits: 37592 +RAM bits: 10332 +Processing compressed_tree_d11_b1.json with mode naive +Output written to results\rmt\compressed_tree_d11_b1_naive.json +TCAM bits: 41072 +RAM bits: 9744 +Processing compressed_tree_d11_b1.json with mode priority +Output written to results\rmt\compressed_tree_d11_b1_priority.json +TCAM bits: 38256 +RAM bits: 9744 +Processing compressed_tree_d11_b3.json with mode naive +Output written to results\rmt\compressed_tree_d11_b3_naive.json +TCAM bits: 28464 +RAM bits: 8190 +Processing compressed_tree_d11_b3.json with mode priority +Output written to 
results\rmt\compressed_tree_d11_b3_priority.json +TCAM bits: 26928 +RAM bits: 8190 +Processing compressed_tree_d12_b0.json with mode naive +Output written to results\rmt\compressed_tree_d12_b0_naive.json +TCAM bits: 55680 +RAM bits: 15393 +Processing compressed_tree_d12_b0.json with mode priority +Output written to results\rmt\compressed_tree_d12_b0_priority.json +TCAM bits: 51592 +RAM bits: 15393 +Processing compressed_tree_d12_b1.json with mode naive +Output written to results\rmt\compressed_tree_d12_b1_naive.json +TCAM bits: 54240 +RAM bits: 14175 +Processing compressed_tree_d12_b1.json with mode priority +Output written to results\rmt\compressed_tree_d12_b1_priority.json +TCAM bits: 51200 +RAM bits: 14175 +Processing compressed_tree_d12_b3.json with mode naive +Output written to results\rmt\compressed_tree_d12_b3_naive.json +TCAM bits: 36048 +RAM bits: 11361 +Processing compressed_tree_d12_b3.json with mode priority +Output written to results\rmt\compressed_tree_d12_b3_priority.json +TCAM bits: 34416 +RAM bits: 11361 +Processing compressed_tree_d13_b0.json with mode naive +Output written to results\rmt\compressed_tree_d13_b0_naive.json +TCAM bits: 73152 +RAM bits: 22680 +Processing compressed_tree_d13_b0.json with mode priority +Output written to results\rmt\compressed_tree_d13_b0_priority.json +TCAM bits: 69096 +RAM bits: 22680 +Processing compressed_tree_d13_b1.json with mode naive +Output written to results\rmt\compressed_tree_d13_b1_naive.json +TCAM bits: 71024 +RAM bits: 20643 +Processing compressed_tree_d13_b1.json with mode priority +Output written to results\rmt\compressed_tree_d13_b1_priority.json +TCAM bits: 68160 +RAM bits: 20643 +Processing compressed_tree_d13_b3.json with mode naive +Output written to results\rmt\compressed_tree_d13_b3_naive.json +TCAM bits: 45152 +RAM bits: 16002 +Processing compressed_tree_d13_b3.json with mode priority +Output written to results\rmt\compressed_tree_d13_b3_priority.json +TCAM bits: 43600 +RAM bits: 16002 
+Processing compressed_tree_d14_b0.json with mode naive +Output written to results\rmt\compressed_tree_d14_b0_naive.json +TCAM bits: 95760 +RAM bits: 33012 +Processing compressed_tree_d14_b0.json with mode priority +Output written to results\rmt\compressed_tree_d14_b0_priority.json +TCAM bits: 91656 +RAM bits: 33012 +Processing compressed_tree_d14_b1.json with mode naive +Output written to results\rmt\compressed_tree_d14_b1_naive.json +TCAM bits: 93520 +RAM bits: 29862 +Processing compressed_tree_d14_b1.json with mode priority +Output written to results\rmt\compressed_tree_d14_b1_priority.json +TCAM bits: 90544 +RAM bits: 29862 +Processing compressed_tree_d14_b3.json with mode naive +Output written to results\rmt\compressed_tree_d14_b3_naive.json +TCAM bits: 56144 +RAM bits: 21819 +Processing compressed_tree_d14_b3.json with mode priority +Output written to results\rmt\compressed_tree_d14_b3_priority.json +TCAM bits: 54544 +RAM bits: 21819 +Processing compressed_tree_d15_b0.json with mode naive +Output written to results\rmt\compressed_tree_d15_b0_naive.json +TCAM bits: 122496 +RAM bits: 46662 +Processing compressed_tree_d15_b0.json with mode priority +Output written to results\rmt\compressed_tree_d15_b0_priority.json +TCAM bits: 118792 +RAM bits: 46662 +Processing compressed_tree_d15_b1.json with mode naive +Output written to results\rmt\compressed_tree_d15_b1_naive.json +TCAM bits: 118640 +RAM bits: 41349 +Processing compressed_tree_d15_b1.json with mode priority +Output written to results\rmt\compressed_tree_d15_b1_priority.json +TCAM bits: 115984 +RAM bits: 41349 +Processing compressed_tree_d15_b3.json with mode naive +Output written to results\rmt\compressed_tree_d15_b3_naive.json +TCAM bits: 68928 +RAM bits: 28875 +Processing compressed_tree_d15_b3.json with mode priority +Output written to results\rmt\compressed_tree_d15_b3_priority.json +TCAM bits: 67328 +RAM bits: 28875 +Processing compressed_tree_d1_b0.json with mode naive +Output written to 
results\rmt\compressed_tree_d1_b0_naive.json +TCAM bits: 256 +RAM bits: 42 +Processing compressed_tree_d1_b0.json with mode priority +Output written to results\rmt\compressed_tree_d1_b0_priority.json +TCAM bits: 128 +RAM bits: 42 +Processing compressed_tree_d1_b1.json with mode naive +Output written to results\rmt\compressed_tree_d1_b1_naive.json +TCAM bits: 256 +RAM bits: 42 +Processing compressed_tree_d1_b1.json with mode priority +Output written to results\rmt\compressed_tree_d1_b1_priority.json +TCAM bits: 144 +RAM bits: 42 +Processing compressed_tree_d1_b3.json with mode naive +Output written to results\rmt\compressed_tree_d1_b3_naive.json +TCAM bits: 240 +RAM bits: 42 +Processing compressed_tree_d1_b3.json with mode priority +Output written to results\rmt\compressed_tree_d1_b3_priority.json +TCAM bits: 128 +RAM bits: 42 +Processing compressed_tree_d2_b0.json with mode naive +Output written to results\rmt\compressed_tree_d2_b0_naive.json +TCAM bits: 592 +RAM bits: 105 +Processing compressed_tree_d2_b0.json with mode priority +Output written to results\rmt\compressed_tree_d2_b0_priority.json +TCAM bits: 288 +RAM bits: 105 +Processing compressed_tree_d2_b1.json with mode naive +Output written to results\rmt\compressed_tree_d2_b1_naive.json +TCAM bits: 592 +RAM bits: 105 +Processing compressed_tree_d2_b1.json with mode priority +Output written to results\rmt\compressed_tree_d2_b1_priority.json +TCAM bits: 320 +RAM bits: 105 +Processing compressed_tree_d2_b3.json with mode naive +Output written to results\rmt\compressed_tree_d2_b3_naive.json +TCAM bits: 544 +RAM bits: 105 +Processing compressed_tree_d2_b3.json with mode priority +Output written to results\rmt\compressed_tree_d2_b3_priority.json +TCAM bits: 288 +RAM bits: 105 +Processing compressed_tree_d3_b0.json with mode naive +Output written to results\rmt\compressed_tree_d3_b0_naive.json +TCAM bits: 1120 +RAM bits: 210 +Processing compressed_tree_d3_b0.json with mode priority +Output written to 
results\rmt\compressed_tree_d3_b0_priority.json +TCAM bits: 640 +RAM bits: 210 +Processing compressed_tree_d3_b1.json with mode naive +Output written to results\rmt\compressed_tree_d3_b1_naive.json +TCAM bits: 1120 +RAM bits: 210 +Processing compressed_tree_d3_b1.json with mode priority +Output written to results\rmt\compressed_tree_d3_b1_priority.json +TCAM bits: 680 +RAM bits: 210 +Processing compressed_tree_d3_b3.json with mode naive +Output written to results\rmt\compressed_tree_d3_b3_naive.json +TCAM bits: 944 +RAM bits: 210 +Processing compressed_tree_d3_b3.json with mode priority +Output written to results\rmt\compressed_tree_d3_b3_priority.json +TCAM bits: 576 +RAM bits: 210 +Processing compressed_tree_d4_b0.json with mode naive +Output written to results\rmt\compressed_tree_d4_b0_naive.json +TCAM bits: 1880 +RAM bits: 357 +Processing compressed_tree_d4_b0.json with mode priority +Output written to results\rmt\compressed_tree_d4_b0_priority.json +TCAM bits: 1128 +RAM bits: 357 +Processing compressed_tree_d4_b1.json with mode naive +Output written to results\rmt\compressed_tree_d4_b1_naive.json +TCAM bits: 1880 +RAM bits: 357 +Processing compressed_tree_d4_b1.json with mode priority +Output written to results\rmt\compressed_tree_d4_b1_priority.json +TCAM bits: 1208 +RAM bits: 357 +Processing compressed_tree_d4_b3.json with mode naive +Output written to results\rmt\compressed_tree_d4_b3_naive.json +TCAM bits: 1632 +RAM bits: 336 +Processing compressed_tree_d4_b3.json with mode priority +Output written to results\rmt\compressed_tree_d4_b3_priority.json +TCAM bits: 1024 +RAM bits: 336 +Processing compressed_tree_d5_b0.json with mode naive +Output written to results\rmt\compressed_tree_d5_b0_naive.json +TCAM bits: 3608 +RAM bits: 609 +Processing compressed_tree_d5_b0.json with mode priority +Output written to results\rmt\compressed_tree_d5_b0_priority.json +TCAM bits: 2200 +RAM bits: 609 +Processing compressed_tree_d5_b1.json with mode naive +Output written to 
results\rmt\compressed_tree_d5_b1_naive.json +TCAM bits: 3608 +RAM bits: 609 +Processing compressed_tree_d5_b1.json with mode priority +Output written to results\rmt\compressed_tree_d5_b1_priority.json +TCAM bits: 2376 +RAM bits: 609 +Processing compressed_tree_d5_b3.json with mode naive +Output written to results\rmt\compressed_tree_d5_b3_naive.json +TCAM bits: 2704 +RAM bits: 546 +Processing compressed_tree_d5_b3.json with mode priority +Output written to results\rmt\compressed_tree_d5_b3_priority.json +TCAM bits: 1824 +RAM bits: 546 +Processing compressed_tree_d6_b0.json with mode naive +Output written to results\rmt\compressed_tree_d6_b0_naive.json +TCAM bits: 6440 +RAM bits: 1134 +Processing compressed_tree_d6_b0.json with mode priority +Output written to results\rmt\compressed_tree_d6_b0_priority.json +TCAM bits: 4512 +RAM bits: 1134 +Processing compressed_tree_d6_b1.json with mode naive +Output written to results\rmt\compressed_tree_d6_b1_naive.json +TCAM bits: 6440 +RAM bits: 1134 +Processing compressed_tree_d6_b1.json with mode priority +Output written to results\rmt\compressed_tree_d6_b1_priority.json +TCAM bits: 4776 +RAM bits: 1134 +Processing compressed_tree_d6_b3.json with mode naive +Output written to results\rmt\compressed_tree_d6_b3_naive.json +TCAM bits: 4832 +RAM bits: 1008 +Processing compressed_tree_d6_b3.json with mode priority +Output written to results\rmt\compressed_tree_d6_b3_priority.json +TCAM bits: 3648 +RAM bits: 1008 +Processing compressed_tree_d7_b0.json with mode naive +Output written to results\rmt\compressed_tree_d7_b0_naive.json +TCAM bits: 10344 +RAM bits: 1848 +Processing compressed_tree_d7_b0.json with mode priority +Output written to results\rmt\compressed_tree_d7_b0_priority.json +TCAM bits: 7808 +RAM bits: 1848 +Processing compressed_tree_d7_b1.json with mode naive +Output written to results\rmt\compressed_tree_d7_b1_naive.json +TCAM bits: 10312 +RAM bits: 1806 +Processing compressed_tree_d7_b1.json with mode priority 
+Output written to results\rmt\compressed_tree_d7_b1_priority.json +TCAM bits: 8136 +RAM bits: 1806 +Processing compressed_tree_d7_b3.json with mode naive +Output written to results\rmt\compressed_tree_d7_b3_naive.json +TCAM bits: 7760 +RAM bits: 1596 +Processing compressed_tree_d7_b3.json with mode priority +Output written to results\rmt\compressed_tree_d7_b3_priority.json +TCAM bits: 6352 +RAM bits: 1596 +Processing compressed_tree_d8_b0.json with mode naive +Output written to results\rmt\compressed_tree_d8_b0_naive.json +TCAM bits: 15672 +RAM bits: 3003 +Processing compressed_tree_d8_b0.json with mode priority +Output written to results\rmt\compressed_tree_d8_b0_priority.json +TCAM bits: 12640 +RAM bits: 3003 +Processing compressed_tree_d8_b1.json with mode naive +Output written to results\rmt\compressed_tree_d8_b1_naive.json +TCAM bits: 15576 +RAM bits: 2919 +Processing compressed_tree_d8_b1.json with mode priority +Output written to results\rmt\compressed_tree_d8_b1_priority.json +TCAM bits: 13160 +RAM bits: 2919 +Processing compressed_tree_d8_b3.json with mode naive +Output written to results\rmt\compressed_tree_d8_b3_naive.json +TCAM bits: 11504 +RAM bits: 2625 +Processing compressed_tree_d8_b3.json with mode priority +Output written to results\rmt\compressed_tree_d8_b3_priority.json +TCAM bits: 10016 +RAM bits: 2625 +Processing compressed_tree_d9_b0.json with mode naive +Output written to results\rmt\compressed_tree_d9_b0_naive.json +TCAM bits: 22640 +RAM bits: 4662 +Processing compressed_tree_d9_b0.json with mode priority +Output written to results\rmt\compressed_tree_d9_b0_priority.json +TCAM bits: 18936 +RAM bits: 4662 +Processing compressed_tree_d9_b1.json with mode naive +Output written to results\rmt\compressed_tree_d9_b1_naive.json +TCAM bits: 22784 +RAM bits: 4557 +Processing compressed_tree_d9_b1.json with mode priority +Output written to results\rmt\compressed_tree_d9_b1_priority.json +TCAM bits: 19872 +RAM bits: 4557 +Processing 
compressed_tree_d9_b3.json with mode naive +Output written to results\rmt\compressed_tree_d9_b3_naive.json +TCAM bits: 16560 +RAM bits: 3948 +Processing compressed_tree_d9_b3.json with mode priority +Output written to results\rmt\compressed_tree_d9_b3_priority.json +TCAM bits: 14880 +RAM bits: 3948 +All runs complete. +Press any key to continue . . . diff --git a/run/run.bat b/run/run.bat new file mode 100644 index 0000000..a57a43d --- /dev/null +++ b/run/run.bat @@ -0,0 +1,24 @@ +@echo off +REM --- settings -------------------------------------------------------- +set INPUT=..\data\combined\data.csv +set OUTDIR=results\tree +set DEPTH_LIST=1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +set BITS_LIST=0 1 3 +set PY=python +REM --------------------------------------------------------------------- + +if not exist "%OUTDIR%" mkdir "%OUTDIR%" + +for %%D in (%DEPTH_LIST%) do ( + for %%B in (%BITS_LIST%) do ( + echo Running depth=%%D bits=%%B + %PY% decision_tree.py ^ + --input "%INPUT%" ^ + --output "%OUTDIR%\tree_d%%D_b%%B.json" ^ + --depth %%D ^ + --nudge --bits %%B + ) +) + +echo All runs complete +pause diff --git a/run/run.txt b/run/run.txt new file mode 100644 index 0000000..dc942e5 --- /dev/null +++ b/run/run.txt @@ -0,0 +1,272 @@ +Running depth=1 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.6249802762830571 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.6249802762830571 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d1_b0.json +Running depth=1 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.6249802762830571 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.6249802762830571 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d1_b1.json +Running depth=1 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.6249802762830571 +nudging enabled, removed bottom 3 bit(s) per 
threshold +train accuracy (after nudging): 0.6249802762830571 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d1_b3.json +Running depth=2 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.6329657127591488 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.6329657127591488 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d2_b0.json +Running depth=2 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.6329657127591488 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.632965582569598 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d2_b1.json +Running depth=2 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.6329657127591488 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.632991490290203 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d2_b3.json +Running depth=3 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.6770542739406867 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.6770542739406867 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d3_b0.json +Running depth=3 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.6770542739406867 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.6770412549856089 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d3_b1.json +Running depth=3 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.6770542739406867 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.6785083610333301 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d3_b3.json +Running depth=4 bits=0 
+dataset size: 7681108 +train accuracy (before nudging): 0.7785798611346175 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.7785798611346175 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d4_b0.json +Running depth=4 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.7785798611346175 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.7762147075656273 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d4_b1.json +Running depth=4 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.7785798611346175 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7764365505601536 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d4_b3.json +Running depth=5 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.8410252791654538 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.8410252791654538 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d5_b0.json +Running depth=5 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.8410252791654538 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.834092425207405 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d5_b1.json +Running depth=5 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.8410252791654538 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.772544924508287 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d5_b3.json +Running depth=6 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.8646269522574087 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.8646269522574087 
+Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d6_b0.json +Running depth=6 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.8646269522574087 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.8576925360247506 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d6_b1.json +Running depth=6 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.8646269522574087 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.794651761178205 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d6_b3.json +Running depth=7 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.8806056365826389 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.8806056365826389 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d7_b0.json +Running depth=7 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.8806056365826389 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.8736095105029118 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d7_b1.json +Running depth=7 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.8806056365826389 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7695685309983924 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d7_b3.json +Running depth=8 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.8930218140403702 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.8930218140403702 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d8_b0.json +Running depth=8 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 
0.8930218140403702 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.8853817704424934 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d8_b1.json +Running depth=8 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.8930218140403702 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7773965683075931 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d8_b3.json +Running depth=9 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.9065990219119429 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.9065990219119429 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d9_b0.json +Running depth=9 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.9065990219119429 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.8971600191014109 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d9_b1.json +Running depth=9 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.9065990219119429 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7901483744272311 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d9_b3.json +Running depth=10 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.9131070673658019 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.9131070673658019 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d10_b0.json +Running depth=10 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.9131070673658019 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.9012124292484887 +Wrote tree to 
C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d10_b1.json +Running depth=10 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.9131070673658019 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7823837394292594 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d10_b3.json +Running depth=11 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.9167131877328115 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.9167131877328115 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d11_b0.json +Running depth=11 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.9167131877328115 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.9033505322409215 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d11_b1.json +Running depth=11 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.9167131877328115 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7834850128392935 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d11_b3.json +Running depth=12 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.9190772997853955 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.9190772997853955 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d12_b0.json +Running depth=12 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.9190772997853955 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.9050692946902973 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d12_b1.json +Running depth=12 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 
0.9190772997853955 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7733082258445005 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d12_b3.json +Running depth=13 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.9210431620021486 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.9210431620021486 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d13_b0.json +Running depth=13 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.9210431620021486 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.9069113466442602 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d13_b1.json +Running depth=13 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.9210431620021486 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7656775558942799 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d13_b3.json +Running depth=14 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.9232170671210456 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.9232170671210456 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d14_b0.json +Running depth=14 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.9232169369314948 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.9071005120615411 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d14_b1.json +Running depth=14 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.9232170671210456 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.7649352150757417 +Wrote tree to 
C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d14_b3.json +Running depth=15 bits=0 +dataset size: 7681108 +train accuracy (before nudging): 0.9249752770043072 +nudging enabled, removed bottom 0 bit(s) per threshold +train accuracy (after nudging): 0.9249752770043072 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d15_b0.json +Running depth=15 bits=1 +dataset size: 7681108 +train accuracy (before nudging): 0.9249752770043072 +nudging enabled, removed bottom 1 bit(s) per threshold +train accuracy (after nudging): 0.908089692268355 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d15_b1.json +Running depth=15 bits=3 +dataset size: 7681108 +train accuracy (before nudging): 0.9249752770043072 +nudging enabled, removed bottom 3 bit(s) per threshold +train accuracy (after nudging): 0.762985496363285 +Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d15_b3.json +All runs complete +Press any key to continue . . . diff --git a/run/tree_compress.py b/run/tree_compress.py new file mode 100644 index 0000000..3d52de8 --- /dev/null +++ b/run/tree_compress.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +"""Batch‑compress decision‑tree JSON files. + +This script preserves the original logic but loops over every *.json file +in results/tree and drops a corresponding compressed file in +results/compressed_tree. 
"""Batch-compress decision-tree JSON files.

Every ``*.json`` file in ``results/tree`` is collapsed into per-feature range
layers and written to ``results/compressed_tree``.

Example:
    $ python compress_trees_batch.py
"""

from __future__ import annotations

import json
import math
import os
from collections import defaultdict
from pathlib import Path

INPUT_DIR = Path("results/tree")
OUTPUT_DIR = Path("results/compressed_tree")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


class SetEncoder(json.JSONEncoder):
    """JSON encoder that serialises sets as lists.

    Members are emitted in sorted order so that repeated runs produce
    identical files; sets whose members cannot be ordered against each other
    fall back to plain iteration order.
    """

    def default(self, obj):  # type: ignore[override]
        if isinstance(obj, (set, frozenset)):
            try:
                return sorted(obj)
            except TypeError:
                return list(obj)
        return super().default(obj)


def is_in_range(x: int, lower: int | None, upper: int | None) -> bool:  # noqa: N803
    """Return whether ``x`` lies in the half-open interval ``(lower, upper]``.

    ``None`` for either bound means that side is unbounded.
    """
    if lower is not None and x <= lower:
        return False
    if upper is not None and x > upper:
        return False
    return True


def _collect_breakpoints(paths: list[dict]) -> dict[str, list[int]]:
    """Return the sorted, de-duplicated integer thresholds used per feature.

    Features keep the order in which they first appear in ``paths``.
    """
    found: dict[str, set[int]] = defaultdict(set)
    for path in paths:
        for condition in path["conditions"]:
            # x <= 3.7 and x > 3.7 are both decided at 3 for integer x, so
            # every threshold is floored regardless of the operation.
            found[condition["feature"]].add(math.floor(condition["value"]))
    return {feature: sorted(values) for feature, values in found.items()}


def _path_bounds(path: dict, features) -> dict[str, dict[str, int | None]]:
    """Collapse one path's conditions into the tightest ``(min, max]`` per feature.

    Features the path never tests stay unbounded (``None`` on both sides).
    """
    bounds: dict[str, dict[str, int | None]] = {
        feature: {"min": None, "max": None} for feature in features
    }
    for condition in path["conditions"]:
        value = math.floor(condition["value"])
        slot = bounds[condition["feature"]]
        operation = condition["operation"]
        if operation == "<=":
            if slot["max"] is None or value < slot["max"]:
                slot["max"] = value
        elif operation == ">":
            if slot["min"] is None or value > slot["min"]:
                slot["min"] = value
    return bounds


def compress_tree(tree: dict) -> dict:
    """Convert a path-based decision tree into per-feature range layers.

    Args:
        tree: Mapping with ``"paths"`` (each path has ``"conditions"`` made of
            ``feature`` / ``operation`` / ``value`` entries and a
            ``"classification"``) and ``"classes"``.

    Returns:
        A mapping with ``"paths"`` (path ids, i.e. positions in
        ``tree["paths"]``), ``"classes"``, ``"path_to_class"`` and
        ``"layers"``. Each layer is the feature's list of disjoint
        ``(min, max]`` ranges, every range carrying the set of path ids and
        the set of classes that can still be reached inside it.

    The input ``tree`` is not modified.
    """
    paths = tree["paths"]
    breakpoints = _collect_breakpoints(paths)

    # n breakpoints split the axis into n + 1 ranges:
    # (None, b0], (b0, b1], ..., (b_last, None)
    layers: dict[str, list[dict[str, object]]] = {
        feature: [
            {"min": lower, "max": upper, "paths": set(), "classes": set()}
            for lower, upper in zip([None, *points], [*points, None])
        ]
        for feature, points in breakpoints.items()
    }

    for pid, path in enumerate(paths):
        cls = path["classification"]
        for feature, bound in _path_bounds(path, breakpoints).items():
            points = breakpoints[feature]
            # One representative value per range: its upper breakpoint, and
            # "last breakpoint + 1" for the open-ended final range.
            probes = [*points, points[-1] + 1]
            for bucket, probe in zip(layers[feature], probes):
                if is_in_range(probe, bound["min"], bound["max"]):
                    bucket["paths"].add(pid)  # type: ignore[attr-defined]
                    bucket["classes"].add(cls)  # type: ignore[attr-defined]

    return {
        "paths": list(range(len(paths))),
        "classes": tree["classes"],
        "layers": layers,
        "path_to_class": {
            pid: path["classification"] for pid, path in enumerate(paths)
        },
    }


for tree_path in INPUT_DIR.glob("*.json"):
    with tree_path.open(encoding="utf-8") as f:
        tree = json.load(f)

    out_path = OUTPUT_DIR / tree_path.name.replace("tree", "compressed_tree")
    with out_path.open("w", encoding="utf-8") as f_out:
        json.dump(compress_tree(tree), f_out, indent=4, cls=SetEncoder)
"""Range‑to‑Prefix evaluation tool.

Builds an RMT table description (one ternary table and one exact-match
metadata table per feature layer) from a compressed decision tree and reports
the TCAM/RAM bits it needs under a selectable range-expansion strategy.

Example:
    $ python rmt_selectable.py --mode naive
    $ python rmt_selectable.py --mode priority --input mytree.json --output result.json
"""

import argparse
import json
import math
import sys
from pathlib import Path

# ---------------------------------------------------------------------------
# Static configuration
# ---------------------------------------------------------------------------
# Key width in bits of each match field, indexed by feature (layer) name.
field_width = {
    "src": 16,
    "dst": 16,
    "protocol": 8,
}

# ---------------------------------------------------------------------------
# Helper routines
# ---------------------------------------------------------------------------

def int_to_bin(i, width):
    """Return ``i`` in binary, left-padded with zeros to ``width`` digits."""
    return bin(i)[2:].zfill(width)


def increment_dc(pfx):
    """Turn the last concrete bit of ``pfx`` into a don't-care (``*``).

    A prefix that has no concrete bit left is returned unchanged.
    """
    first_star = pfx.find("*")
    last_bit = (len(pfx) if first_star == -1 else first_star) - 1
    if last_bit < 0:
        # Already fully wildcarded; a negative index would wrap around.
        return pfx
    return pfx[:last_bit] + "*" + pfx[last_bit + 1:]


def can_merge(pfx_a, pfx_b):
    """Return whether two prefixes are siblings, i.e. differ only in their last concrete bit."""
    bits_a = pfx_a.replace("*", "")
    bits_b = pfx_b.replace("*", "")
    if not bits_a or not bits_b:
        # A full wildcard has no last bit to compare.
        return False
    return bits_a[:-1] == bits_b[:-1] and bits_a[-1] != bits_b[-1]


def merge(pfx_a, prefixes):
    """Merge ``pfx_a`` into the tail of ``prefixes`` in place, cascading upwards.

    The last entry of ``prefixes`` is replaced by ``pfx_a`` with one more
    don't-care bit; then, for as long as the two trailing entries are
    siblings, they are fused into their common parent.
    """
    prefixes[-1] = increment_dc(pfx_a)

    for i in range(len(prefixes) - 2, -1, -1):
        if not can_merge(prefixes[i], prefixes[i + 1]):
            # Earlier entries were left untouched, so nothing further can merge.
            break
        prefixes.pop()
        prefixes[i] = increment_dc(prefixes[i])


def convert_range(lower, upper, width):
    """Return the ternary prefixes that cover the inclusive range ``[lower, upper]``.

    Args:
        lower: First value of the range.
        upper: Last value of the range; clamped to ``2 ** width - 1``.
        width: Key width in bits; every returned prefix has this length.

    Returns:
        Prefixes in ascending order, each a string of ``0``/``1`` followed by
        ``*`` don't-care bits. An empty range (``lower`` above the clamped
        ``upper``) yields an empty list.
    """
    top = min(upper, 2 ** width - 1)
    prefixes = []
    start = lower
    while start <= top:
        # Largest power-of-two block aligned at `start` ...
        free_bits = width if start == 0 else (start & -start).bit_length() - 1
        # ... shrunk until it no longer runs past the end of the range.
        while free_bits and start + (1 << free_bits) - 1 > top:
            free_bits -= 1
        fixed_bits = width - free_bits
        head = int_to_bin(start >> free_bits, fixed_bits) if fixed_bits else ""
        prefixes.append(head + "*" * free_bits)
        start += 1 << free_bits
    return prefixes


def _index_bits(count):
    """Return the number of bits needed to index ``count`` entries (0 for 0 or 1)."""
    # Integer equivalent of ceil(log2(count)) that is also defined for count == 0.
    return max(count - 1, 0).bit_length()


def _meta_table(layer, step, num_ranges, num_classes):
    """Describe the exact-match metadata table that accompanies a layer's TCAM."""
    return {
        "id": f"{layer}_meta",
        "step": step,
        "match": "exact",
        "method": "index",
        "key_size": _index_bits(num_ranges),
        "data_size": num_classes,
    }


def _close_open_bounds(r, width):
    """Replace ``None`` bounds of range ``r`` in place by the edges of the key space."""
    if r["min"] is None:
        r["min"] = 0
    # Checked independently of "min": a range may be open on both sides.
    if r["max"] is None:
        r["max"] = 2 ** width


def _expanded_rmt(tree, expand):
    """Build the table list for a strategy that really expands every range.

    ``expand(r, width)`` must store its result on the range dict ``r`` and
    return the list of prefixes it chose.
    """
    layers = tree["layers"]
    num_classes = len(tree["classes"])

    rmt = []
    tcam_bits = 0
    ram_bits = 0

    for step, (layer, ranges) in enumerate(layers.items()):
        prefix_width = field_width[layer]
        num_prefixes = 0
        for r in ranges:
            _close_open_bounds(r, prefix_width)
            num_prefixes += len(expand(r, prefix_width))
        tcam_bits += num_prefixes * prefix_width

        rmt.append(
            {
                "id": f"{layer}_range",
                "step": step,
                "match": "ternary",
                "entries": num_prefixes,
                "key_size": prefix_width,
                "ranges": ranges,
            }
        )

        # One data word of len(classes) bits per range.
        num_ranges = len(ranges)
        rmt.append(_meta_table(layer, step, num_ranges, num_classes))
        ram_bits += num_ranges * num_classes

    return rmt, tcam_bits, ram_bits


def _expand_exact(r, width):
    """Expand ``r`` to the prefixes covering ``[min, max]`` and record them on ``r``."""
    r["prefixes"] = convert_range(r["min"], r["max"], width)
    return r["prefixes"]


def _expand_priority(r, width):
    """Record on ``r`` the shorter of the exact and the zero-based expansion.

    ``prefix_type`` is ``"exact"`` when the ``[min, max]`` expansion is used
    (also on ties) and ``"zero"`` when the ``[0, max]`` expansion is smaller.
    """
    regular_prefixes = convert_range(r["min"], r["max"], width)
    zero_start_prefixes = convert_range(0, r["max"], width)

    if len(regular_prefixes) <= len(zero_start_prefixes):
        r["prefixes"] = regular_prefixes
        r["prefix_type"] = "exact"
    else:
        r["prefixes"] = zero_start_prefixes
        r["prefix_type"] = "zero"
    return r["prefixes"]

# ---------------------------------------------------------------------------
# RMT construction strategies
# ---------------------------------------------------------------------------

def worst_case_rmt(tree):
    """Estimate table sizes assuming every range costs ``2 * width`` prefixes.

    Args:
        tree: Compressed tree with ``"layers"`` (feature name -> list of
            ranges) and ``"classes"``.

    Returns:
        ``(rmt, tcam_bits, ram_bits)`` where ``rmt`` lists one ternary and one
        exact-match table per layer.
    """
    layers = tree["layers"]
    num_classes = len(tree["classes"])

    rmt = []
    tcam_bits = 0
    ram_bits = 0

    for step, (layer, ranges) in enumerate(layers.items()):
        prefix_width = field_width[layer]
        num_ranges = len(ranges)
        # 2*k prefixes for each of the R ranges of a k-bit field.
        num_prefixes = 2 * prefix_width * num_ranges

        rmt.append(
            {
                "id": f"{layer}_range",
                "step": step,
                "match": "ternary",
                "entries": num_prefixes,
                "key_size": prefix_width,
            }
        )
        tcam_bits += num_prefixes * prefix_width

        rmt.append(_meta_table(layer, step, num_ranges, num_classes))
        ram_bits += num_ranges * num_classes

    return rmt, tcam_bits, ram_bits


def naive_rmt(tree):
    """Size the tables by expanding every range to its own prefix set.

    The range dicts inside ``tree["layers"]`` are updated in place: open
    bounds are replaced by numbers and a ``"prefixes"`` list is added.

    Returns:
        ``(rmt, tcam_bits, ram_bits)``.
    """
    return _expanded_rmt(tree, _expand_exact)


def priority_aware(tree):
    """Size the tables letting a range start at 0 when that needs fewer prefixes.

    The ranges of a layer are assumed to be disjoint and already in the
    correct order. The range dicts inside ``tree["layers"]`` are updated in
    place with ``"prefixes"`` and ``"prefix_type"``.

    Returns:
        ``(rmt, tcam_bits, ram_bits)``.
    """
    return _expanded_rmt(tree, _expand_priority)


# --mode value -> (strategy function, default output file name)
_STRATEGIES = {
    "worst": (worst_case_rmt, "worst_case_rmt.json"),
    "naive": (naive_rmt, "naive_rmt.json"),
    "priority": (priority_aware, "priority_aware.json"),
}

# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------

def parse_args() -> argparse.Namespace:
    """Parse the command line (``--mode``, ``--input``, ``--output``)."""
    parser = argparse.ArgumentParser(description="Evaluate RMT memory usage for different range‑to‑prefix strategies.")
    parser.add_argument("--mode", choices=["worst", "naive", "priority"], default="worst", help="Strategy to use")
    parser.add_argument("--input", default="compressed_tree.json", help="Input tree JSON file")
    parser.add_argument(
        "--output",
        default=None,
        help="Output RMT JSON file (defaults to a per-mode name such as naive_rmt.json)",
    )
    return parser.parse_args()


def main() -> None:
    """Load the compressed tree, run the selected strategy and write the result."""
    args = parse_args()

    try:
        with open(args.input, encoding="utf-8") as f:
            tree = json.load(f)
    except FileNotFoundError:
        sys.exit(f"Input file '{args.input}' not found.")

    build, default_out = _STRATEGIES[args.mode]
    rmt, tcam_bits, ram_bits = build(tree)

    out_file = args.output or default_out

    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(rmt, f, indent=4)

    #! command python3 ideal-rmt-simulator/sim.py {out_file}
    print(f"Output written to {out_file}")
    print(f"TCAM bits: {tcam_bits}")
    print(f"RAM bits: {ram_bits}")


if __name__ == "__main__":
    main()