mirror of
https://github.com/ltcptgeneral/IdealRMT-DecisionTrees.git
synced 2025-09-06 15:27:23 +00:00
Run data and code
This commit is contained in:
168
run/decision_tree.py
Normal file
168
run/decision_tree.py
Normal file
@@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Train a decision tree, optionally “nudge” its split thresholds, and
|
||||
export the result as JSON.
|
||||
|
||||
Usage examples
|
||||
--------------
|
||||
# plain training, no nudging
|
||||
python decision_tree.py --input data/combined/data.csv --output tree.json
|
||||
|
||||
# nudge every internal threshold, rounding away the bottom 2 bits
|
||||
python decision_tree.py --input data/combined/data.csv --output tree.json \
|
||||
--nudge --bits 2
|
||||
"""
|
||||
import argparse
|
||||
import copy
|
||||
import json
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.metrics import accuracy_score
|
||||
from sklearn.tree import DecisionTreeClassifier, _tree
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# 1. command-line arguments
|
||||
# ----------------------------------------------------------------------
|
||||
# Command-line interface. ``args`` is read by the load/train/export steps
# further down in this script.
parser = argparse.ArgumentParser()
parser.add_argument("--input", "-i", help="CSV file with protocol,src,dst,label", default="../data/combined/data.csv")
parser.add_argument("--output", "-o", help="Path for the exported JSON tree", default="tree.json")
parser.add_argument("--depth", "-d", type=int, default=5,
                    help="Max depth of the decision tree (default: 5)")
parser.add_argument("--nudge", action="store_true",
                    help="Enable threshold nudging")
# --bits is the number of low-order bits that nudging rounds away from each
# threshold (the script reports it as "removed bottom N bit(s)"), not the
# number of bits that are kept.
parser.add_argument("--bits", type=int, default=2,
                    help="Number of low-order bits to round away when nudging (default: 2)")
args = parser.parse_args()

# A negative bit count would only surface later as a "negative shift count"
# error inside the nudging helper, after the tree has already been trained;
# reject it here with a normal usage message instead.
if args.nudge and args.bits < 0:
    parser.error("--bits must be non-negative")
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# 2. helper functions
|
||||
# ----------------------------------------------------------------------
|
||||
def nudge_threshold_max_n_bits(threshold: float, n_bits: int) -> int:
    """Round a threshold to the nearest multiple of ``2**n_bits``.

    The threshold is first floored to an integer. With ``n_bits == 0`` that
    integer is returned unchanged. Otherwise the value is masked to 32 bits
    with its lowest ``n_bits`` bits cleared, and ``2**n_bits`` is added when
    the highest of the cleared bits was set (i.e. ties round up).

    Because of the fixed 32-bit mask, any bits above bit 31 are discarded
    before the optional round-up is applied.
    """
    value = math.floor(threshold)
    if n_bits == 0:
        return value

    low_bits = (1 << n_bits) - 1        # the bits that get rounded away
    keep_mask = 0xFFFFFFFF ^ low_bits   # 32-bit mask with those bits cleared
    half_bit = 1 << (n_bits - 1)        # most significant of the dropped bits
    step = 1 << n_bits

    truncated = value & keep_mask
    return truncated + step if value & half_bit else truncated
|
||||
|
||||
def apply_nudging(tree: _tree.Tree, node_idx: int, n_bits: int) -> None:
    """Nudge the threshold of every internal node below and including ``node_idx``.

    Children are visited before their parent (post-order). A node is treated
    as internal when at least one of its child indices is not -1; its entry
    in ``tree.threshold`` is then overwritten in place with
    ``nudge_threshold_max_n_bits(old_threshold, n_bits)``. Nodes whose child
    indices are both -1 are left untouched.
    """
    left_child = tree.children_left[node_idx]
    right_child = tree.children_right[node_idx]
    has_left = left_child != -1
    has_right = right_child != -1

    # Descend first so that every child is handled before this node.
    if has_left:
        apply_nudging(tree, left_child, n_bits)
    if has_right:
        apply_nudging(tree, right_child, n_bits)

    if not (has_left or has_right):
        return  # no children: nothing to nudge here

    old_threshold = tree.threshold[node_idx]
    tree.threshold[node_idx] = nudge_threshold_max_n_bits(old_threshold, n_bits)
|
||||
|
||||
# output the tree
|
||||
def get_lineage(tree, feature_names):
    """Flatten a fitted decision tree into a dict of root-to-leaf paths.

    Parameters
    ----------
    tree
        Fitted classifier exposing ``classes_`` and ``tree_`` (with
        ``threshold``, ``feature``, ``children_left``, ``children_right``
        and ``value`` arrays indexed by node id).
    feature_names
        Sequence mapping a feature index to its name.

    Returns
    -------
    dict
        ``"classes"``: ``list(tree.classes_)``.
        ``"paths"``: one entry per leaf, in ascending node-id order, each
        with ``"id"`` (running index), ``"classification"`` (argmax over the
        leaf's value row) and ``"conditions"`` (the tests from the root down
        to the leaf; ``"<="`` for a left branch, ``">"`` for a right branch).
        ``"features"``: feature name -> list of thresholds of every internal
        node that splits on that feature.
    """
    thresholds = tree.tree_.threshold
    feature_idx = tree.tree_.feature
    left = tree.tree_.children_left
    right = tree.tree_.children_right
    value = tree.tree_.value

    # A node is a leaf when it has no left child (child index -1).
    is_leaf = left == -1

    # Only internal nodes carry a usable feature index. Leaves hold an
    # "undefined" marker, and indexing feature_names with it would either
    # pick an unrelated name or raise IndexError, so leaves get None.
    features = [
        None if leaf else feature_names[idx]
        for idx, leaf in zip(feature_idx, is_leaf)
    ]

    # Build the child -> (parent, branch) map once, so that climbing from a
    # leaf to the root does not rescan the child arrays at every step.
    parent_of = {}
    for node, (l_child, r_child) in enumerate(zip(left, right)):
        if r_child != -1:
            parent_of[int(r_child)] = (node, "r")
        if l_child != -1:
            parent_of[int(l_child)] = (node, "l")

    data = {"features": {}, "paths": [], "classes": list(tree.classes_)}

    leaf_ids = np.where(is_leaf)[0]  # indices of all leaves
    for path_id, leaf in enumerate(leaf_ids):
        clause = []
        node = int(leaf)
        # Walk upwards until a node without a parent (the root) is reached.
        while node in parent_of:
            parent, direction = parent_of[node]
            clause.append(
                {
                    "feature": features[parent],
                    "operation": "<=" if direction == "l" else ">",
                    "value": thresholds[parent],
                }
            )
            node = parent
        clause.reverse()  # collected leaf -> root, exported root -> leaf

        class_idx = int(np.argmax(value[leaf][0]))  # use the leaf itself
        data["paths"].append(
            {"conditions": clause, "classification": class_idx, "id": path_id}
        )

    # collect all thresholds per feature (internal nodes only)
    for node, feat in enumerate(features):
        if not is_leaf[node]:
            data["features"].setdefault(feat, []).append(thresholds[node])

    return data
|
||||
|
||||
|
||||
class SetEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``set`` values as JSON arrays."""

    def default(self, obj):
        """Return a list for a set; defer to the base encoder for anything else."""
        if not isinstance(obj, set):
            # The base implementation raises TypeError for unsupported types.
            return super().default(obj)
        return list(obj)
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# 3. load data
|
||||
# ----------------------------------------------------------------------
|
||||
# Read the whole CSV named by --input into memory.
df = pd.read_csv(args.input)
# Features are the first three columns and the label is the fourth; both are
# selected by position, not by column name.
X = df.iloc[:, :3].to_numpy()
Y = df.iloc[:, 3].to_numpy()

print(f"dataset size: {len(X)}")
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# 4. train the tree
|
||||
# ----------------------------------------------------------------------
|
||||
dt = DecisionTreeClassifier(max_depth=args.depth)
dt.fit(X, Y)
# Accuracy is measured on the same rows the tree was fitted on; no held-out
# split is made in this script.
print("train accuracy (before nudging):",
      accuracy_score(Y, dt.predict(X)))

if args.nudge:
    # Nudge a deep copy and then swap it into the classifier, so the
    # thresholds are only replaced once the whole traversal has finished.
    nudged_tree = copy.deepcopy(dt.tree_)
    apply_nudging(nudged_tree, 0, args.bits)  # traversal starts at node 0
    dt.tree_ = nudged_tree
    print(f"nudging enabled, removed bottom {args.bits} bit(s) per threshold")
|
||||
|
||||
# NOTE(review): indentation was lost in this copy of the file, so it is
# unclear whether this report sits inside the `if args.nudge:` body or runs
# unconditionally — confirm against the repository before relying on it.
print("train accuracy (after nudging):",
      accuracy_score(Y, dt.predict(X)))
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# 5. export
|
||||
# ----------------------------------------------------------------------
|
||||
# The labels of the three feature columns become the feature names in the export.
lineage = get_lineage(dt, df.columns[:3])

output_path = Path(args.output)
# SetEncoder lets json.dumps serialise any set values as lists.
output_path.write_text(json.dumps(lineage, indent=4, cls=SetEncoder))
print(f"Wrote tree to {output_path.resolve()}")
|
7
run/print.py
Normal file
7
run/print.py
Normal file
@@ -0,0 +1,7 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# For every JSON file under results/compressed_tree/, print one more than the
# largest element of its "paths" entry.
# NOTE(review): the schema of these compressed-tree files is not visible here;
# this presumably expects "paths" to hold integer ids — confirm.
for tree_path in Path("results/compressed_tree/").glob("*.json"):
    with tree_path.open("r") as handle:
        tree = json.load(handle)
    print(max(tree["paths"]) + 1)
|
36
run/rmt.bat
Normal file
36
run/rmt.bat
Normal file
@@ -0,0 +1,36 @@
|
||||
@echo off
REM -------------------------------------------------------------
REM Batch-script to evaluate all compressed trees with every mode
REM -------------------------------------------------------------
REM Runs tree_to_rmt.py once per JSON file in TREEDIR and per mode in
REM MODELIST, writing each result into OUTDIR.
REM Delayed expansion is enabled because BASE is assigned inside the loop
REM body below and has to be read with its per-iteration value.
setlocal EnableDelayedExpansion

REM --- where the trees live and where to store results ----------
set TREEDIR=results\compressed_tree
set OUTDIR=results\rmt

REM --- python executable (adjust if needed) ---------------------
set PY=python

REM --- which modes to run --------------------------------------
set MODELIST=naive priority
REM -------------------------------------------------------------

REM Create the output directory on first use.
if not exist "%OUTDIR%" mkdir "%OUTDIR%"

for %%F in ("%TREEDIR%\*.json") do (
    REM strip path → get file name without extension
    set BASE=%%~nF

    for %%M in (%MODELIST%) do (
        echo Processing %%~nxF with mode %%M

        REM Output file name is the input base name followed by the mode.
        "%PY%" tree_to_rmt.py ^
            --mode %%M ^
            --input "%%F" ^
            --output "%OUTDIR%\!BASE!_%%M.json"

    )
)

echo All runs complete.
REM Keep the console window open until a key is pressed.
pause
|
362
run/rmt.txt
Normal file
362
run/rmt.txt
Normal file
@@ -0,0 +1,362 @@
|
||||
Processing compressed_tree_d10_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d10_b0_naive.json
|
||||
TCAM bits: 30336
|
||||
RAM bits: 6888
|
||||
Processing compressed_tree_d10_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d10_b0_priority.json
|
||||
TCAM bits: 26648
|
||||
RAM bits: 6888
|
||||
Processing compressed_tree_d10_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d10_b1_naive.json
|
||||
TCAM bits: 29936
|
||||
RAM bits: 6531
|
||||
Processing compressed_tree_d10_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d10_b1_priority.json
|
||||
TCAM bits: 27120
|
||||
RAM bits: 6531
|
||||
Processing compressed_tree_d10_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d10_b3_naive.json
|
||||
TCAM bits: 21712
|
||||
RAM bits: 5649
|
||||
Processing compressed_tree_d10_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d10_b3_priority.json
|
||||
TCAM bits: 20048
|
||||
RAM bits: 5649
|
||||
Processing compressed_tree_d11_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d11_b0_naive.json
|
||||
TCAM bits: 41248
|
||||
RAM bits: 10332
|
||||
Processing compressed_tree_d11_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d11_b0_priority.json
|
||||
TCAM bits: 37592
|
||||
RAM bits: 10332
|
||||
Processing compressed_tree_d11_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d11_b1_naive.json
|
||||
TCAM bits: 41072
|
||||
RAM bits: 9744
|
||||
Processing compressed_tree_d11_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d11_b1_priority.json
|
||||
TCAM bits: 38256
|
||||
RAM bits: 9744
|
||||
Processing compressed_tree_d11_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d11_b3_naive.json
|
||||
TCAM bits: 28464
|
||||
RAM bits: 8190
|
||||
Processing compressed_tree_d11_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d11_b3_priority.json
|
||||
TCAM bits: 26928
|
||||
RAM bits: 8190
|
||||
Processing compressed_tree_d12_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d12_b0_naive.json
|
||||
TCAM bits: 55680
|
||||
RAM bits: 15393
|
||||
Processing compressed_tree_d12_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d12_b0_priority.json
|
||||
TCAM bits: 51592
|
||||
RAM bits: 15393
|
||||
Processing compressed_tree_d12_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d12_b1_naive.json
|
||||
TCAM bits: 54240
|
||||
RAM bits: 14175
|
||||
Processing compressed_tree_d12_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d12_b1_priority.json
|
||||
TCAM bits: 51200
|
||||
RAM bits: 14175
|
||||
Processing compressed_tree_d12_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d12_b3_naive.json
|
||||
TCAM bits: 36048
|
||||
RAM bits: 11361
|
||||
Processing compressed_tree_d12_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d12_b3_priority.json
|
||||
TCAM bits: 34416
|
||||
RAM bits: 11361
|
||||
Processing compressed_tree_d13_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d13_b0_naive.json
|
||||
TCAM bits: 73152
|
||||
RAM bits: 22680
|
||||
Processing compressed_tree_d13_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d13_b0_priority.json
|
||||
TCAM bits: 69096
|
||||
RAM bits: 22680
|
||||
Processing compressed_tree_d13_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d13_b1_naive.json
|
||||
TCAM bits: 71024
|
||||
RAM bits: 20643
|
||||
Processing compressed_tree_d13_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d13_b1_priority.json
|
||||
TCAM bits: 68160
|
||||
RAM bits: 20643
|
||||
Processing compressed_tree_d13_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d13_b3_naive.json
|
||||
TCAM bits: 45152
|
||||
RAM bits: 16002
|
||||
Processing compressed_tree_d13_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d13_b3_priority.json
|
||||
TCAM bits: 43600
|
||||
RAM bits: 16002
|
||||
Processing compressed_tree_d14_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d14_b0_naive.json
|
||||
TCAM bits: 95760
|
||||
RAM bits: 33012
|
||||
Processing compressed_tree_d14_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d14_b0_priority.json
|
||||
TCAM bits: 91656
|
||||
RAM bits: 33012
|
||||
Processing compressed_tree_d14_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d14_b1_naive.json
|
||||
TCAM bits: 93520
|
||||
RAM bits: 29862
|
||||
Processing compressed_tree_d14_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d14_b1_priority.json
|
||||
TCAM bits: 90544
|
||||
RAM bits: 29862
|
||||
Processing compressed_tree_d14_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d14_b3_naive.json
|
||||
TCAM bits: 56144
|
||||
RAM bits: 21819
|
||||
Processing compressed_tree_d14_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d14_b3_priority.json
|
||||
TCAM bits: 54544
|
||||
RAM bits: 21819
|
||||
Processing compressed_tree_d15_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d15_b0_naive.json
|
||||
TCAM bits: 122496
|
||||
RAM bits: 46662
|
||||
Processing compressed_tree_d15_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d15_b0_priority.json
|
||||
TCAM bits: 118792
|
||||
RAM bits: 46662
|
||||
Processing compressed_tree_d15_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d15_b1_naive.json
|
||||
TCAM bits: 118640
|
||||
RAM bits: 41349
|
||||
Processing compressed_tree_d15_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d15_b1_priority.json
|
||||
TCAM bits: 115984
|
||||
RAM bits: 41349
|
||||
Processing compressed_tree_d15_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d15_b3_naive.json
|
||||
TCAM bits: 68928
|
||||
RAM bits: 28875
|
||||
Processing compressed_tree_d15_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d15_b3_priority.json
|
||||
TCAM bits: 67328
|
||||
RAM bits: 28875
|
||||
Processing compressed_tree_d1_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d1_b0_naive.json
|
||||
TCAM bits: 256
|
||||
RAM bits: 42
|
||||
Processing compressed_tree_d1_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d1_b0_priority.json
|
||||
TCAM bits: 128
|
||||
RAM bits: 42
|
||||
Processing compressed_tree_d1_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d1_b1_naive.json
|
||||
TCAM bits: 256
|
||||
RAM bits: 42
|
||||
Processing compressed_tree_d1_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d1_b1_priority.json
|
||||
TCAM bits: 144
|
||||
RAM bits: 42
|
||||
Processing compressed_tree_d1_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d1_b3_naive.json
|
||||
TCAM bits: 240
|
||||
RAM bits: 42
|
||||
Processing compressed_tree_d1_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d1_b3_priority.json
|
||||
TCAM bits: 128
|
||||
RAM bits: 42
|
||||
Processing compressed_tree_d2_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d2_b0_naive.json
|
||||
TCAM bits: 592
|
||||
RAM bits: 105
|
||||
Processing compressed_tree_d2_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d2_b0_priority.json
|
||||
TCAM bits: 288
|
||||
RAM bits: 105
|
||||
Processing compressed_tree_d2_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d2_b1_naive.json
|
||||
TCAM bits: 592
|
||||
RAM bits: 105
|
||||
Processing compressed_tree_d2_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d2_b1_priority.json
|
||||
TCAM bits: 320
|
||||
RAM bits: 105
|
||||
Processing compressed_tree_d2_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d2_b3_naive.json
|
||||
TCAM bits: 544
|
||||
RAM bits: 105
|
||||
Processing compressed_tree_d2_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d2_b3_priority.json
|
||||
TCAM bits: 288
|
||||
RAM bits: 105
|
||||
Processing compressed_tree_d3_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d3_b0_naive.json
|
||||
TCAM bits: 1120
|
||||
RAM bits: 210
|
||||
Processing compressed_tree_d3_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d3_b0_priority.json
|
||||
TCAM bits: 640
|
||||
RAM bits: 210
|
||||
Processing compressed_tree_d3_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d3_b1_naive.json
|
||||
TCAM bits: 1120
|
||||
RAM bits: 210
|
||||
Processing compressed_tree_d3_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d3_b1_priority.json
|
||||
TCAM bits: 680
|
||||
RAM bits: 210
|
||||
Processing compressed_tree_d3_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d3_b3_naive.json
|
||||
TCAM bits: 944
|
||||
RAM bits: 210
|
||||
Processing compressed_tree_d3_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d3_b3_priority.json
|
||||
TCAM bits: 576
|
||||
RAM bits: 210
|
||||
Processing compressed_tree_d4_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d4_b0_naive.json
|
||||
TCAM bits: 1880
|
||||
RAM bits: 357
|
||||
Processing compressed_tree_d4_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d4_b0_priority.json
|
||||
TCAM bits: 1128
|
||||
RAM bits: 357
|
||||
Processing compressed_tree_d4_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d4_b1_naive.json
|
||||
TCAM bits: 1880
|
||||
RAM bits: 357
|
||||
Processing compressed_tree_d4_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d4_b1_priority.json
|
||||
TCAM bits: 1208
|
||||
RAM bits: 357
|
||||
Processing compressed_tree_d4_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d4_b3_naive.json
|
||||
TCAM bits: 1632
|
||||
RAM bits: 336
|
||||
Processing compressed_tree_d4_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d4_b3_priority.json
|
||||
TCAM bits: 1024
|
||||
RAM bits: 336
|
||||
Processing compressed_tree_d5_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d5_b0_naive.json
|
||||
TCAM bits: 3608
|
||||
RAM bits: 609
|
||||
Processing compressed_tree_d5_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d5_b0_priority.json
|
||||
TCAM bits: 2200
|
||||
RAM bits: 609
|
||||
Processing compressed_tree_d5_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d5_b1_naive.json
|
||||
TCAM bits: 3608
|
||||
RAM bits: 609
|
||||
Processing compressed_tree_d5_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d5_b1_priority.json
|
||||
TCAM bits: 2376
|
||||
RAM bits: 609
|
||||
Processing compressed_tree_d5_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d5_b3_naive.json
|
||||
TCAM bits: 2704
|
||||
RAM bits: 546
|
||||
Processing compressed_tree_d5_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d5_b3_priority.json
|
||||
TCAM bits: 1824
|
||||
RAM bits: 546
|
||||
Processing compressed_tree_d6_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d6_b0_naive.json
|
||||
TCAM bits: 6440
|
||||
RAM bits: 1134
|
||||
Processing compressed_tree_d6_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d6_b0_priority.json
|
||||
TCAM bits: 4512
|
||||
RAM bits: 1134
|
||||
Processing compressed_tree_d6_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d6_b1_naive.json
|
||||
TCAM bits: 6440
|
||||
RAM bits: 1134
|
||||
Processing compressed_tree_d6_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d6_b1_priority.json
|
||||
TCAM bits: 4776
|
||||
RAM bits: 1134
|
||||
Processing compressed_tree_d6_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d6_b3_naive.json
|
||||
TCAM bits: 4832
|
||||
RAM bits: 1008
|
||||
Processing compressed_tree_d6_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d6_b3_priority.json
|
||||
TCAM bits: 3648
|
||||
RAM bits: 1008
|
||||
Processing compressed_tree_d7_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d7_b0_naive.json
|
||||
TCAM bits: 10344
|
||||
RAM bits: 1848
|
||||
Processing compressed_tree_d7_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d7_b0_priority.json
|
||||
TCAM bits: 7808
|
||||
RAM bits: 1848
|
||||
Processing compressed_tree_d7_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d7_b1_naive.json
|
||||
TCAM bits: 10312
|
||||
RAM bits: 1806
|
||||
Processing compressed_tree_d7_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d7_b1_priority.json
|
||||
TCAM bits: 8136
|
||||
RAM bits: 1806
|
||||
Processing compressed_tree_d7_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d7_b3_naive.json
|
||||
TCAM bits: 7760
|
||||
RAM bits: 1596
|
||||
Processing compressed_tree_d7_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d7_b3_priority.json
|
||||
TCAM bits: 6352
|
||||
RAM bits: 1596
|
||||
Processing compressed_tree_d8_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d8_b0_naive.json
|
||||
TCAM bits: 15672
|
||||
RAM bits: 3003
|
||||
Processing compressed_tree_d8_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d8_b0_priority.json
|
||||
TCAM bits: 12640
|
||||
RAM bits: 3003
|
||||
Processing compressed_tree_d8_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d8_b1_naive.json
|
||||
TCAM bits: 15576
|
||||
RAM bits: 2919
|
||||
Processing compressed_tree_d8_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d8_b1_priority.json
|
||||
TCAM bits: 13160
|
||||
RAM bits: 2919
|
||||
Processing compressed_tree_d8_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d8_b3_naive.json
|
||||
TCAM bits: 11504
|
||||
RAM bits: 2625
|
||||
Processing compressed_tree_d8_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d8_b3_priority.json
|
||||
TCAM bits: 10016
|
||||
RAM bits: 2625
|
||||
Processing compressed_tree_d9_b0.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d9_b0_naive.json
|
||||
TCAM bits: 22640
|
||||
RAM bits: 4662
|
||||
Processing compressed_tree_d9_b0.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d9_b0_priority.json
|
||||
TCAM bits: 18936
|
||||
RAM bits: 4662
|
||||
Processing compressed_tree_d9_b1.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d9_b1_naive.json
|
||||
TCAM bits: 22784
|
||||
RAM bits: 4557
|
||||
Processing compressed_tree_d9_b1.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d9_b1_priority.json
|
||||
TCAM bits: 19872
|
||||
RAM bits: 4557
|
||||
Processing compressed_tree_d9_b3.json with mode naive
|
||||
Output written to results\rmt\compressed_tree_d9_b3_naive.json
|
||||
TCAM bits: 16560
|
||||
RAM bits: 3948
|
||||
Processing compressed_tree_d9_b3.json with mode priority
|
||||
Output written to results\rmt\compressed_tree_d9_b3_priority.json
|
||||
TCAM bits: 14880
|
||||
RAM bits: 3948
|
||||
All runs complete.
|
||||
Press any key to continue . . .
|
24
run/run.bat
Normal file
24
run/run.bat
Normal file
@@ -0,0 +1,24 @@
|
||||
@echo off
REM Trains one decision tree per depth and bits combination by calling
REM decision_tree.py, writing each exported tree into OUTDIR.
REM --- settings --------------------------------------------------------
set INPUT=..\data\combined\data.csv
set OUTDIR=results\tree
set DEPTH_LIST=1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
set BITS_LIST=0 1 3
set PY=python
REM ---------------------------------------------------------------------

REM Create the output directory on first use.
if not exist "%OUTDIR%" mkdir "%OUTDIR%"

for %%D in (%DEPTH_LIST%) do (
    for %%B in (%BITS_LIST%) do (
        echo Running depth=%%D bits=%%B
        REM The nudge flag is always passed, so the zero-bit runs also go
        REM through the nudging code path.
        %PY% decision_tree.py ^
            --input "%INPUT%" ^
            --output "%OUTDIR%\tree_d%%D_b%%B.json" ^
            --depth %%D ^
            --nudge --bits %%B
    )
)

echo All runs complete
REM Keep the console window open until a key is pressed.
pause
|
272
run/run.txt
Normal file
272
run/run.txt
Normal file
@@ -0,0 +1,272 @@
|
||||
Running depth=1 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6249802762830571
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.6249802762830571
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d1_b0.json
|
||||
Running depth=1 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6249802762830571
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.6249802762830571
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d1_b1.json
|
||||
Running depth=1 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6249802762830571
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.6249802762830571
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d1_b3.json
|
||||
Running depth=2 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6329657127591488
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.6329657127591488
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d2_b0.json
|
||||
Running depth=2 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6329657127591488
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.632965582569598
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d2_b1.json
|
||||
Running depth=2 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6329657127591488
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.632991490290203
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d2_b3.json
|
||||
Running depth=3 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6770542739406867
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.6770542739406867
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d3_b0.json
|
||||
Running depth=3 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6770542739406867
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.6770412549856089
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d3_b1.json
|
||||
Running depth=3 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.6770542739406867
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.6785083610333301
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d3_b3.json
|
||||
Running depth=4 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.7785798611346175
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7785798611346175
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d4_b0.json
|
||||
Running depth=4 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.7785798611346175
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7762147075656273
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d4_b1.json
|
||||
Running depth=4 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.7785798611346175
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7764365505601536
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d4_b3.json
|
||||
Running depth=5 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8410252791654538
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.8410252791654538
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d5_b0.json
|
||||
Running depth=5 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8410252791654538
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.834092425207405
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d5_b1.json
|
||||
Running depth=5 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8410252791654538
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.772544924508287
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d5_b3.json
|
||||
Running depth=6 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8646269522574087
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.8646269522574087
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d6_b0.json
|
||||
Running depth=6 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8646269522574087
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.8576925360247506
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d6_b1.json
|
||||
Running depth=6 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8646269522574087
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.794651761178205
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d6_b3.json
|
||||
Running depth=7 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8806056365826389
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.8806056365826389
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d7_b0.json
|
||||
Running depth=7 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8806056365826389
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.8736095105029118
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d7_b1.json
|
||||
Running depth=7 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8806056365826389
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7695685309983924
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d7_b3.json
|
||||
Running depth=8 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8930218140403702
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.8930218140403702
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d8_b0.json
|
||||
Running depth=8 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8930218140403702
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.8853817704424934
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d8_b1.json
|
||||
Running depth=8 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.8930218140403702
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7773965683075931
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d8_b3.json
|
||||
Running depth=9 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9065990219119429
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9065990219119429
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d9_b0.json
|
||||
Running depth=9 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9065990219119429
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.8971600191014109
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d9_b1.json
|
||||
Running depth=9 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9065990219119429
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7901483744272311
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d9_b3.json
|
||||
Running depth=10 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9131070673658019
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9131070673658019
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d10_b0.json
|
||||
Running depth=10 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9131070673658019
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9012124292484887
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d10_b1.json
|
||||
Running depth=10 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9131070673658019
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7823837394292594
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d10_b3.json
|
||||
Running depth=11 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9167131877328115
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9167131877328115
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d11_b0.json
|
||||
Running depth=11 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9167131877328115
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9033505322409215
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d11_b1.json
|
||||
Running depth=11 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9167131877328115
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7834850128392935
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d11_b3.json
|
||||
Running depth=12 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9190772997853955
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9190772997853955
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d12_b0.json
|
||||
Running depth=12 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9190772997853955
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9050692946902973
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d12_b1.json
|
||||
Running depth=12 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9190772997853955
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7733082258445005
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d12_b3.json
|
||||
Running depth=13 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9210431620021486
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9210431620021486
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d13_b0.json
|
||||
Running depth=13 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9210431620021486
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9069113466442602
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d13_b1.json
|
||||
Running depth=13 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9210431620021486
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7656775558942799
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d13_b3.json
|
||||
Running depth=14 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9232170671210456
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9232170671210456
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d14_b0.json
|
||||
Running depth=14 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9232169369314948
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9071005120615411
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d14_b1.json
|
||||
Running depth=14 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9232170671210456
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.7649352150757417
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d14_b3.json
|
||||
Running depth=15 bits=0
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9249752770043072
|
||||
nudging enabled, removed bottom 0 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.9249752770043072
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d15_b0.json
|
||||
Running depth=15 bits=1
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9249752770043072
|
||||
nudging enabled, removed bottom 1 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.908089692268355
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d15_b1.json
|
||||
Running depth=15 bits=3
|
||||
dataset size: 7681108
|
||||
train accuracy (before nudging): 0.9249752770043072
|
||||
nudging enabled, removed bottom 3 bit(s) per threshold
|
||||
train accuracy (after nudging): 0.762985496363285
|
||||
Wrote tree to C:\Users\jaipa\CS\cs216\IdealRMT-DecisionTrees\run\results\tree\tree_d15_b3.json
|
||||
All runs complete
|
||||
Press any key to continue . . .
|
173
run/tree_compress.py
Normal file
173
run/tree_compress.py
Normal file
@@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Batch‑compress decision‑tree JSON files.
|
||||
|
||||
This script preserves the original logic but loops over every *.json file
|
||||
in results/tree and drops a corresponding compressed file in
|
||||
results/compressed_tree.
|
||||
|
||||
Example:
|
||||
$ python tree_compress.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
# Directory scanned for input tree JSON files (relative to the current
# working directory).
INPUT_DIR = Path("results/tree")
# Directory that receives one compressed JSON file per input tree.
OUTPUT_DIR = Path("results/compressed_tree")
# Runs at import time: create the output directory (and missing parents);
# an already existing directory is not an error.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
class SetEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``set``/``frozenset`` values as arrays.

    Members are emitted in sorted order so that the generated files are
    reproducible from run to run (plain set iteration order depends on
    hashing, which is randomised per process for strings).
    """

    def default(self, obj):  # type: ignore[override]
        """Return a JSON-serialisable stand-in for *obj*.

        Sets become sorted lists; sets whose members cannot be ordered
        against each other fall back to plain iteration order.  Every other
        type is delegated to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, (set, frozenset)):
            try:
                return sorted(obj)
            except TypeError:
                # Mixed / unorderable members: keep the old behaviour.
                return list(obj)
        return super().default(obj)
|
||||
|
||||
|
||||
# helper function given a range and value x returns if x is in the range
|
||||
|
||||
def is_in_range(x: int, lower: int | None, upper: int | None) -> bool:  # noqa: N803
    """Report whether *x* lies in the half-open interval ``(lower, upper]``.

    A bound of ``None`` means that side of the interval is unbounded, so
    ``is_in_range(x, None, None)`` is always true.
    """
    above_lower = lower is None or x > lower
    within_upper = upper is None or x <= upper
    return above_lower and within_upper
|
||||
|
||||
|
||||
def _compress_tree(tree: dict) -> dict:
    """Collapse the root-to-leaf paths of *tree* into per-feature range layers.

    *tree* is the parsed JSON of one decision tree; it must provide
    ``tree["paths"]`` (each path having ``"conditions"`` and
    ``"classification"``) and ``tree["classes"]``.  The paths are annotated
    in place with ``"id"`` and ``"compressed"`` keys and their condition
    values are floored to integers.

    Returns a dict with the keys ``"paths"`` (all path ids), ``"classes"``
    (copied from the input), ``"layers"`` (for every feature, the ordered
    list of disjoint ranges with the path ids / classes reachable in each
    range) and ``"path_to_class"``.
    """
    paths = tree["paths"]

    # Give every path a sequential id and round thresholds down to integers.
    # Flooring is used for both operators: for integer x, ``x <= t`` equals
    # ``x <= floor(t)`` and ``x > t`` equals ``x > floor(t)``.
    # NOTE(review): this presumes integer-valued features - confirm.
    for idx, path in enumerate(paths):
        path["id"] = idx
        for condition in path["conditions"]:
            condition["value"] = math.floor(condition["value"])

    # Distinct thresholds per feature, features kept in first-seen order.
    seen: dict[str, set[int]] = defaultdict(set)
    for path in paths:
        for condition in path["conditions"]:
            seen[condition["feature"]].add(condition["value"])
    breakpoints: dict[str, list[int]] = {
        feature: sorted(values) for feature, values in seen.items()
    }

    # Reduce each path to one (min, max] interval per feature: the tightest
    # upper bound among its "<=" tests and the tightest lower bound among
    # its ">" tests.  None means "unbounded on that side".
    for path in paths:
        bounds: dict[str, dict[str, int | None]] = {
            feature: {"min": None, "max": None} for feature in breakpoints
        }
        for condition in path["conditions"]:
            interval = bounds[condition["feature"]]
            operation = condition["operation"]
            value = condition["value"]
            if operation == "<=":
                if interval["max"] is None or value < interval["max"]:
                    interval["max"] = value
            elif operation == ">":
                if interval["min"] is None or value > interval["min"]:
                    interval["min"] = value
        path["compressed"] = bounds

    # One bucket per disjoint range of a feature: n breakpoints -> n + 1
    # ranges (the last one is "greater than the last breakpoint").
    buckets_id: dict[str, list[set[int]]] = {
        feature: [set() for _ in range(len(points) + 1)]
        for feature, points in breakpoints.items()
    }
    buckets_class: dict[str, list[set[str]]] = {
        feature: [set() for _ in range(len(points) + 1)]
        for feature, points in breakpoints.items()
    }

    # A path belongs to a bucket when a representative value of the bucket
    # falls inside the path's interval.  Bucket i is represented by its
    # upper breakpoint; the open-ended tail bucket by "last breakpoint + 1".
    for path in paths:
        pid = path["id"]
        cls = path["classification"]
        for feature_name, interval in path["compressed"].items():
            lower = interval["min"]
            upper = interval["max"]
            points = breakpoints[feature_name]
            representatives = [*points, points[-1] + 1]
            for idx, probe in enumerate(representatives):
                if is_in_range(probe, lower, upper):
                    buckets_id[feature_name][idx].add(pid)
                    buckets_class[feature_name][idx].add(cls)

    # Pair every bucket with its (min, max] bounds; the first range has no
    # lower bound and the last range has no upper bound.
    layers: dict[str, list[dict[str, object]]] = {}
    for feature_name, points in breakpoints.items():
        edges = [None, *points, None]
        layers[feature_name] = [
            {
                "min": lower,
                "max": upper,
                "paths": buckets_id[feature_name][idx],
                "classes": buckets_class[feature_name][idx],
            }
            for idx, (lower, upper) in enumerate(zip(edges, edges[1:]))
        ]

    return {
        "paths": list(range(len(paths))),
        "classes": tree["classes"],
        "layers": layers,
        "path_to_class": {path["id"]: path["classification"] for path in paths},
    }


# Compress every tree found in INPUT_DIR and write the result, under the
# same file name with "tree" replaced by "compressed_tree", to OUTPUT_DIR.
for tree_path in INPUT_DIR.glob("*.json"):
    with tree_path.open() as f:
        tree = json.load(f)

    compressed_tree = _compress_tree(tree)

    out_path = OUTPUT_DIR / tree_path.name.replace("tree", "compressed_tree")
    with out_path.open("w") as f_out:
        json.dump(compressed_tree, f_out, indent=4, cls=SetEncoder)
|
||||
|
||||
# print(f"Wrote {out_path.relative_to(Path.cwd())}")
|
279
run/tree_to_rmt.py
Normal file
279
run/tree_to_rmt.py
Normal file
@@ -0,0 +1,279 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Range‑to‑Prefix evaluation tool
|
||||
|
||||
This script keeps the original logic intact while letting you choose
|
||||
which expansion strategy to run via a command‑line flag.
|
||||
|
||||
Example:
|
||||
$ python tree_to_rmt.py --mode naive
|
||||
$ python tree_to_rmt.py --mode priority --input mytree.json --output result.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Static configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
# Width in bits of each match field, keyed by the layer (feature) name used
# in the compressed tree JSON.  The value is used both as the TCAM key size
# and as the prefix length during range-to-prefix expansion.
field_width = {
    "src": 16,
    "dst": 16,
    "protocol": 8,
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper routines (unchanged)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def int_to_bin(i, width):
    """Return *i* as a binary string, left-padded with zeros to *width* digits.

    Values that need more than *width* digits are returned unpadded at their
    natural length (no truncation).

    Raises:
        ValueError: if *i* is negative.  The previous slicing of ``bin(i)``
            silently produced strings such as ``"0000b101"`` in that case.
    """
    if i < 0:
        raise ValueError(f"int_to_bin requires a non-negative integer, got {i}")
    return format(i, "b").zfill(width)
|
||||
|
||||
|
||||
def increment_dc(pfx):
    """Widen a ternary prefix by one bit.

    The last fixed bit (the character just before the first ``*``, or the
    final character when there is no ``*``) is replaced by ``*``.

    A prefix without any fixed bit left (all ``*`` or empty) cannot be
    widened and is returned unchanged; previously that case indexed
    position -1 and returned a malformed, longer string.
    """
    first_star = pfx.find("*")
    fixed_len = len(pfx) if first_star == -1 else first_star
    if fixed_len == 0:
        return pfx
    return pfx[: fixed_len - 1] + "*" + pfx[fixed_len:]
|
||||
|
||||
|
||||
def can_merge(pfx_a, pfx_b):
    """Report whether two ternary prefixes are siblings that can be merged.

    With the ``*`` characters removed, the two prefixes must agree on every
    fixed bit except the last one, and differ in that last bit.

    A prefix with no fixed bits (all ``*`` or empty) has no sibling, so the
    answer is ``False``; previously that case raised ``IndexError``.
    """
    bits_a = pfx_a.replace("*", "")
    bits_b = pfx_b.replace("*", "")
    if not bits_a or not bits_b:
        return False
    return bits_a[:-1] == bits_b[:-1] and bits_a[-1] != bits_b[-1]
|
||||
|
||||
|
||||
def merge(pfx_a, prefixes):
    """Fold *pfx_a* into the tail of *prefixes* and cascade further merges.

    The caller has established that *pfx_a* is the sibling of
    ``prefixes[-1]``.  The tail entry is replaced by the widened *pfx_a*;
    then, for as long as the new tail is itself the sibling of the entry
    before it, the tail is removed and that entry is widened.

    *prefixes* is modified in place; nothing is returned.

    The cascade stops at the first pair that cannot be merged.  The earlier
    version kept scanning towards the front of the list, while ``pop()``
    always removes the last element - so only a merge involving the tail
    was ever handled correctly, and the extra scanning was wasted work.
    """
    prefixes[-1] = increment_dc(pfx_a)

    for i in range(len(prefixes) - 2, -1, -1):
        # prefixes[i + 1] is always the current tail here.
        if not can_merge(prefixes[i], prefixes[i + 1]):
            break
        prefixes.pop()
        prefixes[i] = increment_dc(prefixes[i])
|
||||
|
||||
|
||||
def convert_range(lower, upper, width):
    """Expand the integer range ``[lower, upper]`` into ternary prefixes.

    Both bounds are inclusive.  *upper* is clamped to the largest value that
    fits in *width* bits.  The result is the list of maximal aligned
    power-of-two blocks covering the range, in ascending order; each block
    is a string of *width* characters made of fixed bits followed by ``*``
    wildcards.

    The blocks are computed directly in O(width) steps instead of
    enumerating and pairwise-merging every integer of the range (up to
    2**width iterations); for a non-empty range the resulting prefixes are
    the same.

    An empty range (``lower`` greater than the clamped ``upper``) yields an
    empty list.

    Raises:
        ValueError: if *lower* is negative.
    """
    if lower < 0:
        raise ValueError(f"convert_range requires lower >= 0, got {lower}")

    last = min(upper, 2 ** width - 1)
    prefixes = []
    current = lower
    while current <= last:
        # Alignment limit: a block of 2**k values can only start at an
        # address whose k low bits are zero (address 0 is aligned for all k).
        if current == 0:
            free_bits = width
        else:
            free_bits = min(width, (current & -current).bit_length() - 1)
        # Size limit: the block must not run past the end of the range.
        remaining = last - current + 1
        free_bits = min(free_bits, remaining.bit_length() - 1)

        bits = format(current, "b").zfill(width)
        prefixes.append(bits[: width - free_bits] + "*" * free_bits)
        current += 1 << free_bits
    return prefixes
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RMT construction strategies (logic preserved)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def worst_case_rmt(tree):
    """Estimate RMT table sizes assuming worst-case prefix expansion.

    Args:
        tree: compressed tree dict providing ``tree["layers"]`` (mapping of
            layer name to its list of ranges) and ``tree["classes"]``.  The
            layers and classes are read from this argument; the function no
            longer depends on module-level ``layers`` / ``classes`` globals
            having been set beforehand.

    Returns:
        A tuple ``(rmt, tcam_bits, ram_bits)``: the list of table
        descriptions (one ternary and one exact-match table per layer) and
        the accumulated TCAM and RAM bit counts.
    """
    layers = tree["layers"]
    classes = tree["classes"]

    rmt = []
    tcam_bits = 0
    ram_bits = 0

    for step, layer in enumerate(layers):
        num_ranges = len(layers[layer])
        prefix_width = field_width[layer]
        # assume that each range requires all of 2*k prefixes when performing
        # prefix expansion, therefore there are 2*k * R prefixes for R ranges
        # and width k
        num_prefixes = 2 * prefix_width * num_ranges

        tcam = {
            "id": f"{layer}_range",
            "step": step,
            "match": "ternary",
            "entries": num_prefixes,
            "key_size": prefix_width,
        }
        tcam_bits += num_prefixes * prefix_width

        # assume basic pointer reuse for metadata storage
        ram = {
            "id": f"{layer}_meta",
            "step": step,
            "match": "exact",
            "method": "index",
            "key_size": math.ceil(math.log2(num_ranges)),
            "data_size": len(classes),
        }
        ram_bits += num_ranges * len(classes)

        rmt.append(tcam)
        rmt.append(ram)

    return rmt, tcam_bits, ram_bits
|
||||
|
||||
|
||||
def naive_rmt(tree):
    """Build RMT tables using plain range-to-prefix expansion.

    Args:
        tree: compressed tree dict providing ``tree["layers"]`` and
            ``tree["classes"]``.  They are read from this argument rather
            than from module-level globals.  The range dicts inside
            ``tree["layers"]`` are modified in place: open bounds are
            replaced by concrete numbers and a ``"prefixes"`` list is added.

    Returns:
        A tuple ``(rmt, tcam_bits, ram_bits)``: the list of table
        descriptions (one ternary and one exact-match table per layer) and
        the accumulated TCAM and RAM bit counts.
    """
    layers = tree["layers"]
    classes = tree["classes"]

    rmt = []
    tcam_bits = 0
    ram_bits = 0

    for step, layer in enumerate(layers):
        ranges = layers[layer]
        prefix_width = field_width[layer]
        num_prefixes = 0

        # for each range in the layer, convert the range to prefixes using
        # naive range expansion
        for r in ranges:
            # Open bounds become the edges of the field.  The two checks are
            # independent so a range that is open on both sides is handled
            # too (with ``elif`` its ``max`` stayed None and the expansion
            # below failed).
            if r["min"] is None:
                r["min"] = 0
            if r["max"] is None:
                r["max"] = 2 ** prefix_width
            # NOTE(review): convert_range covers min..max with both ends
            # included - confirm this matches the intended range semantics.
            prefixes = convert_range(r["min"], r["max"], prefix_width)
            r["prefixes"] = prefixes
            num_prefixes += len(prefixes)
            tcam_bits += len(prefixes) * prefix_width

        tcam = {
            "id": f"{layer}_range",
            "step": step,
            "match": "ternary",
            "entries": num_prefixes,
            "key_size": prefix_width,
            "ranges": ranges,
        }

        num_ranges = len(ranges)
        # assume no pointer reuse for metadata storage
        ram = {
            "id": f"{layer}_meta",
            "step": step,
            "match": "exact",
            "method": "index",
            "key_size": math.ceil(math.log2(num_ranges)),
            "data_size": len(classes),
        }
        ram_bits += num_ranges * len(classes)

        rmt.append(tcam)
        rmt.append(ram)

    return rmt, tcam_bits, ram_bits
|
||||
|
||||
|
||||
def priority_aware(tree):
    """Build RMT tables, choosing the cheaper of two expansions per range.

    For every range both the regular expansion of ``[min, max]`` and the
    expansion of ``[0, max]`` are computed and the shorter prefix list is
    kept (ties go to the regular one).  The choice is recorded on the range
    as ``"prefix_type"``: ``"exact"`` or ``"zero"``.

    Args:
        tree: compressed tree dict providing ``tree["layers"]`` and
            ``tree["classes"]``.  They are read from this argument rather
            than from module-level globals.  The range dicts inside
            ``tree["layers"]`` are modified in place: open bounds are
            replaced by concrete numbers and ``"prefixes"`` /
            ``"prefix_type"`` entries are added.

    Returns:
        A tuple ``(rmt, tcam_bits, ram_bits)``: the list of table
        descriptions (one ternary and one exact-match table per layer) and
        the accumulated TCAM and RAM bit counts.
    """
    layers = tree["layers"]
    classes = tree["classes"]

    rmt = []
    tcam_bits = 0
    ram_bits = 0

    for step, layer in enumerate(layers):
        ranges = layers[layer]
        prefix_width = field_width[layer]
        num_prefixes = 0

        # for each range, run the regular prefix expansion, and also the
        # prefix expansion setting the minimum to 0, then check which set of
        # prefixes would be better
        # we will assume the ranges are already disjoint and in the correct
        # order
        for r in ranges:
            # Open bounds become the edges of the field.  The two checks are
            # independent so a range that is open on both sides is handled
            # too (with ``elif`` its ``max`` stayed None and the expansion
            # below failed).
            if r["min"] is None:
                r["min"] = 0
            if r["max"] is None:
                r["max"] = 2 ** prefix_width
            regular_prefixes = convert_range(r["min"], r["max"], prefix_width)
            zero_start_prefixes = convert_range(0, r["max"], prefix_width)

            if len(regular_prefixes) <= len(zero_start_prefixes):
                pfx_type = "exact"
                prefixes = regular_prefixes
            else:
                pfx_type = "zero"
                prefixes = zero_start_prefixes

            r["prefixes"] = prefixes
            r["prefix_type"] = pfx_type
            num_prefixes += len(prefixes)
            tcam_bits += len(prefixes) * prefix_width

        tcam = {
            "id": f"{layer}_range",
            "step": step,
            "match": "ternary",
            "entries": num_prefixes,
            "key_size": prefix_width,
            "ranges": ranges,
        }

        num_ranges = len(ranges)
        # assume no pointer reuse for metadata storage
        ram = {
            "id": f"{layer}_meta",
            "step": step,
            "match": "exact",
            "method": "index",
            "key_size": math.ceil(math.log2(num_ranges)),
            "data_size": len(classes),
        }
        ram_bits += num_ranges * len(classes)

        rmt.append(tcam)
        rmt.append(ram)

    return rmt, tcam_bits, ram_bits
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the evaluation tool.

    Args:
        argv: optional list of argument strings.  When omitted (the
            default) the arguments are taken from ``sys.argv``, exactly as
            before; passing a list allows programmatic use.

    Returns:
        A namespace with ``mode`` (``"worst"``, ``"naive"`` or
        ``"priority"``), ``input`` (path of the tree JSON) and ``output``
        (path of the result file, or ``None`` to use the per-mode default).
    """
    parser = argparse.ArgumentParser(description="Evaluate RMT memory usage for different range‑to‑prefix strategies.")
    parser.add_argument("--mode", choices=["worst", "naive", "priority"], default="worst", help="Strategy to use")
    parser.add_argument("--input", default="compressed_tree.json", help="Input tree JSON file")
    # The help text lists the real per-mode defaults; the former wording
    # ("<mode>_rmt.json") matched none of the worst/priority file names.
    parser.add_argument(
        "--output",
        default=None,
        help="Output RMT JSON file (defaults to worst_case_rmt.json, naive_rmt.json "
        "or priority_aware.json depending on --mode)",
    )
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main() -> None:
    """Run the selected strategy on the input tree and save the result.

    Reads the tree JSON named by ``--input``, builds the RMT description
    with the strategy chosen by ``--mode``, writes it as indented JSON to
    ``--output`` (or the per-mode default file name) and prints the output
    path together with the TCAM and RAM bit totals.  Exits with an error
    message when the input file does not exist.
    """
    # Kept as module-level names for any strategy function that reads
    # ``layers`` / ``classes`` from module scope instead of its argument.
    global layers, classes

    args = parse_args()

    try:
        with open(args.input) as f:
            tree = json.load(f)
    except FileNotFoundError:
        sys.exit(f"Input file '{args.input}' not found.")

    layers = tree["layers"]
    classes = tree["classes"]

    # mode -> (builder, default output file); any mode other than "worst"
    # and "naive" is handled as "priority".
    strategies = {
        "worst": (worst_case_rmt, "worst_case_rmt.json"),
        "naive": (naive_rmt, "naive_rmt.json"),
    }
    build, default_out = strategies.get(args.mode, (priority_aware, "priority_aware.json"))
    rmt, tcam_bits, ram_bits = build(tree)

    out_file = args.output if args.output else default_out
    with open(out_file, "w") as f:
        json.dump(rmt, f, indent=4)

    # Leftover note: `command python3 ideal-rmt-simulator/sim.py {out_file}`
    for line in (
        f"Output written to {out_file}",
        f"TCAM bits: {tcam_bits}",
        f"RAM bits: {ram_bits}",
    ):
        print(line)


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user