mirror of
https://github.com/ltcptgeneral/IdealRMT-DecisionTrees.git
synced 2025-09-06 23:37:23 +00:00
Compare commits
15 Commits
61a451b82d
...
main
Author | SHA1 | Date | |
---|---|---|---|
51f920e2ba | |||
1136bd93ea | |||
2ad40946d1 | |||
50075b1acc | |||
|
1585399c7d | ||
8301998da3 | |||
3b2d6b3186 | |||
|
24fc2ed6f7 | ||
|
fda251f051 | ||
541538fcfe | |||
|
afc882a569 | ||
6de3807fe2 | |||
|
fc16d3c586 | ||
7bee40ecf9 | |||
|
e811171a73 |
2
.gitattributes
vendored
Normal file
2
.gitattributes
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# force LF for any shell script
|
||||
*.sh text eol=lf
|
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,3 +1,5 @@
|
||||
data.*
|
||||
__pycache__
|
||||
*.json
|
||||
data/*
|
||||
.DS_Store
|
||||
.ipynb_checkpoints/
|
||||
|
152
CompressedTreeParser.ipynb
Normal file
152
CompressedTreeParser.ipynb
Normal file
@@ -0,0 +1,152 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 138,
|
||||
"id": "938dec51",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import argparse\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier, plot_tree, _tree\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"from sklearn.tree import export_graphviz\n",
|
||||
"import pydotplus\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from labels import mac_to_label\n",
|
||||
"import json\n",
|
||||
"import math"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 139,
|
||||
"id": "442624c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Set1 = pd.read_csv('data/combined/data.csv').values.tolist()\n",
|
||||
"X = [i[0:3] for i in Set1]\n",
|
||||
"Y =[i[3] for i in Set1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 142,
|
||||
"id": "12ad454d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'0': 20, '1': 20, '2': 9, '3': 20, '4': 0, '5': 13, '6': 20, '7': 0, '8': 12, '9': 4, '10': 20, '11': 4, '12': 1, '13': 16, '14': 20, '15': 2, '16': 20, '17': 0, '18': 20, '19': 20, '20': 20, '21': 20, '22': 20, '23': 1, '24': 2, '25': 20, '26': 13, '27': 11, '28': 20, '29': 20}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"predict_Yt = []\n",
|
||||
"index=0\n",
|
||||
"\n",
|
||||
"with open('compressed_tree.json', 'r') as file:\n",
|
||||
" data = json.load(file)\n",
|
||||
" classes = data[\"classes\"]\n",
|
||||
" for x in X:\n",
|
||||
" counter = 0\n",
|
||||
" class_set = []\n",
|
||||
" paths_set = []\n",
|
||||
" features = [\"protocol\", \"src\", \"dst\"]\n",
|
||||
" for feature in features:\n",
|
||||
" if feature in data[\"layers\"]:\n",
|
||||
" for node in data['layers'][feature]:\n",
|
||||
" if node['min'] is None:\n",
|
||||
" if x[counter] <= node['max']:\n",
|
||||
" class_set.append(node['classes'])\n",
|
||||
" paths_set.append(node[\"paths\"])\n",
|
||||
" break #is this an issue?\n",
|
||||
" else:\n",
|
||||
" continue\n",
|
||||
" elif node['max'] is None:\n",
|
||||
" if node['min'] < x[counter]:\n",
|
||||
" class_set.append(node['classes'])\n",
|
||||
" paths_set.append(node[\"paths\"])\n",
|
||||
" break #is this an issue?\n",
|
||||
" else:\n",
|
||||
" continue\n",
|
||||
" elif node['min'] < x[counter] and x[counter] <= node['max']:\n",
|
||||
" class_set.append(node['classes'])\n",
|
||||
" paths_set.append(node[\"paths\"])\n",
|
||||
" break #is this an issue?\n",
|
||||
"\n",
|
||||
" counter += 1\n",
|
||||
" result = set(class_set[0])\n",
|
||||
" paths = set(paths_set[0])\n",
|
||||
" for s in class_set[1:]:\n",
|
||||
" result.intersection_update(s)\n",
|
||||
" for s in paths_set[1:]:\n",
|
||||
" paths.intersection_update(s)\n",
|
||||
"\n",
|
||||
" #predict_Yt.append(list(result))\n",
|
||||
" #print(result)\n",
|
||||
" if len(paths) != 1:\n",
|
||||
" print(paths)\n",
|
||||
" print(x)\n",
|
||||
" print(result)\n",
|
||||
" assert len(paths) == 1\n",
|
||||
" path = list(paths)[0]\n",
|
||||
" pred = data[\"path_to_class\"][str(path)]\n",
|
||||
" pred_class = classes[pred]\n",
|
||||
" predict_Yt.append(pred_class)\n",
|
||||
" \n",
|
||||
" index += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 143,
|
||||
"id": "8b4c56b6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.8410252791654538\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"correct = 0\n",
|
||||
"for i in range(len(Y)):\n",
|
||||
" prediction = predict_Yt[i]\n",
|
||||
" if prediction != None and Y[i] == prediction:\n",
|
||||
" correct += 1\n",
|
||||
"\n",
|
||||
"print(correct / len(Y))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
File diff suppressed because one or more lines are too long
@@ -89,7 +89,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "switch",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -103,7 +103,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
"version": "3.12.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
20
README.md
20
README.md
@@ -2,18 +2,30 @@
|
||||
|
||||
Run `pip install -r requirements.txt`
|
||||
|
||||
Run `setup.sh`
|
||||
|
||||
# Tree Generation
|
||||
|
||||
## Download Dataset
|
||||
|
||||
Download the *September 22 2016* dataset from: https://iotanalytics.unsw.edu.au/iottraces.html#bib18tmc
|
||||
Download the *September 22 2016* dataset (or others) from: https://iotanalytics.unsw.edu.au/iottraces.html#bib18tmc
|
||||
|
||||
Rename the file as data.pcap
|
||||
Place these into the `data/tar` folder.
|
||||
|
||||
Run `extract_tars.sh` which will extract and place the `.pcap` files at the corresponding location inside `data/pcap`.
|
||||
|
||||
## Preprocessing Dataset
|
||||
|
||||
Run `ExtractDataset.ipynb`, this will take a few minutes
|
||||
Run `extract_all_datasets.py` which will extract the data from each file in `data/pcap` and turn it into the corresponding `.csv` file inside `data/processed`. This will take a few minutes per file. Then combine the per-capture CSVs under `data/processed` by running `combine.py`. This will overwrite `data/combined/data.csv`, which you can use for the decision tree.
|
||||
|
||||
## Training
|
||||
|
||||
Run `DecisionTree.ipynb`, the tree should be output in `tree`
|
||||
Run `DecisionTree.ipynb`, the tree should be output in `tree.json`
|
||||
|
||||
## Compression
|
||||
|
||||
Run `TreeCompress.ipynb`, the tree should be output in `compressed_tree.json`
|
||||
|
||||
## RMT
|
||||
|
||||
Run `TreeToRMT.ipynb`, it will report the TCAM and SRAM usage of the compressed tree
|
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 73,
|
||||
"id": "ec310f34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 74,
|
||||
"id": "5b54797e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -28,7 +28,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 75,
|
||||
"id": "a38fdb8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -60,7 +60,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 76,
|
||||
"id": "2fd4f738",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -83,7 +83,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 77,
|
||||
"id": "98cde024",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -109,13 +109,13 @@
|
||||
"\t\tvalue = condition[\"value\"]\n",
|
||||
"\n",
|
||||
"\t\t# move the min/max for the corresponding feature in compressed\n",
|
||||
"\t\tif operation == \"<=\" and compressed[feature][\"min\"] is None:\n",
|
||||
"\t\tif operation == \"<=\" and compressed[feature][\"max\"] is None:\n",
|
||||
"\t\t\tcompressed[feature][\"max\"] = value\n",
|
||||
"\t\telif operation == \">\" and compressed[feature][\"max\"] is None:\n",
|
||||
"\t\telif operation == \">\" and compressed[feature][\"min\"] is None:\n",
|
||||
"\t\t\tcompressed[feature][\"min\"] = value\n",
|
||||
"\t\telif operation == \"<=\" and value < compressed[feature][\"min\"]:\n",
|
||||
"\t\telif operation == \"<=\" and value < compressed[feature][\"max\"]:\n",
|
||||
"\t\t\tcompressed[feature][\"max\"] = value\n",
|
||||
"\t\telif operation == \">\" and value > compressed[feature][\"max\"]:\n",
|
||||
"\t\telif operation == \">\" and value > compressed[feature][\"min\"]:\n",
|
||||
"\t\t\tcompressed[feature][\"min\"] = value\n",
|
||||
"\n",
|
||||
"\tpath[\"compressed\"] = compressed"
|
||||
@@ -123,7 +123,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 78,
|
||||
"id": "b6fbadbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -183,7 +183,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 79,
|
||||
"id": "0a767971",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -213,16 +213,22 @@
|
||||
"\tcompressed_layers[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||
"\t#print(\"=\"*40)\n",
|
||||
"\n",
|
||||
"path_to_class = {}\n",
|
||||
"for i in range(len(tree[\"paths\"])):\n",
|
||||
" path = tree[\"paths\"][i]\n",
|
||||
" path_to_class[path[\"id\"]] = path[\"classification\"]\n",
|
||||
"\n",
|
||||
"compressed_tree = {\n",
|
||||
"\t\"paths\": path_ids,\n",
|
||||
"\t\"classes\": path_classes,\n",
|
||||
"\t\"layers\": compressed_layers,\n",
|
||||
" \"path_to_class\": path_to_class,\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 80,
|
||||
"id": "561b0bc1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -241,7 +247,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "switch",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -255,7 +261,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
"version": "3.12.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@@ -117,8 +117,8 @@
|
||||
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
|
||||
"id mapping: \n",
|
||||
"[['dst_range', 'dst_meta'], ['src_range', 'src_meta'], ['protocl_range', 'protocl_meta'], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]\n",
|
||||
"TCAM bits: 13312\n",
|
||||
"RAM bits: 522\n"
|
||||
"TCAM bits: 13184\n",
|
||||
"RAM bits: 504\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -263,8 +263,8 @@
|
||||
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
|
||||
"id mapping: \n",
|
||||
"[['dst_range', 'dst_meta'], ['src_range', 'src_meta'], ['protocl_range', 'protocl_meta'], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]\n",
|
||||
"TCAM bits: 3584\n",
|
||||
"RAM bits: 522\n"
|
||||
"TCAM bits: 3320\n",
|
||||
"RAM bits: 504\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -274,6 +274,14 @@
|
||||
"print(f\"RAM bits: {ram_bits}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2504b1ba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Priority Aware Prefix Expansion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
@@ -368,8 +376,8 @@
|
||||
"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
|
||||
"id mapping: \n",
|
||||
"[['dst_range', 'dst_meta'], ['src_range', 'src_meta'], ['protocl_range', 'protocl_meta'], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]\n",
|
||||
"TCAM bits: 2120\n",
|
||||
"RAM bits: 522\n"
|
||||
"TCAM bits: 2152\n",
|
||||
"RAM bits: 504\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -382,7 +390,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "switch",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
74
combine.py
Normal file
74
combine.py
Normal file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python3
"""combine.py
|
||||
|
||||
Concatenate every CSV that matches the pattern
|
||||
data/processed/<name>/<name>.csv
|
||||
into a single file:
|
||||
data/combined/data.csv
|
||||
|
||||
The script streams each source CSV in chunks of 1,000,000 rows so memory stays low.
|
||||
Typos in the historic column names (protocl/classfication) are fixed on‑the‑fly.
|
||||
|
||||
Usage
|
||||
-----
|
||||
python combine.py
|
||||
|
||||
You can optionally supply a different root directory:
|
||||
python combine.py --root other/processed_dir --out other/combined/data.csv
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
CHUNK = 1_000_000 # rows per read_csv chunk
|
||||
|
||||
|
||||
def fix_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose misspelled legacy headers are corrected.

    Only the two historic typos are touched; every other column keeps its
    name, and the input frame itself is left unmodified.
    """
    legacy_to_canonical = {
        "protocl": "protocol",
        "classfication": "classification",
    }
    return df.rename(columns=legacy_to_canonical)
|
||||
|
||||
|
||||
def find_source_csvs(proc_root: Path):
    """Lazily yield every ``<proc_root>/<name>/<name>.csv`` that exists.

    Entries of *proc_root* are visited in sorted order. Anything that is not
    a directory, or a directory without a same-named CSV inside it, is
    skipped.
    """
    for entry in sorted(proc_root.iterdir()):
        candidate = entry / f"{entry.name}.csv"
        if entry.is_dir() and candidate.exists():
            yield candidate
|
||||
|
||||
|
||||
def combine(proc_root: Path, out_path: Path):
    """Concatenate every processed CSV under *proc_root* into *out_path*.

    Each source file found by ``find_source_csvs`` is read in ``CHUNK``-row
    pieces, its legacy column names are fixed with ``fix_cols``, and the
    piece is written to *out_path*. The first piece overwrites the file and
    writes the header; every later piece is appended without one.

    Because only one header is written, later pieces must use the same
    columns as the first. A piece with the same columns in a different order
    is reordered to match; a piece with a different set of columns raises
    instead of being appended under the wrong header.

    Args:
        proc_root: Directory holding the ``<name>/<name>.csv`` sources.
        out_path: Destination CSV; its parent directory is created if needed.

    Raises:
        ValueError: If a source CSV's columns differ from those of the first
            chunk written.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Column order of the first chunk written; None until something is written.
    header = None
    for csv_path in find_source_csvs(proc_root):
        print(f"→ adding {csv_path.relative_to(proc_root.parent)}")
        for chunk in pd.read_csv(csv_path, chunksize=CHUNK):
            chunk = fix_cols(chunk)
            columns = list(chunk.columns)
            first_write = header is None
            if first_write:
                header = columns
            elif columns != header:
                if sorted(columns) != sorted(header):
                    raise ValueError(
                        f"{csv_path} has columns {columns}, "
                        f"expected {header}"
                    )
                # Same columns, different order: align with the header that
                # is already in the output so rows are not misaligned.
                chunk = chunk.reindex(columns=header)
            chunk.to_csv(
                out_path,
                mode="w" if first_write else "a",
                header=first_write,
                index=False,
            )

    if header is None:
        # Nothing was written, so do not claim success for a stale or
        # missing output file.
        print(f"✗ nothing to combine under {proc_root}; {out_path} was not written")
        return
    print(f"✓ combined CSV written to {out_path}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read ``--root`` / ``--out`` and run the merge."""
    parser = argparse.ArgumentParser(description="Combine processed CSVs into one.")
    parser.add_argument("--root", default="data/processed", help="processed dir root")
    parser.add_argument("--out", default="data/combined/data.csv", help="output CSV")
    options = parser.parse_args()

    # Expand a leading "~" so home-relative paths work from the command line.
    source_root = Path(options.root).expanduser()
    destination = Path(options.out).expanduser()
    combine(source_root, destination)


if __name__ == "__main__":
    main()
|
560
example/compressed_tree.json
Normal file
560
example/compressed_tree.json
Normal file
@@ -0,0 +1,560 @@
|
||||
{
|
||||
"paths": [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
"Amazon Echo",
|
||||
"Belkin Motion Sensor",
|
||||
"Belkin Switch",
|
||||
"Dropcam",
|
||||
"HP Printer",
|
||||
"LiFX Bulb",
|
||||
"NEST Smoke Sensor",
|
||||
"Netatmo Camera",
|
||||
"Netatmo Weather station",
|
||||
"Pixstart photo frame",
|
||||
"Samsung Smart Cam",
|
||||
"Smart Things",
|
||||
"TP-Link Camera",
|
||||
"TP-Link Plug",
|
||||
"Triby Speaker",
|
||||
"Withings",
|
||||
"Withings Scale",
|
||||
"Withings sleep sensor",
|
||||
"iHome PowerPlug",
|
||||
"other"
|
||||
],
|
||||
"layers": {
|
||||
"dst": [
|
||||
{
|
||||
"min": null,
|
||||
"max": 2136,
|
||||
"paths": [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6
|
||||
],
|
||||
"classes": [
|
||||
8,
|
||||
19,
|
||||
4
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 2136,
|
||||
"max": 2224,
|
||||
"paths": [
|
||||
7
|
||||
],
|
||||
"classes": [
|
||||
11
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 2224,
|
||||
"max": 5016,
|
||||
"paths": [
|
||||
8,
|
||||
9
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 5016,
|
||||
"max": 25848,
|
||||
"paths": [
|
||||
10,
|
||||
11,
|
||||
12
|
||||
],
|
||||
"classes": [
|
||||
19,
|
||||
7
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 25848,
|
||||
"max": 47936,
|
||||
"paths": [
|
||||
10,
|
||||
11,
|
||||
13
|
||||
],
|
||||
"classes": [
|
||||
19,
|
||||
7
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 47936,
|
||||
"max": 47944,
|
||||
"paths": [
|
||||
14
|
||||
],
|
||||
"classes": [
|
||||
3
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 47944,
|
||||
"max": 49152,
|
||||
"paths": [
|
||||
16,
|
||||
15
|
||||
],
|
||||
"classes": [
|
||||
10,
|
||||
7
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 49152,
|
||||
"max": 49160,
|
||||
"paths": [
|
||||
17,
|
||||
18
|
||||
],
|
||||
"classes": [
|
||||
16,
|
||||
2
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 49160,
|
||||
"max": null,
|
||||
"paths": [
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
17,
|
||||
19,
|
||||
15
|
||||
]
|
||||
}
|
||||
],
|
||||
"src": [
|
||||
{
|
||||
"min": null,
|
||||
"max": 64,
|
||||
"paths": [
|
||||
0,
|
||||
1,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 64,
|
||||
"max": 128,
|
||||
"paths": [
|
||||
3,
|
||||
5,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 128,
|
||||
"max": 280,
|
||||
"paths": [
|
||||
3,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 280,
|
||||
"max": 816,
|
||||
"paths": [
|
||||
3,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 816,
|
||||
"max": 1576,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 1576,
|
||||
"max": 2488,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
18,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 2488,
|
||||
"max": 4776,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 4776,
|
||||
"max": 5224,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
20
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
10,
|
||||
11,
|
||||
17,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 5224,
|
||||
"max": 9048,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
20
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
17,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 9048,
|
||||
"max": 43008,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
21
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 43008,
|
||||
"max": 50384,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
9,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
21
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 50384,
|
||||
"max": null,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
9,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
]
|
||||
}
|
||||
],
|
||||
"protocl": [
|
||||
{
|
||||
"min": null,
|
||||
"max": 0,
|
||||
"paths": [
|
||||
0,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
19
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"paths": [
|
||||
1,
|
||||
2,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
19
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"path_to_class": {
|
||||
"0": 19,
|
||||
"1": 19,
|
||||
"2": 8,
|
||||
"3": 4,
|
||||
"4": 19,
|
||||
"5": 19,
|
||||
"6": 19,
|
||||
"7": 11,
|
||||
"8": 19,
|
||||
"9": 1,
|
||||
"10": 19,
|
||||
"11": 7,
|
||||
"12": 19,
|
||||
"13": 19,
|
||||
"14": 3,
|
||||
"15": 7,
|
||||
"16": 10,
|
||||
"17": 16,
|
||||
"18": 2,
|
||||
"19": 15,
|
||||
"20": 17,
|
||||
"21": 19,
|
||||
"22": 19
|
||||
}
|
||||
}
|
734
example/naive_rmt.json
Normal file
734
example/naive_rmt.json
Normal file
@@ -0,0 +1,734 @@
|
||||
[
|
||||
{
|
||||
"id": "dst_range",
|
||||
"step": 0,
|
||||
"match": "ternary",
|
||||
"entries": 68,
|
||||
"key_size": 16,
|
||||
"ranges": [
|
||||
{
|
||||
"min": 0,
|
||||
"max": 2136,
|
||||
"paths": [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6
|
||||
],
|
||||
"classes": [
|
||||
8,
|
||||
19,
|
||||
4
|
||||
],
|
||||
"prefixes": [
|
||||
"00000***********",
|
||||
"0000100000******",
|
||||
"000010000100****",
|
||||
"0000100001010***",
|
||||
"0000100001011000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 2136,
|
||||
"max": 2224,
|
||||
"paths": [
|
||||
7
|
||||
],
|
||||
"classes": [
|
||||
11
|
||||
],
|
||||
"prefixes": [
|
||||
"0000100001011***",
|
||||
"00001000011*****",
|
||||
"00001000100*****",
|
||||
"000010001010****",
|
||||
"0000100010110000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 2224,
|
||||
"max": 5016,
|
||||
"paths": [
|
||||
8,
|
||||
9
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"000010001011****",
|
||||
"0000100011******",
|
||||
"00001001********",
|
||||
"0000101*********",
|
||||
"000011**********",
|
||||
"0001000*********",
|
||||
"00010010********",
|
||||
"000100110*******",
|
||||
"000100111000****",
|
||||
"0001001110010***",
|
||||
"0001001110011000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 5016,
|
||||
"max": 25848,
|
||||
"paths": [
|
||||
10,
|
||||
11,
|
||||
12
|
||||
],
|
||||
"classes": [
|
||||
19,
|
||||
7
|
||||
],
|
||||
"prefixes": [
|
||||
"0001001110011***",
|
||||
"00010011101*****",
|
||||
"0001001111******",
|
||||
"000101**********",
|
||||
"00011***********",
|
||||
"001*************",
|
||||
"010*************",
|
||||
"011000**********",
|
||||
"011001000*******",
|
||||
"0110010010******",
|
||||
"01100100110*****",
|
||||
"011001001110****",
|
||||
"0110010011110***",
|
||||
"0110010011111000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 25848,
|
||||
"max": 47936,
|
||||
"paths": [
|
||||
10,
|
||||
11,
|
||||
13
|
||||
],
|
||||
"classes": [
|
||||
19,
|
||||
7
|
||||
],
|
||||
"prefixes": [
|
||||
"0110010011111***",
|
||||
"01100101********",
|
||||
"0110011*********",
|
||||
"01101***********",
|
||||
"0111************",
|
||||
"100*************",
|
||||
"1010************",
|
||||
"10110***********",
|
||||
"1011100*********",
|
||||
"10111010********",
|
||||
"1011101100******",
|
||||
"1011101101000000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 47936,
|
||||
"max": 47944,
|
||||
"paths": [
|
||||
14
|
||||
],
|
||||
"classes": [
|
||||
3
|
||||
],
|
||||
"prefixes": [
|
||||
"1011101101000***",
|
||||
"1011101101001000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 47944,
|
||||
"max": 49152,
|
||||
"paths": [
|
||||
16,
|
||||
15
|
||||
],
|
||||
"classes": [
|
||||
10,
|
||||
7
|
||||
],
|
||||
"prefixes": [
|
||||
"1011101101001***",
|
||||
"101110110101****",
|
||||
"10111011011*****",
|
||||
"101110111*******",
|
||||
"101111**********",
|
||||
"1100000000000000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 49152,
|
||||
"max": 49160,
|
||||
"paths": [
|
||||
17,
|
||||
18
|
||||
],
|
||||
"classes": [
|
||||
16,
|
||||
2
|
||||
],
|
||||
"prefixes": [
|
||||
"1100000000000***",
|
||||
"1100000000001000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 49160,
|
||||
"max": 65536,
|
||||
"paths": [
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
17,
|
||||
19,
|
||||
15
|
||||
],
|
||||
"prefixes": [
|
||||
"1100000000001***",
|
||||
"110000000001****",
|
||||
"11000000001*****",
|
||||
"1100000001******",
|
||||
"110000001*******",
|
||||
"11000001********",
|
||||
"1100001*********",
|
||||
"110001**********",
|
||||
"11001***********",
|
||||
"1101************",
|
||||
"111*************"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "dst_meta",
|
||||
"step": 0,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 4,
|
||||
"data_size": 20
|
||||
},
|
||||
{
|
||||
"id": "src_range",
|
||||
"step": 1,
|
||||
"match": "ternary",
|
||||
"entries": 87,
|
||||
"key_size": 16,
|
||||
"ranges": [
|
||||
{
|
||||
"min": 0,
|
||||
"max": 64,
|
||||
"paths": [
|
||||
0,
|
||||
1,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000000000******",
|
||||
"0000000001000000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 64,
|
||||
"max": 128,
|
||||
"paths": [
|
||||
3,
|
||||
5,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000000001******",
|
||||
"0000000010000000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 128,
|
||||
"max": 280,
|
||||
"paths": [
|
||||
3,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"000000001*******",
|
||||
"000000010000****",
|
||||
"0000000100010***",
|
||||
"0000000100011000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 280,
|
||||
"max": 816,
|
||||
"paths": [
|
||||
3,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000000100011***",
|
||||
"00000001001*****",
|
||||
"0000000101******",
|
||||
"000000011*******",
|
||||
"00000010********",
|
||||
"00000011000*****",
|
||||
"000000110010****",
|
||||
"0000001100110000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 816,
|
||||
"max": 1576,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"000000110011****",
|
||||
"0000001101******",
|
||||
"000000111*******",
|
||||
"0000010*********",
|
||||
"00000110000*****",
|
||||
"0000011000100***",
|
||||
"0000011000101000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 1576,
|
||||
"max": 2488,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
18,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000011000101***",
|
||||
"000001100011****",
|
||||
"0000011001******",
|
||||
"000001101*******",
|
||||
"00000111********",
|
||||
"00001000********",
|
||||
"000010010*******",
|
||||
"00001001100*****",
|
||||
"000010011010****",
|
||||
"0000100110110***",
|
||||
"0000100110111000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 2488,
|
||||
"max": 4776,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000100110111***",
|
||||
"0000100111******",
|
||||
"0000101*********",
|
||||
"000011**********",
|
||||
"0001000*********",
|
||||
"000100100*******",
|
||||
"00010010100*****",
|
||||
"0001001010100***",
|
||||
"0001001010101000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 4776,
|
||||
"max": 5224,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
20
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
10,
|
||||
11,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0001001010101***",
|
||||
"000100101011****",
|
||||
"0001001011******",
|
||||
"00010011********",
|
||||
"0001010000******",
|
||||
"00010100010*****",
|
||||
"0001010001100***",
|
||||
"0001010001101000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 5224,
|
||||
"max": 9048,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
20
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0001010001101***",
|
||||
"000101000111****",
|
||||
"000101001*******",
|
||||
"00010101********",
|
||||
"0001011*********",
|
||||
"00011***********",
|
||||
"0010000*********",
|
||||
"00100010********",
|
||||
"0010001100******",
|
||||
"001000110100****",
|
||||
"0010001101010***",
|
||||
"0010001101011000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 9048,
|
||||
"max": 43008,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
21
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0010001101011***",
|
||||
"00100011011*****",
|
||||
"001000111*******",
|
||||
"001001**********",
|
||||
"00101***********",
|
||||
"0011************",
|
||||
"01**************",
|
||||
"100*************",
|
||||
"10100***********",
|
||||
"1010100000000000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 43008,
|
||||
"max": 50384,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
9,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
21
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"10101***********",
|
||||
"1011************",
|
||||
"110000**********",
|
||||
"110001000*******",
|
||||
"1100010010******",
|
||||
"110001001100****",
|
||||
"1100010011010000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 50384,
|
||||
"max": 65536,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
9,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"110001001101****",
|
||||
"11000100111*****",
|
||||
"11000101********",
|
||||
"1100011*********",
|
||||
"11001***********",
|
||||
"1101************",
|
||||
"111*************"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "src_meta",
|
||||
"step": 1,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 4,
|
||||
"data_size": 20
|
||||
},
|
||||
{
|
||||
"id": "protocl_range",
|
||||
"step": 2,
|
||||
"match": "ternary",
|
||||
"entries": 2,
|
||||
"key_size": 8,
|
||||
"ranges": [
|
||||
{
|
||||
"min": 0,
|
||||
"max": 0,
|
||||
"paths": [
|
||||
0,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"00000000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"min": 0,
|
||||
"max": 256,
|
||||
"paths": [
|
||||
1,
|
||||
2,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"********"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "protocl_meta",
|
||||
"step": 2,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 1,
|
||||
"data_size": 20
|
||||
}
|
||||
]
|
700
example/priority_aware.json
Normal file
700
example/priority_aware.json
Normal file
@@ -0,0 +1,700 @@
|
||||
[
|
||||
{
|
||||
"id": "dst_range",
|
||||
"step": 0,
|
||||
"match": "ternary",
|
||||
"entries": 42,
|
||||
"key_size": 16,
|
||||
"ranges": [
|
||||
{
|
||||
"min": 0,
|
||||
"max": 2136,
|
||||
"paths": [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6
|
||||
],
|
||||
"classes": [
|
||||
8,
|
||||
19,
|
||||
4
|
||||
],
|
||||
"prefixes": [
|
||||
"00000***********",
|
||||
"0000100000******",
|
||||
"000010000100****",
|
||||
"0000100001010***",
|
||||
"0000100001011000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 2136,
|
||||
"max": 2224,
|
||||
"paths": [
|
||||
7
|
||||
],
|
||||
"classes": [
|
||||
11
|
||||
],
|
||||
"prefixes": [
|
||||
"0000100001011***",
|
||||
"00001000011*****",
|
||||
"00001000100*****",
|
||||
"000010001010****",
|
||||
"0000100010110000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 2224,
|
||||
"max": 5016,
|
||||
"paths": [
|
||||
8,
|
||||
9
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000************",
|
||||
"0001000*********",
|
||||
"00010010********",
|
||||
"000100110*******",
|
||||
"000100111000****",
|
||||
"0001001110010***",
|
||||
"0001001110011000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 5016,
|
||||
"max": 25848,
|
||||
"paths": [
|
||||
10,
|
||||
11,
|
||||
12
|
||||
],
|
||||
"classes": [
|
||||
19,
|
||||
7
|
||||
],
|
||||
"prefixes": [
|
||||
"00**************",
|
||||
"010*************",
|
||||
"011000**********",
|
||||
"011001000*******",
|
||||
"0110010010******",
|
||||
"01100100110*****",
|
||||
"011001001110****",
|
||||
"0110010011110***",
|
||||
"0110010011111000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 25848,
|
||||
"max": 47936,
|
||||
"paths": [
|
||||
10,
|
||||
11,
|
||||
13
|
||||
],
|
||||
"classes": [
|
||||
19,
|
||||
7
|
||||
],
|
||||
"prefixes": [
|
||||
"0***************",
|
||||
"100*************",
|
||||
"1010************",
|
||||
"10110***********",
|
||||
"1011100*********",
|
||||
"10111010********",
|
||||
"1011101100******",
|
||||
"1011101101000000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 47936,
|
||||
"max": 47944,
|
||||
"paths": [
|
||||
14
|
||||
],
|
||||
"classes": [
|
||||
3
|
||||
],
|
||||
"prefixes": [
|
||||
"1011101101000***",
|
||||
"1011101101001000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 47944,
|
||||
"max": 49152,
|
||||
"paths": [
|
||||
16,
|
||||
15
|
||||
],
|
||||
"classes": [
|
||||
10,
|
||||
7
|
||||
],
|
||||
"prefixes": [
|
||||
"0***************",
|
||||
"10**************",
|
||||
"1100000000000000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 49152,
|
||||
"max": 49160,
|
||||
"paths": [
|
||||
17,
|
||||
18
|
||||
],
|
||||
"classes": [
|
||||
16,
|
||||
2
|
||||
],
|
||||
"prefixes": [
|
||||
"1100000000000***",
|
||||
"1100000000001000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 49160,
|
||||
"max": 65536,
|
||||
"paths": [
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
17,
|
||||
19,
|
||||
15
|
||||
],
|
||||
"prefixes": [
|
||||
"****************"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "dst_meta",
|
||||
"step": 0,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 4,
|
||||
"data_size": 20
|
||||
},
|
||||
{
|
||||
"id": "src_range",
|
||||
"step": 1,
|
||||
"match": "ternary",
|
||||
"entries": 56,
|
||||
"key_size": 16,
|
||||
"ranges": [
|
||||
{
|
||||
"min": 0,
|
||||
"max": 64,
|
||||
"paths": [
|
||||
0,
|
||||
1,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000000000******",
|
||||
"0000000001000000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 64,
|
||||
"max": 128,
|
||||
"paths": [
|
||||
3,
|
||||
5,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000000001******",
|
||||
"0000000010000000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 128,
|
||||
"max": 280,
|
||||
"paths": [
|
||||
3,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"000000001*******",
|
||||
"000000010000****",
|
||||
"0000000100010***",
|
||||
"0000000100011000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 280,
|
||||
"max": 816,
|
||||
"paths": [
|
||||
3,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000000*********",
|
||||
"00000010********",
|
||||
"00000011000*****",
|
||||
"000000110010****",
|
||||
"0000001100110000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 816,
|
||||
"max": 1576,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"000000**********",
|
||||
"0000010*********",
|
||||
"00000110000*****",
|
||||
"0000011000100***",
|
||||
"0000011000101000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 1576,
|
||||
"max": 2488,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
15,
|
||||
18,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
11,
|
||||
15,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"00000***********",
|
||||
"00001000********",
|
||||
"000010010*******",
|
||||
"00001001100*****",
|
||||
"000010011010****",
|
||||
"0000100110110***",
|
||||
"0000100110111000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 2488,
|
||||
"max": 4776,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
19
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000************",
|
||||
"0001000*********",
|
||||
"000100100*******",
|
||||
"00010010100*****",
|
||||
"0001001010100***",
|
||||
"0001001010101000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 4776,
|
||||
"max": 5224,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
11,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
20
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
10,
|
||||
11,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0000************",
|
||||
"000100**********",
|
||||
"0001010000******",
|
||||
"00010100010*****",
|
||||
"0001010001100***",
|
||||
"0001010001101000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 5224,
|
||||
"max": 9048,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
20
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"000*************",
|
||||
"0010000*********",
|
||||
"00100010********",
|
||||
"0010001100******",
|
||||
"001000110100****",
|
||||
"0010001101010***",
|
||||
"0010001101011000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 9048,
|
||||
"max": 43008,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
21
|
||||
],
|
||||
"classes": [
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"0***************",
|
||||
"100*************",
|
||||
"10100***********",
|
||||
"1010100000000000"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
},
|
||||
{
|
||||
"min": 43008,
|
||||
"max": 50384,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
9,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
21
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"10101***********",
|
||||
"1011************",
|
||||
"110000**********",
|
||||
"110001000*******",
|
||||
"1100010010******",
|
||||
"110001001100****",
|
||||
"1100010011010000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 50384,
|
||||
"max": 65536,
|
||||
"paths": [
|
||||
4,
|
||||
6,
|
||||
7,
|
||||
9,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
16,
|
||||
18,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
10,
|
||||
11,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"****************"
|
||||
],
|
||||
"prefix_type": "zero"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "src_meta",
|
||||
"step": 1,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 4,
|
||||
"data_size": 20
|
||||
},
|
||||
{
|
||||
"id": "protocl_range",
|
||||
"step": 2,
|
||||
"match": "ternary",
|
||||
"entries": 2,
|
||||
"key_size": 8,
|
||||
"ranges": [
|
||||
{
|
||||
"min": 0,
|
||||
"max": 0,
|
||||
"paths": [
|
||||
0,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"00000000"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
},
|
||||
{
|
||||
"min": 0,
|
||||
"max": 256,
|
||||
"paths": [
|
||||
1,
|
||||
2,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22
|
||||
],
|
||||
"classes": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
7,
|
||||
8,
|
||||
10,
|
||||
11,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
19
|
||||
],
|
||||
"prefixes": [
|
||||
"********"
|
||||
],
|
||||
"prefix_type": "exact"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "protocl_meta",
|
||||
"step": 2,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 1,
|
||||
"data_size": 20
|
||||
}
|
||||
]
|
744
example/tree.json
Normal file
744
example/tree.json
Normal file
@@ -0,0 +1,744 @@
|
||||
{
|
||||
"features": {
|
||||
"dst": [
|
||||
47936.0,
|
||||
2128.0,
|
||||
5024.0,
|
||||
2224.0,
|
||||
25856.0,
|
||||
47936.0,
|
||||
49168.0,
|
||||
49152.0
|
||||
],
|
||||
"src": [
|
||||
64.0,
|
||||
64.0,
|
||||
816.0,
|
||||
128.0,
|
||||
43008.0,
|
||||
5232.0,
|
||||
288.0,
|
||||
2480.0,
|
||||
1584.0,
|
||||
9040.0,
|
||||
4784.0,
|
||||
50384.0
|
||||
],
|
||||
"protocl": [
|
||||
0.0,
|
||||
0.0
|
||||
]
|
||||
},
|
||||
"paths": [
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "protocl",
|
||||
"operation": "<=",
|
||||
"value": 0.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 0
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "protocl",
|
||||
"operation": ">",
|
||||
"value": 0.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 1
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 64.0
|
||||
}
|
||||
],
|
||||
"classification": 8,
|
||||
"id": 2
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "protocl",
|
||||
"operation": "<=",
|
||||
"value": 0.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 816.0
|
||||
}
|
||||
],
|
||||
"classification": 4,
|
||||
"id": 3
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "protocl",
|
||||
"operation": "<=",
|
||||
"value": 0.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 816.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 4
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "protocl",
|
||||
"operation": ">",
|
||||
"value": 0.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 128.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 5
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 64.0
|
||||
},
|
||||
{
|
||||
"feature": "protocl",
|
||||
"operation": ">",
|
||||
"value": 0.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 128.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 6
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 5024.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 2224.0
|
||||
}
|
||||
],
|
||||
"classification": 11,
|
||||
"id": 7
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 5024.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2224.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 43008.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 8
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 5024.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2224.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 43008.0
|
||||
}
|
||||
],
|
||||
"classification": 1,
|
||||
"id": 9
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 5024.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 5232.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 288.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 10
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 5024.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 5232.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 288.0
|
||||
}
|
||||
],
|
||||
"classification": 7,
|
||||
"id": 11
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 5024.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 5232.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 25856.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 12
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 2128.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 5024.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 5232.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 25856.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 13
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 47936.0
|
||||
}
|
||||
],
|
||||
"classification": 3,
|
||||
"id": 14
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 49168.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 49152.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 2480.0
|
||||
}
|
||||
],
|
||||
"classification": 7,
|
||||
"id": 15
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 49168.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 49152.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 2480.0
|
||||
}
|
||||
],
|
||||
"classification": 10,
|
||||
"id": 16
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 49168.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 49152.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 1584.0
|
||||
}
|
||||
],
|
||||
"classification": 16,
|
||||
"id": 17
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": "<=",
|
||||
"value": 49168.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 49152.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 1584.0
|
||||
}
|
||||
],
|
||||
"classification": 2,
|
||||
"id": 18
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 49168.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 9040.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 4784.0
|
||||
}
|
||||
],
|
||||
"classification": 15,
|
||||
"id": 19
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 49168.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 9040.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 4784.0
|
||||
}
|
||||
],
|
||||
"classification": 17,
|
||||
"id": 20
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 49168.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 9040.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": "<=",
|
||||
"value": 50384.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 21
|
||||
},
|
||||
{
|
||||
"conditions": [
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 47936.0
|
||||
},
|
||||
{
|
||||
"feature": "dst",
|
||||
"operation": ">",
|
||||
"value": 49168.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 9040.0
|
||||
},
|
||||
{
|
||||
"feature": "src",
|
||||
"operation": ">",
|
||||
"value": 50384.0
|
||||
}
|
||||
],
|
||||
"classification": 19,
|
||||
"id": 22
|
||||
}
|
||||
],
|
||||
"classes": [
|
||||
"Amazon Echo",
|
||||
"Belkin Motion Sensor",
|
||||
"Belkin Switch",
|
||||
"Dropcam",
|
||||
"HP Printer",
|
||||
"LiFX Bulb",
|
||||
"NEST Smoke Sensor",
|
||||
"Netatmo Camera",
|
||||
"Netatmo Weather station",
|
||||
"Pixstart photo frame",
|
||||
"Samsung Smart Cam",
|
||||
"Smart Things",
|
||||
"TP-Link Camera",
|
||||
"TP-Link Plug",
|
||||
"Triby Speaker",
|
||||
"Withings",
|
||||
"Withings Scale",
|
||||
"Withings sleep sensor",
|
||||
"iHome PowerPlug",
|
||||
"other"
|
||||
]
|
||||
}
|
47
example/worst_case_rmt.json
Normal file
47
example/worst_case_rmt.json
Normal file
@@ -0,0 +1,47 @@
|
||||
[
|
||||
{
|
||||
"id": "dst_range",
|
||||
"step": 0,
|
||||
"match": "ternary",
|
||||
"entries": 288,
|
||||
"key_size": 16
|
||||
},
|
||||
{
|
||||
"id": "dst_meta",
|
||||
"step": 0,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 4,
|
||||
"data_size": 20
|
||||
},
|
||||
{
|
||||
"id": "src_range",
|
||||
"step": 1,
|
||||
"match": "ternary",
|
||||
"entries": 384,
|
||||
"key_size": 16
|
||||
},
|
||||
{
|
||||
"id": "src_meta",
|
||||
"step": 1,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 4,
|
||||
"data_size": 20
|
||||
},
|
||||
{
|
||||
"id": "protocl_range",
|
||||
"step": 2,
|
||||
"match": "ternary",
|
||||
"entries": 32,
|
||||
"key_size": 8
|
||||
},
|
||||
{
|
||||
"id": "protocl_meta",
|
||||
"step": 2,
|
||||
"match": "exact",
|
||||
"method": "index",
|
||||
"key_size": 1,
|
||||
"data_size": 20
|
||||
}
|
||||
]
|
80
extract_all_datasets.py
Normal file
80
extract_all_datasets.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from labels import mac_to_label
|
||||
from tqdm import tqdm
|
||||
import os
|
||||
|
||||
# Directory that contains this script; every data path below is relative to it.
ROOT = Path(__file__).resolve().parent
# Input captures are searched for under data/pcap.
PCAP_DIR = ROOT.joinpath("data", "pcap")
# Generated CSV files are written under data/processed.
CSV_DIR = ROOT.joinpath("data", "processed")
# Create the output directory at import time so later writes cannot fail on it.
CSV_DIR.mkdir(parents=True, exist_ok=True)

# Packets per chunk. NOTE(review): not referenced anywhere else in this script.
BATCH = 100_000
|
||||
|
||||
from scapy.all import rdpcap
|
||||
|
||||
|
||||
def process_pcap(pcap_path: str, csv_path: str) -> None:
    """Extract per-packet features from one capture and store them as CSV.

    Each packet contributes one row with four columns:
    ``protocl`` (the packet's ``proto`` attribute), ``src`` and ``dst``
    (its ``sport`` / ``dport`` attributes) and ``classfication`` (the label
    looked up in ``mac_to_label`` by the Ethernet destination address).
    Missing attributes are recorded as ``0`` and unknown or non-Ethernet
    packets are labelled ``"other"``.

    Args:
        pcap_path: Path of the capture file handed to ``rdpcap``.
        csv_path: Destination CSV. If it already exists, rows are appended
            without a header; otherwise the file is created with a header.
    """
    all_packets = rdpcap(pcap_path)
    print("rdpcap done", flush=True)

    # Column names are consumed by other scripts, so the spelling is kept.
    columns = ['protocl', 'src', 'dst', 'classfication']

    rows = []
    for packet in tqdm(all_packets):
        # Packets without a protocol field are recorded as protocol 0.
        proto = getattr(packet, "proto", 0)

        # Ports are taken as a pair: if either is missing, both become 0.
        try:
            sport = packet.sport
            dport = packet.dport
        except AttributeError:
            sport = 0
            dport = 0

        classification = "other"
        if "Ether" in packet:
            eth_dst = packet["Ether"].dst
            if eth_dst in mac_to_label:
                classification = mac_to_label[eth_dst]

        rows.append([int(proto), int(sport), int(dport), classification])

    # Build the frame straight from the row list. Unlike transposing a numpy
    # array and indexing its rows, this also works for a capture with zero
    # packets (producing a header-only CSV instead of an IndexError).
    dataframe = pd.DataFrame(rows, columns=columns)

    # Append to an existing CSV, otherwise create it together with its header.
    if os.path.exists(csv_path):
        dataframe.to_csv(csv_path, index=False, sep=',', mode='a',
                         columns=columns, header=False)
    else:
        dataframe.to_csv(csv_path, index=False, sep=',', columns=columns)

    print("Done")
|
||||
|
||||
|
||||
|
||||
def main() -> None:
    """Convert every ``*.pcap`` below ``PCAP_DIR`` into a CSV below ``CSV_DIR``.

    The directory layout of the capture tree is mirrored in the output tree.
    Captures whose CSV already exists are skipped.
    """
    capture_files = sorted(PCAP_DIR.rglob("*.pcap"))
    for capture in capture_files:
        relative_csv = capture.relative_to(PCAP_DIR).with_suffix(".csv")
        target = CSV_DIR / relative_csv

        if not target.exists():
            print(f"Processing {relative_csv}")
            # Make sure the mirrored sub-directory exists before writing.
            target.parent.mkdir(parents=True, exist_ok=True)
            process_pcap(str(capture), str(target))
        else:
            print(f"Skip {relative_csv} (CSV exists)")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
50
extract_tars.sh
Normal file
50
extract_tars.sh
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env bash
|
||||
# Usage: extract_tars.sh SOURCE_DIR TARGET_DIR
|
||||
# For every .tar, .tar.gz, .tgz, .tar.bz2, .tar.xz in SOURCE_DIR:
|
||||
# 1. Create TARGET_DIR/<name>/
|
||||
# 2. If TARGET_DIR/<name>/<name>.pcap already exists, skip the archive.
|
||||
# 3. Otherwise, extract the archive into its own folder.
|
||||
|
||||
# Abort on errors, on unset variables and on failures inside pipelines.
set -euo pipefail

# Exactly two positional arguments are required.
[[ $# -eq 2 ]] || {
  echo "Usage: $0 SOURCE_DIR TARGET_DIR" >&2
  exit 1
}

src_dir="$1"
dst_dir="$2"

# The target root is created up front; per-archive folders are added later.
mkdir -p "$dst_dir"
|
||||
|
||||
# Strip common extensions to recover the base name
|
||||
strip_ext() {
  # Print $1 with a trailing archive extension removed.
  # Suffixes are tried one after another in this fixed order.
  local n="$1"
  local ext
  for ext in .tar.gz .tgz .tar.bz2 .tar.xz .tar; do
    n=${n%"$ext"}
  done
  echo "$n"
}
|
||||
|
||||
# Unmatched glob patterns expand to nothing, so the loop body is skipped
# for archive types that are not present in the source directory.
shopt -s nullglob
for archive in "$src_dir"/*.tar{,.gz,.bz2,.xz} "$src_dir"/*.tgz; do
  archive_file=$(basename "$archive")
  name=$(strip_ext "$archive_file")
  out_dir="$dst_dir/$name"
  # Presence of <name>/<name>.pcap marks the archive as already extracted.
  key_file="$out_dir/$name.pcap"

  if [[ -f "$key_file" ]]; then
    echo "Skipping $archive — $key_file already present"
    continue
  fi

  echo "Extracting $archive into $out_dir"
  mkdir -p "$out_dir"

  # Choose the tar decompression flag from the archive's file name.
  if [[ "$archive" == *.tar ]]; then
    tar -xf "$archive" -C "$out_dir"
  elif [[ "$archive" == *.tar.gz || "$archive" == *.tgz ]]; then
    tar -xzf "$archive" -C "$out_dir"
  elif [[ "$archive" == *.tar.bz2 ]]; then
    tar -xjf "$archive" -C "$out_dir"
  elif [[ "$archive" == *.tar.xz ]]; then
    tar -xJf "$archive" -C "$out_dir"
  else
    echo "Unknown type: $archive"
  fi
done

echo "All archives processed."
|
@@ -3,4 +3,5 @@ numpy
|
||||
pandas
|
||||
scikit-learn
|
||||
pydotplus
|
||||
matplotlib
|
||||
matplotlib
|
||||
scipy
|
44
sanity_check/csvdiff.py
Normal file
44
sanity_check/csvdiff.py
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
csvdiff.py file1.csv file2.csv
|
||||
Streams both files; prints the first differing line or
|
||||
‘No differences found’. Uses O(1) memory.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from itertools import zip_longest
|
||||
from pathlib import Path
|
||||
|
||||
def open_checked(p: str):
    """Open *p* for reading and return ``(file_object, Path)``.

    The given path is echoed to stdout first. The file is opened in text
    mode with ``newline=""`` so line endings reach the caller untranslated.
    A missing file terminates the program with an error message.
    """
    print(p)
    path = Path(p)
    try:
        handle = path.open("r", newline="")
    except FileNotFoundError:
        sys.exit(f"Error: {path} not found")
    return handle, path
|
||||
|
||||
def human(n: int) -> str:
    """Format *n* with comma thousands separators, e.g. ``1234567`` -> ``"1,234,567"``."""
    return format(n, ",")
|
||||
|
||||
def main(a_path: str, b_path: str) -> None:
    """Compare two files line by line and report the first difference.

    Both files are streamed in lockstep. At the first mismatching line the
    1-based line number is printed, followed either by which file ended
    early or by the two differing lines. If no mismatch is found,
    ``No differences found`` is printed.
    """
    fa, a = open_checked(a_path)
    fb, b = open_checked(b_path)

    with fa, fb:
        line_no = 0
        # zip_longest pads the shorter file with None, which flags an early end.
        for ra, rb in zip_longest(fa, fb):
            line_no += 1
            if ra == rb:
                continue

            print(f"Files differ at line {human(line_no)}")
            if ra is None:
                print(f"{a} ended early")
            elif rb is None:
                print(f"{b} ended early")
            else:
                print(f"{a}: {ra.rstrip()}")
                print(f"{b}: {rb.rstrip()}")
            return

    print("No differences found")
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two file arguments are expected after the script name.
    try:
        _, first_csv, second_csv = sys.argv
    except ValueError:
        sys.exit("Usage: csvdiff.py file1.csv file2.csv")
    main(first_csv, second_csv)
|
600
sanity_check/data_visualization.ipynb
Normal file
600
sanity_check/data_visualization.ipynb
Normal file
File diff suppressed because one or more lines are too long
206
sanity_check/diversity_metrics.py
Normal file
206
sanity_check/diversity_metrics.py
Normal file
@@ -0,0 +1,206 @@
|
||||
#!/usr/bin/env python3
|
||||
"""diversity_metrics.py (fast version)
|
||||
|
||||
Estimate how much diversity each CSV adds without building a giant in‑memory
|
||||
DataFrame. Designed for IoT packet logs with millions of rows.
|
||||
|
||||
Quick summary printed as a GitHub‑style table (requires *tabulate*; falls back
|
||||
to pandas plain text).
|
||||
|
||||
Usage
|
||||
-----
|
||||
python diversity_metrics.py path/to/processed_dir [-r] [--sample 50000]
|
||||
|
||||
Metrics
|
||||
-------
|
||||
ΔEntropy : change in Shannon entropy of *classification* counts
|
||||
ΔGini : change in Gini impurity of the same counts
|
||||
χ² p : Pearson χ² p‑value old vs new classification counts
|
||||
Jaccard : similarity of unique (src,dst) pairs (0 → new pairs, 1 → no new)
|
||||
KS src p : Kolmogorov–Smirnov p‑value, source‑port dist (uses sampling)
|
||||
KS dst p : Kolmogorov–Smirnov p‑value, dest‑port dist (uses sampling)
|
||||
|
||||
Speed tricks
|
||||
------------
|
||||
* No growing DataFrame; we keep Counters / sets / lists.
|
||||
* Ports for KS are *sampled* (default 50 k) to bound cost.
|
||||
* (src,dst) pairs are hashed to a 32‑bit int to reduce set overhead.
|
||||
* pandas reads via **pyarrow** engine when available.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from collections import Counter
|
||||
from typing import List, Set
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.stats import chi2_contingency, ks_2samp, entropy
|
||||
|
||||
try:
|
||||
from tabulate import tabulate
|
||||
_USE_TABULATE = True
|
||||
except ImportError:
|
||||
_USE_TABULATE = False
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Helper metrics
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
def shannon(counts: Counter) -> float:
    """Return the Shannon entropy (in bits) of the distribution in *counts*.

    The counts are normalised to probabilities before the entropy is taken.
    A counter whose values sum to zero (including an empty one) yields 0.0.
    """
    n_obs = sum(counts.values())
    if n_obs == 0:
        return 0.0
    probabilities = np.fromiter(counts.values(), dtype=float) / n_obs
    return entropy(probabilities, base=2)
|
||||
|
||||
|
||||
def gini(counts: Counter) -> float:
    """Return the Gini impurity of the distribution described by *counts*.

    Computed as one minus the sum of squared class shares. A counter whose
    values sum to zero (including an empty one) yields 0.0.
    """
    n_obs = sum(counts.values())
    if n_obs == 0:
        return 0.0
    squared_shares = [(k / n_obs) ** 2 for k in counts.values()]
    return 1.0 - sum(squared_shares)
|
||||
|
||||
|
||||
def jaccard(a: Set[int], b: Set[int]) -> float:
    """Return the Jaccard similarity |a ∩ b| / |a ∪ b| of two sets.

    Two empty sets are treated as identical and give 1.0.
    """
    if a or b:
        shared = a & b
        combined = a | b
        return len(shared) / len(combined)
    return 1.0
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Core analysis
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
def _subsample(values: np.ndarray, sample_size: int) -> np.ndarray:
    """Return *values*, randomly down-sampled (no replacement) to *sample_size* if larger."""
    if values.size > sample_size:
        return np.random.choice(values, sample_size, replace=False)
    return values


def _format_p(p_value: float) -> str:
    """Format a p-value with 3 significant digits, or "NA" when it is NaN."""
    return "NA" if np.isnan(p_value) else f"{p_value:.3g}"


def analyse(csv_files: List[Path], sample_size: int) -> List[dict]:
    """Return one dict of diversity metrics per CSV file, in the order given.

    Files are processed cumulatively: each file is compared against the
    union of all files that came before it, then merged into that state.

    Parameters
    ----------
    csv_files:
        CSV paths to read. Each file must provide the columns ``protocl``,
        ``src``, ``dst`` and ``classfication`` (spelled exactly like that);
        ``protocl``, ``src`` and ``dst`` are read as uint16.
    sample_size:
        Maximum number of port values drawn (without replacement) from each
        side before running the Kolmogorov-Smirnov tests.

    Returns
    -------
    list of dict
        Keys: ``File``, ``Rows``, ``ΔEntropy``, ``ΔGini``, ``χ² p``,
        ``Jaccard``, ``KS src p``, ``KS dst p``. The p-value columns are
        strings and read "NA" when no comparison is possible (first file,
        or a file without usable rows).
    """
    import importlib.util  # local import: only needed to probe for pyarrow

    # Pick the fast pyarrow parser only when pandas 2+ is running AND pyarrow
    # is actually installed; comparing version *strings* would misorder
    # e.g. "10.0" and "2.0", and requesting a missing engine raises.
    try:
        pandas_major = int(pd.__version__.split(".", 1)[0])
    except ValueError:
        pandas_major = 0
    has_pyarrow = importlib.util.find_spec("pyarrow") is not None
    engine = "pyarrow" if pandas_major >= 2 and has_pyarrow else "c"

    # Cumulative state (no big DataFrame!)
    class_counter: Counter = Counter()
    pair_hashes: Set[int] = set()
    seen_src = np.empty(0, dtype=np.uint16)
    seen_dst = np.empty(0, dtype=np.uint16)

    rows = []

    for csv_path in csv_files:
        df = pd.read_csv(
            csv_path,
            engine=engine,
            usecols=["protocl", "src", "dst", "classfication"],
            dtype={
                "protocl": "uint16",
                "protocol": "uint16",
                "src": "uint16",
                "dst": "uint16",
            },
        )
        # normalise column names
        df.rename(columns={"protocl": "protocol", "classfication": "classification"}, inplace=True)

        # Per-file views, computed once and reused below.
        new_counts = df["classification"].value_counts()  # NaN labels are dropped
        new_src = df["src"].to_numpy(dtype=np.uint16)
        new_dst = df["dst"].to_numpy(dtype=np.uint16)

        # Snapshot the state as it was BEFORE this file.
        prev_class = class_counter.copy()
        prev_pairs = pair_hashes.copy()
        prev_src = seen_src
        prev_dst = seen_dst

        # --- update cumulative structures ------------------------------------
        class_counter.update(new_counts.to_dict())

        # Pack (src, dst) into one 32-bit int (src in the high 16 bits) and
        # record it. Without this update the Jaccard column is always 1.0.
        pair_ids = (new_src.astype(np.uint32) << np.uint32(16)) | new_dst.astype(np.uint32)
        pair_hashes.update(np.unique(pair_ids).tolist())

        # Ports stay in compact uint16 arrays rather than Python int lists.
        seen_src = np.concatenate([prev_src, new_src])
        seen_dst = np.concatenate([prev_dst, new_dst])

        # --- metrics ----------------------------------------------------------
        # χ² of previous cumulative class counts vs this file's class counts.
        # Skipped when either side is empty: an all-zero row would make
        # chi2_contingency raise.
        chi_p = np.nan
        if prev_class and not new_counts.empty:
            all_classes = list(set(prev_class) | set(new_counts.index))
            old = [prev_class.get(c, 0) for c in all_classes]
            new = [int(new_counts.get(c, 0)) for c in all_classes]
            _, chi_p, _, _ = chi2_contingency([old, new])

        # entropy & gini deltas (cumulative after minus cumulative before)
        delta_entropy = shannon(class_counter) - shannon(prev_class)
        delta_gini = gini(class_counter) - gini(prev_class)

        # Jaccard of the pair set before vs after adding this file.
        jc = jaccard(prev_pairs, pair_hashes)

        # KS tests on (sampled) ports; need data on both sides.
        ks_src_p = ks_dst_p = np.nan
        if prev_src.size and new_src.size:
            # Sampling order is kept as prev_src, new_src, prev_dst, new_dst
            # so a seeded RNG yields the same draws as before.
            prev_src_sample = _subsample(prev_src, sample_size)
            new_src_sample = _subsample(new_src, sample_size)
            prev_dst_sample = _subsample(prev_dst, sample_size)
            new_dst_sample = _subsample(new_dst, sample_size)

            ks_src_p = ks_2samp(prev_src_sample, new_src_sample).pvalue
            ks_dst_p = ks_2samp(prev_dst_sample, new_dst_sample).pvalue

        rows.append(
            {
                "File": csv_path.name,
                "Rows": len(df),
                "ΔEntropy": round(delta_entropy, 4),
                "ΔGini": round(delta_gini, 4),
                "χ² p": _format_p(chi_p),
                "Jaccard": round(jc, 3),
                "KS src p": _format_p(ks_src_p),
                "KS dst p": _format_p(ks_dst_p),
            }
        )
    return rows
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# CLI
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the arguments, collects the CSV files under ``csv_dir`` in sorted
    order (recursively with ``-r``), runs ``analyse`` on them and prints the
    resulting table followed by a short legend. Prints a notice and returns
    early when no CSV file is found.
    """
    parser = argparse.ArgumentParser(description="Evaluate diversity contribution of each CSV (fast version).")
    parser.add_argument("csv_dir", help="Directory containing CSV files")
    parser.add_argument("-r", "--recursive", action="store_true", help="Recursively search csv_dir")
    parser.add_argument("--sample", type=int, default=50_000, help="Sample size for KS tests (default 50k)")
    options = parser.parse_args()

    if options.recursive:
        glob_pattern = "**/*.csv"
    else:
        glob_pattern = "*.csv"

    csv_files = sorted(Path(options.csv_dir).glob(glob_pattern))
    if not csv_files:
        print("No CSV files found.")
        return

    table_rows = analyse(csv_files, options.sample)

    # Prefer the GitHub-style table from tabulate; otherwise fall back to
    # pandas' plain-text rendering.
    if _USE_TABULATE:
        rendered = tabulate(table_rows, headers="keys", tablefmt="github", floatfmt=".4f")
    else:
        rendered = pd.DataFrame(table_rows).to_string(index=False)
    print(rendered)

    legend = (
        "\nLegend:\n • p-values (χ², KS) < 0.05 → new file significantly shifts distribution (GOOD)"
        "\n • Positive ΔEntropy or ΔGini → richer mix; near 0 → little new info"
        "\n • Jaccard close to 0 → many unseen (src,dst) pairs; close to 1 → redundant."
    )
    print(legend)
|
||||
|
||||
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
14
setup.sh
Normal file
14
setup.sh
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Prepare the data directory tree next to this script:
#   data/
#     tar/
#     pcap/
#     processed/
#     combined/

# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail

# Absolute path of the directory that contains this script.
root="$(cd -- "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# A single mkdir -p call creates every missing directory and leaves
# existing ones untouched.
mkdir -p \
  "$root/data/tar" \
  "$root/data/pcap" \
  "$root/data/processed" \
  "$root/data/combined"

echo "Directory structure ready under $root/data/"
|
Reference in New Issue
Block a user