From 9729c6e68c0b1960ccb87b6b02a456480144d285 Mon Sep 17 00:00:00 2001
From: Arthur Lu <root@tronnet.net>
Date: Sun, 1 Jun 2025 07:27:02 +0000
Subject: [PATCH] add classifications to each feature's range

---
 RMTConvert.ipynb | 56 ++++++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/RMTConvert.ipynb b/RMTConvert.ipynb
index 4f0bc9a..17335bf 100644
--- a/RMTConvert.ipynb
+++ b/RMTConvert.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 9,
    "id": "ec310f34",
    "metadata": {},
    "outputs": [],
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 10,
    "id": "5b54797e",
    "metadata": {},
    "outputs": [],
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 11,
    "id": "a38fdb8a",
    "metadata": {},
    "outputs": [],
@@ -57,7 +57,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 12,
    "id": "2fd4f738",
    "metadata": {},
    "outputs": [],
@@ -80,7 +80,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 13,
    "id": "98cde024",
    "metadata": {},
    "outputs": [],
@@ -120,7 +120,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 14,
    "id": "b6fbadbf",
    "metadata": {},
    "outputs": [],
@@ -143,14 +143,17 @@
     "\t\treturn x <= upper and x > lower\n",
     "\n",
     "# create buckets for each feature, where each is a list of sets\n",
-    "buckets = {}\n",
+    "buckets_id = {}\n",
+    "buckets_class = {}\n",
     "for feature in breakpoints:\n",
     "\tnum_points = len(breakpoints[feature])\n",
-    "\tbuckets[feature] = []\n",
+    "\tbuckets_id[feature] = []\n",
+    "\tbuckets_class[feature] = []\n",
     "\t# each index in the feature corresponds to the corresponding breakpoint value in breakpoints\n",
     "\t# each index holds a set, which is the membership of paths in that range\n",
     "\tfor i in range(0, num_points + 1):\n",
-    "\t\tbuckets[feature].append(set())\n",
+    "\t\tbuckets_id[feature].append(set())\n",
+    "\t\tbuckets_class[feature].append(set())\n",
     "\n",
     "# for each path\n",
     "for path in paths:\n",
@@ -160,19 +163,21 @@
     "\t\tlower = feature[\"min\"]\n",
     "\t\tupper = feature[\"max\"]\n",
     "\t\tID = path[\"id\"]\n",
+    "\t\tClass = path[\"classification\"]\n",
     "\n",
     "\t\t# for each bucket which encompases the condition's range, add this path's id to the sets \n",
     "\t\ti = 0\n",
     "\t\tfor bp in breakpoints[feature_name]:\n",
     "\t\t\tin_range = is_in_range(bp, lower, upper)\n",
     "\t\t\tif in_range:\n",
-    "\t\t\t\tbuckets[feature_name][i].add(ID)\n",
+    "\t\t\t\tbuckets_id[feature_name][i].add(ID)\n",
+    "\t\t\t\tbuckets_class[feature_name][i].add(Class)\n",
     "\t\t\ti += 1"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 15,
    "id": "0a767971",
    "metadata": {},
    "outputs": [],
@@ -180,35 +185,44 @@
     "# combine breakpoints and buckets to one representation\n",
     "\n",
     "compressed_tree = defaultdict(list)\n",
-    "for feature_name in buckets:\n",
+    "for feature_name in buckets_id:\n",
     "\tlower = None\n",
     "\tupper = breakpoints[feature_name][0]\n",
-    "\tmembers = buckets[feature_name][0]\n",
+    "\tmembers = buckets_id[feature_name][0]\n",
+    "\tclasses = buckets_class[feature_name][0]\n",
     "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
-    "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
-    "\tfor i in range(1, len(buckets[feature_name]) - 1):\n",
+    "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})\n",
+    "\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n",
     "\t\tlower = breakpoints[feature_name][i-1]\n",
     "\t\tupper = breakpoints[feature_name][i]\n",
-    "\t\tmembers = buckets[feature_name][i]\n",
+    "\t\tmembers = buckets_id[feature_name][i]\n",
+    "\t\tclasses = buckets_class[feature_name][i]\n",
     "\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
-    "\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
+    "\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})\n",
     "\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
     "\tupper = None\n",
-    "\tmembers = buckets[feature_name][len(buckets[feature_name]) - 1]\n",
+    "\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
+    "\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n",
     "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
-    "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
+    "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})\n",
     "\t#print(\"=\"*40)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 16,
    "id": "561b0bc1",
    "metadata": {},
    "outputs": [],
    "source": [
+    "class SetEncoder(json.JSONEncoder):\n",
+    "    def default(self, obj):\n",
+    "        if isinstance(obj, set):\n",
+    "            return list(obj)\n",
+    "        return json.JSONEncoder.default(self, obj)\n",
+    "\n",
     "f = open(\"compressed_tree.json\", \"w+\")\n",
-    "f.write(json.dumps(compressed_tree, indent = 4))\n",
+    "f.write(json.dumps(compressed_tree, indent = 4, cls=SetEncoder))\n",
     "f.close()"
    ]
   }