add classifications to each feature's range

2025-09-06 07:17:23 +00:00 · 2025-06-01 07:27:02 +00:00
parent 2d80a4b94e
commit 9729c6e68c
1 changed files with 35 additions and 21 deletions
--- a/RMTConvert.ipynb
+++ b/RMTConvert.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 9,
   "id": "ec310f34",
   "metadata": {},
   "outputs": [],
@@ -14,7 +14,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 10,
   "id": "5b54797e",
   "metadata": {},
   "outputs": [],
@@ -28,7 +28,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 11,
   "id": "a38fdb8a",
   "metadata": {},
   "outputs": [],
@@ -57,7 +57,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 12,
   "id": "2fd4f738",
   "metadata": {},
   "outputs": [],
@@ -80,7 +80,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 13,
   "id": "98cde024",
   "metadata": {},
   "outputs": [],
@@ -120,7 +120,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 14,
   "id": "b6fbadbf",
   "metadata": {},
   "outputs": [],
@@ -143,14 +143,17 @@
    "\t\treturn x <= upper and x > lower\n",
    "\n",
    "# create buckets for each feature, where each is a list of sets\n",
-    "buckets = {}\n",
+    "buckets_id = {}\n",
+    "buckets_class = {}\n",
    "for feature in breakpoints:\n",
    "\tnum_points = len(breakpoints[feature])\n",
-    "\tbuckets[feature] = []\n",
+    "\tbuckets_id[feature] = []\n",
+    "\tbuckets_class[feature] = []\n",
    "\t# each index in the feature corresponds to the corresponding breakpoint value in breakpoints\n",
    "\t# each index holds a set, which is the membership of paths in that range\n",
    "\tfor i in range(0, num_points + 1):\n",
-    "\t\tbuckets[feature].append(set())\n",
+    "\t\tbuckets_id[feature].append(set())\n",
+    "\t\tbuckets_class[feature].append(set())\n",
    "\n",
    "# for each path\n",
    "for path in paths:\n",
@@ -160,19 +163,21 @@
    "\t\tlower = feature[\"min\"]\n",
    "\t\tupper = feature[\"max\"]\n",
    "\t\tID = path[\"id\"]\n",
+    "\t\tClass = path[\"classification\"]\n",
    "\n",
    "\t\t# for each bucket which encompases the condition's range, add this path's id to the sets \n",
    "\t\ti = 0\n",
    "\t\tfor bp in breakpoints[feature_name]:\n",
    "\t\t\tin_range = is_in_range(bp, lower, upper)\n",
    "\t\t\tif in_range:\n",
-    "\t\t\t\tbuckets[feature_name][i].add(ID)\n",
+    "\t\t\t\tbuckets_id[feature_name][i].add(ID)\n",
+    "\t\t\t\tbuckets_class[feature_name][i].add(Class)\n",
    "\t\t\ti += 1"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 15,
   "id": "0a767971",
   "metadata": {},
   "outputs": [],
@@ -180,35 +185,44 @@
    "# combine breakpoints and buckets to one representation\n",
    "\n",
    "compressed_tree = defaultdict(list)\n",
-    "for feature_name in buckets:\n",
+    "for feature_name in buckets_id:\n",
    "\tlower = None\n",
    "\tupper = breakpoints[feature_name][0]\n",
-    "\tmembers = buckets[feature_name][0]\n",
+    "\tmembers = buckets_id[feature_name][0]  # keep the name `members`: rebinding `paths` would clobber the path list iterated by the bucket-filling cell\n",
+    "\tclasses = buckets_class[feature_name][0]\n",
    "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
-    "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
-    "\tfor i in range(1, len(buckets[feature_name]) - 1):\n",
+    "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})  # sets are serialized by SetEncoder\n",
+    "\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n",
    "\t\tlower = breakpoints[feature_name][i-1]\n",
    "\t\tupper = breakpoints[feature_name][i]\n",
-    "\t\tmembers = buckets[feature_name][i]\n",
+    "\t\tmembers = buckets_id[feature_name][i]\n",
+    "\t\tclasses = buckets_class[feature_name][i]\n",
    "\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
-    "\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
+    "\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})  # this bucket's own ids, not the first bucket's\n",
    "\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
    "\tupper = None\n",
-    "\tmembers = buckets[feature_name][len(buckets[feature_name]) - 1]\n",
+    "\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
+    "\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n",
    "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
-    "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
+    "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})  # last (open-ended) bucket's own ids\n",
    "\t#print(\"=\"*40)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 16,
   "id": "561b0bc1",
   "metadata": {},
   "outputs": [],
   "source": [
+    "class SetEncoder(json.JSONEncoder):\n",
+    "    \"\"\"JSON encoder that emits `set` values as lists and defers every other type to the base class.\"\"\"\n",
+    "    def default(self, obj):\n",
+    "        # list(obj) follows the set's iteration order, so element order in the output is unspecified\n",
+    "        return list(obj) if isinstance(obj, set) else super().default(obj)\n",
+    "\n",
    "f = open(\"compressed_tree.json\", \"w+\")\n",
-    "f.write(json.dumps(compressed_tree, indent = 4))\n",
+    "f.write(json.dumps(compressed_tree, indent = 4, cls=SetEncoder))\n",
    "f.close()"
   ]
  }