From 9729c6e68c0b1960ccb87b6b02a456480144d285 Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Sun, 1 Jun 2025 07:27:02 +0000 Subject: [PATCH] add classifications to each feature's range --- RMTConvert.ipynb | 56 ++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/RMTConvert.ipynb b/RMTConvert.ipynb index 4f0bc9a..17335bf 100644 --- a/RMTConvert.ipynb +++ b/RMTConvert.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 25, + "execution_count": 9, "id": "ec310f34", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 10, "id": "5b54797e", "metadata": {}, "outputs": [], @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 11, "id": "a38fdb8a", "metadata": {}, "outputs": [], @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 12, "id": "2fd4f738", "metadata": {}, "outputs": [], @@ -80,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 13, "id": "98cde024", "metadata": {}, "outputs": [], @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "id": "b6fbadbf", "metadata": {}, "outputs": [], @@ -143,14 +143,17 @@ "\t\treturn x <= upper and x > lower\n", "\n", "# create buckets for each feature, where each is a list of sets\n", - "buckets = {}\n", + "buckets_id = {}\n", + "buckets_class = {}\n", "for feature in breakpoints:\n", "\tnum_points = len(breakpoints[feature])\n", - "\tbuckets[feature] = []\n", + "\tbuckets_id[feature] = []\n", + "\tbuckets_class[feature] = []\n", "\t# each index in the feature corresponds to the corresponding breakpoint value in breakpoints\n", "\t# each index holds a set, which is the membership of paths in that range\n", "\tfor i in range(0, num_points + 1):\n", - "\t\tbuckets[feature].append(set())\n", + "\t\tbuckets_id[feature].append(set())\n", + 
"\t\tbuckets_class[feature].append(set())\n", "\n", "# for each path\n", "for path in paths:\n", @@ -160,19 +163,21 @@ "\t\tlower = feature[\"min\"]\n", "\t\tupper = feature[\"max\"]\n", "\t\tID = path[\"id\"]\n", + "\t\tClass = path[\"classification\"]\n", "\n", "\t\t# for each bucket which encompases the condition's range, add this path's id to the sets \n", "\t\ti = 0\n", "\t\tfor bp in breakpoints[feature_name]:\n", "\t\t\tin_range = is_in_range(bp, lower, upper)\n", "\t\t\tif in_range:\n", - "\t\t\t\tbuckets[feature_name][i].add(ID)\n", + "\t\t\t\tbuckets_id[feature_name][i].add(ID)\n", + "\t\t\t\tbuckets_class[feature_name][i].add(Class)\n", "\t\t\ti += 1" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 15, "id": "0a767971", "metadata": {}, "outputs": [], @@ -180,35 +185,44 @@ "# combine breakpoints and buckets to one representation\n", "\n", "compressed_tree = defaultdict(list)\n", - "for feature_name in buckets:\n", + "for feature_name in buckets_id:\n", "\tlower = None\n", "\tupper = breakpoints[feature_name][0]\n", - "\tmembers = buckets[feature_name][0]\n", + "\tmembers = buckets_id[feature_name][0]\n", + "\tclasses = buckets_class[feature_name][0]\n", "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n", - "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n", - "\tfor i in range(1, len(buckets[feature_name]) - 1):\n", + "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})\n", + "\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n", "\t\tlower = breakpoints[feature_name][i-1]\n", "\t\tupper = breakpoints[feature_name][i]\n", - "\t\tmembers = buckets[feature_name][i]\n", + "\t\tmembers = buckets_id[feature_name][i]\n", + "\t\tclasses = buckets_class[feature_name][i]\n", "\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n", - 
"\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n", + "\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})\n", "\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n", "\tupper = None\n", - "\tmembers = buckets[feature_name][len(buckets[feature_name]) - 1]\n", + "\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n", + "\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n", "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n", - "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n", + "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": members, \"classes\": classes})\n", "\t#print(\"=\"*40)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 16, "id": "561b0bc1", "metadata": {}, "outputs": [], "source": [ + "class SetEncoder(json.JSONEncoder):\n", + " def default(self, obj):\n", + " if isinstance(obj, set):\n", + " return list(obj)\n", + " return json.JSONEncoder.default(self, obj)\n", + "\n", "f = open(\"compressed_tree.json\", \"w+\")\n", - "f.write(json.dumps(compressed_tree, indent = 4))\n", + "f.write(json.dumps(compressed_tree, indent = 4, cls=SetEncoder))\n", "f.close()" ] }