add classifications to each feature's range

This commit is contained in:
2025-06-01 07:27:02 +00:00
parent 2d80a4b94e
commit 9729c6e68c

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 9,
"id": "ec310f34", "id": "ec310f34",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -14,7 +14,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 10,
"id": "5b54797e", "id": "5b54797e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -28,7 +28,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 11,
"id": "a38fdb8a", "id": "a38fdb8a",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -57,7 +57,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 12,
"id": "2fd4f738", "id": "2fd4f738",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -80,7 +80,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 13,
"id": "98cde024", "id": "98cde024",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -120,7 +120,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 14,
"id": "b6fbadbf", "id": "b6fbadbf",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -143,14 +143,17 @@
"\t\treturn x <= upper and x > lower\n", "\t\treturn x <= upper and x > lower\n",
"\n", "\n",
"# create buckets for each feature, where each is a list of sets\n", "# create buckets for each feature, where each is a list of sets\n",
"buckets = {}\n", "buckets_id = {}\n",
"buckets_class = {}\n",
"for feature in breakpoints:\n", "for feature in breakpoints:\n",
"\tnum_points = len(breakpoints[feature])\n", "\tnum_points = len(breakpoints[feature])\n",
"\tbuckets[feature] = []\n", "\tbuckets_id[feature] = []\n",
"\tbuckets_class[feature] = []\n",
"\t# each index in the feature corresponds to the corresponding breakpoint value in breakpoints\n", "\t# each index in the feature corresponds to the corresponding breakpoint value in breakpoints\n",
"\t# each index holds a set, which is the membership of paths in that range\n", "\t# each index holds a set, which is the membership of paths in that range\n",
"\tfor i in range(0, num_points + 1):\n", "\tfor i in range(0, num_points + 1):\n",
"\t\tbuckets[feature].append(set())\n", "\t\tbuckets_id[feature].append(set())\n",
"\t\tbuckets_class[feature].append(set())\n",
"\n", "\n",
"# for each path\n", "# for each path\n",
"for path in paths:\n", "for path in paths:\n",
@@ -160,19 +163,21 @@
"\t\tlower = feature[\"min\"]\n", "\t\tlower = feature[\"min\"]\n",
"\t\tupper = feature[\"max\"]\n", "\t\tupper = feature[\"max\"]\n",
"\t\tID = path[\"id\"]\n", "\t\tID = path[\"id\"]\n",
"\t\tClass = path[\"classification\"]\n",
"\n", "\n",
"\t\t# for each bucket which encompasses the condition's range, add this path's id to the sets \n", "\t\t# for each bucket which encompasses the condition's range, add this path's id and classification to the sets \n",
"\t\ti = 0\n", "\t\ti = 0\n",
"\t\tfor bp in breakpoints[feature_name]:\n", "\t\tfor bp in breakpoints[feature_name]:\n",
"\t\t\tin_range = is_in_range(bp, lower, upper)\n", "\t\t\tin_range = is_in_range(bp, lower, upper)\n",
"\t\t\tif in_range:\n", "\t\t\tif in_range:\n",
"\t\t\t\tbuckets[feature_name][i].add(ID)\n", "\t\t\t\tbuckets_id[feature_name][i].add(ID)\n",
"\t\t\t\tbuckets_class[feature_name][i].add(Class)\n",
"\t\t\ti += 1" "\t\t\ti += 1"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 15,
"id": "0a767971", "id": "0a767971",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -180,35 +185,44 @@
"# combine breakpoints and buckets to one representation\n", "# combine breakpoints and buckets to one representation\n",
"\n", "\n",
"compressed_tree = defaultdict(list)\n", "compressed_tree = defaultdict(list)\n",
"for feature_name in buckets:\n", "for feature_name in buckets_id:\n",
"\tlower = None\n", "\tlower = None\n",
"\tupper = breakpoints[feature_name][0]\n", "\tupper = breakpoints[feature_name][0]\n",
"\tmembers = buckets[feature_name][0]\n", "\tpaths = buckets_id[feature_name][0]\n",
"\tclasses = buckets_class[feature_name][0]\n",
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n", "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n", "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\tfor i in range(1, len(buckets[feature_name]) - 1):\n", "\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n",
"\t\tlower = breakpoints[feature_name][i-1]\n", "\t\tlower = breakpoints[feature_name][i-1]\n",
"\t\tupper = breakpoints[feature_name][i]\n", "\t\tupper = breakpoints[feature_name][i]\n",
"\t\tmembers = buckets[feature_name][i]\n", "\t\tmembers = buckets_id[feature_name][i]\n",
"\t\tclasses = buckets_class[feature_name][i]\n",
"\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n", "\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
"\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n", "\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n", "\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
"\tupper = None\n", "\tupper = None\n",
"\tmembers = buckets[feature_name][len(buckets[feature_name]) - 1]\n", "\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
"\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n",
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n", "\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n", "\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
"\t#print(\"=\"*40)" "\t#print(\"=\"*40)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 16,
"id": "561b0bc1", "id": "561b0bc1",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"class SetEncoder(json.JSONEncoder):\n",
" def default(self, obj):\n",
" if isinstance(obj, set):\n",
" return list(obj)\n",
" return json.JSONEncoder.default(self, obj)\n",
"\n",
"f = open(\"compressed_tree.json\", \"w+\")\n", "f = open(\"compressed_tree.json\", \"w+\")\n",
"f.write(json.dumps(compressed_tree, indent = 4))\n", "f.write(json.dumps(compressed_tree, indent = 4, cls=SetEncoder))\n",
"f.close()" "f.close()"
] ]
} }