mirror of
https://github.com/ltcptgeneral/IdealRMT-DecisionTrees.git
synced 2025-09-06 07:17:23 +00:00
add classifications to each feature's range
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 9,
|
||||
"id": "ec310f34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 10,
|
||||
"id": "5b54797e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -28,7 +28,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 11,
|
||||
"id": "a38fdb8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -57,7 +57,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 12,
|
||||
"id": "2fd4f738",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -80,7 +80,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 13,
|
||||
"id": "98cde024",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -120,7 +120,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 14,
|
||||
"id": "b6fbadbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -143,14 +143,17 @@
|
||||
"\t\treturn x <= upper and x > lower\n",
|
||||
"\n",
|
||||
"# create buckets for each feature, where each is a list of sets\n",
|
||||
"buckets = {}\n",
|
||||
"buckets_id = {}\n",
|
||||
"buckets_class = {}\n",
|
||||
"for feature in breakpoints:\n",
|
||||
"\tnum_points = len(breakpoints[feature])\n",
|
||||
"\tbuckets[feature] = []\n",
|
||||
"\tbuckets_id[feature] = []\n",
|
||||
"\tbuckets_class[feature] = []\n",
|
||||
"\t# each index in the feature corresponds to the corresponding breakpoint value in breakpoints\n",
|
||||
"\t# each index holds a set, which is the membership of paths in that range\n",
|
||||
"\tfor i in range(0, num_points + 1):\n",
|
||||
"\t\tbuckets[feature].append(set())\n",
|
||||
"\t\tbuckets_id[feature].append(set())\n",
|
||||
"\t\tbuckets_class[feature].append(set())\n",
|
||||
"\n",
|
||||
"# for each path\n",
|
||||
"for path in paths:\n",
|
||||
@@ -160,19 +163,21 @@
|
||||
"\t\tlower = feature[\"min\"]\n",
|
||||
"\t\tupper = feature[\"max\"]\n",
|
||||
"\t\tID = path[\"id\"]\n",
|
||||
"\t\tClass = path[\"classification\"]\n",
|
||||
"\n",
|
||||
"\t\t# for each bucket which encompases the condition's range, add this path's id to the sets \n",
|
||||
"\t\ti = 0\n",
|
||||
"\t\tfor bp in breakpoints[feature_name]:\n",
|
||||
"\t\t\tin_range = is_in_range(bp, lower, upper)\n",
|
||||
"\t\t\tif in_range:\n",
|
||||
"\t\t\t\tbuckets[feature_name][i].add(ID)\n",
|
||||
"\t\t\t\tbuckets_id[feature_name][i].add(ID)\n",
|
||||
"\t\t\t\tbuckets_class[feature_name][i].add(Class)\n",
|
||||
"\t\t\ti += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 15,
|
||||
"id": "0a767971",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -180,35 +185,44 @@
|
||||
"# combine breakpoints and buckets to one representation\n",
|
||||
"\n",
|
||||
"compressed_tree = defaultdict(list)\n",
|
||||
"for feature_name in buckets:\n",
|
||||
"for feature_name in buckets_id:\n",
|
||||
"\tlower = None\n",
|
||||
"\tupper = breakpoints[feature_name][0]\n",
|
||||
"\tmembers = buckets[feature_name][0]\n",
|
||||
"\tpaths = buckets_id[feature_name][0]\n",
|
||||
"\tclasses = buckets_class[feature_name][0]\n",
|
||||
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
|
||||
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
|
||||
"\tfor i in range(1, len(buckets[feature_name]) - 1):\n",
|
||||
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||
"\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n",
|
||||
"\t\tlower = breakpoints[feature_name][i-1]\n",
|
||||
"\t\tupper = breakpoints[feature_name][i]\n",
|
||||
"\t\tmembers = buckets[feature_name][i]\n",
|
||||
"\t\tmembers = buckets_id[feature_name][i]\n",
|
||||
"\t\tclasses = buckets_class[feature_name][i]\n",
|
||||
"\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
|
||||
"\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
|
||||
"\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||
"\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
|
||||
"\tupper = None\n",
|
||||
"\tmembers = buckets[feature_name][len(buckets[feature_name]) - 1]\n",
|
||||
"\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
|
||||
"\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n",
|
||||
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
|
||||
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
|
||||
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||
"\t#print(\"=\"*40)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 16,
|
||||
"id": "561b0bc1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class SetEncoder(json.JSONEncoder):\n",
|
||||
" def default(self, obj):\n",
|
||||
" if isinstance(obj, set):\n",
|
||||
" return list(obj)\n",
|
||||
" return json.JSONEncoder.default(self, obj)\n",
|
||||
"\n",
|
||||
"f = open(\"compressed_tree.json\", \"w+\")\n",
|
||||
"f.write(json.dumps(compressed_tree, indent = 4))\n",
|
||||
"f.write(json.dumps(compressed_tree, indent = 4, cls=SetEncoder))\n",
|
||||
"f.close()"
|
||||
]
|
||||
}
|
||||
|
Reference in New Issue
Block a user