mirror of
https://github.com/ltcptgeneral/IdealRMT-DecisionTrees.git
synced 2025-09-06 07:17:23 +00:00
add classifications to each feature's range
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 9,
|
||||||
"id": "ec310f34",
|
"id": "ec310f34",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -14,7 +14,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": 10,
|
||||||
"id": "5b54797e",
|
"id": "5b54797e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -28,7 +28,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 11,
|
||||||
"id": "a38fdb8a",
|
"id": "a38fdb8a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -57,7 +57,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 28,
|
"execution_count": 12,
|
||||||
"id": "2fd4f738",
|
"id": "2fd4f738",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -80,7 +80,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": 13,
|
||||||
"id": "98cde024",
|
"id": "98cde024",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -120,7 +120,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 14,
|
||||||
"id": "b6fbadbf",
|
"id": "b6fbadbf",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -143,14 +143,17 @@
|
|||||||
"\t\treturn x <= upper and x > lower\n",
|
"\t\treturn x <= upper and x > lower\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create buckets for each feature, where each is a list of sets\n",
|
"# create buckets for each feature, where each is a list of sets\n",
|
||||||
"buckets = {}\n",
|
"buckets_id = {}\n",
|
||||||
|
"buckets_class = {}\n",
|
||||||
"for feature in breakpoints:\n",
|
"for feature in breakpoints:\n",
|
||||||
"\tnum_points = len(breakpoints[feature])\n",
|
"\tnum_points = len(breakpoints[feature])\n",
|
||||||
"\tbuckets[feature] = []\n",
|
"\tbuckets_id[feature] = []\n",
|
||||||
|
"\tbuckets_class[feature] = []\n",
|
||||||
"\t# each index in the feature corresponds to the corresponding breakpoint value in breakpoints\n",
|
"\t# each index in the feature corresponds to the corresponding breakpoint value in breakpoints\n",
|
||||||
"\t# each index holds a set, which is the membership of paths in that range\n",
|
"\t# each index holds a set, which is the membership of paths in that range\n",
|
||||||
"\tfor i in range(0, num_points + 1):\n",
|
"\tfor i in range(0, num_points + 1):\n",
|
||||||
"\t\tbuckets[feature].append(set())\n",
|
"\t\tbuckets_id[feature].append(set())\n",
|
||||||
|
"\t\tbuckets_class[feature].append(set())\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# for each path\n",
|
"# for each path\n",
|
||||||
"for path in paths:\n",
|
"for path in paths:\n",
|
||||||
@@ -160,19 +163,21 @@
|
|||||||
"\t\tlower = feature[\"min\"]\n",
|
"\t\tlower = feature[\"min\"]\n",
|
||||||
"\t\tupper = feature[\"max\"]\n",
|
"\t\tupper = feature[\"max\"]\n",
|
||||||
"\t\tID = path[\"id\"]\n",
|
"\t\tID = path[\"id\"]\n",
|
||||||
|
"\t\tClass = path[\"classification\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\t\t# for each bucket which encompasses the condition's range, add this path's id to the sets \n",
|
"\t\t# for each bucket which encompasses the condition's range, add this path's id to the sets \n",
|
||||||
"\t\ti = 0\n",
|
"\t\ti = 0\n",
|
||||||
"\t\tfor bp in breakpoints[feature_name]:\n",
|
"\t\tfor bp in breakpoints[feature_name]:\n",
|
||||||
"\t\t\tin_range = is_in_range(bp, lower, upper)\n",
|
"\t\t\tin_range = is_in_range(bp, lower, upper)\n",
|
||||||
"\t\t\tif in_range:\n",
|
"\t\t\tif in_range:\n",
|
||||||
"\t\t\t\tbuckets[feature_name][i].add(ID)\n",
|
"\t\t\t\tbuckets_id[feature_name][i].add(ID)\n",
|
||||||
|
"\t\t\t\tbuckets_class[feature_name][i].add(Class)\n",
|
||||||
"\t\t\ti += 1"
|
"\t\t\ti += 1"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 31,
|
"execution_count": 15,
|
||||||
"id": "0a767971",
|
"id": "0a767971",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -180,35 +185,44 @@
|
|||||||
"# combine breakpoints and buckets to one representation\n",
|
"# combine breakpoints and buckets to one representation\n",
|
||||||
"\n",
|
"\n",
|
||||||
"compressed_tree = defaultdict(list)\n",
|
"compressed_tree = defaultdict(list)\n",
|
||||||
"for feature_name in buckets:\n",
|
"for feature_name in buckets_id:\n",
|
||||||
"\tlower = None\n",
|
"\tlower = None\n",
|
||||||
"\tupper = breakpoints[feature_name][0]\n",
|
"\tupper = breakpoints[feature_name][0]\n",
|
||||||
"\tmembers = buckets[feature_name][0]\n",
|
"\tpaths = buckets_id[feature_name][0]\n",
|
||||||
|
"\tclasses = buckets_class[feature_name][0]\n",
|
||||||
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
|
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
|
||||||
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
|
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||||
"\tfor i in range(1, len(buckets[feature_name]) - 1):\n",
|
"\tfor i in range(1, len(buckets_id[feature_name]) - 1):\n",
|
||||||
"\t\tlower = breakpoints[feature_name][i-1]\n",
|
"\t\tlower = breakpoints[feature_name][i-1]\n",
|
||||||
"\t\tupper = breakpoints[feature_name][i]\n",
|
"\t\tupper = breakpoints[feature_name][i]\n",
|
||||||
"\t\tmembers = buckets[feature_name][i]\n",
|
"\t\tmembers = buckets_id[feature_name][i]\n",
|
||||||
|
"\t\tclasses = buckets_class[feature_name][i]\n",
|
||||||
"\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
|
"\t\t#print(f\"{feature_name} = [{lower}, {upper}]: {buckets[feature_name][i]}\")\n",
|
||||||
"\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
|
"\t\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||||
"\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
|
"\tlower = breakpoints[feature_name][len(breakpoints[feature_name]) - 1]\n",
|
||||||
"\tupper = None\n",
|
"\tupper = None\n",
|
||||||
"\tmembers = buckets[feature_name][len(buckets[feature_name]) - 1]\n",
|
"\tmembers = buckets_id[feature_name][len(buckets_id[feature_name]) - 1]\n",
|
||||||
|
"\tclasses = buckets_class[feature_name][len(buckets_class[feature_name]) - 1]\n",
|
||||||
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
|
"\t#print(f\"{feature_name} = [{lower}, {upper}]: {members}\")\n",
|
||||||
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": list(members)})\n",
|
"\tcompressed_tree[feature_name].append({\"min\": lower, \"max\": upper, \"paths\": paths, \"classes\": classes})\n",
|
||||||
"\t#print(\"=\"*40)"
|
"\t#print(\"=\"*40)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 32,
|
"execution_count": 16,
|
||||||
"id": "561b0bc1",
|
"id": "561b0bc1",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"class SetEncoder(json.JSONEncoder):\n",
|
||||||
|
" def default(self, obj):\n",
|
||||||
|
" if isinstance(obj, set):\n",
|
||||||
|
" return list(obj)\n",
|
||||||
|
" return json.JSONEncoder.default(self, obj)\n",
|
||||||
|
"\n",
|
||||||
"f = open(\"compressed_tree.json\", \"w+\")\n",
|
"f = open(\"compressed_tree.json\", \"w+\")\n",
|
||||||
"f.write(json.dumps(compressed_tree, indent = 4))\n",
|
"f.write(json.dumps(compressed_tree, indent = 4, cls=SetEncoder))\n",
|
||||||
"f.close()"
|
"f.close()"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user