extract lasso features

This commit is contained in:
ltcptgeneral 2023-12-04 10:22:45 -08:00
parent 4cf85a15dd
commit e5e37ebea9
2 changed files with 52 additions and 18 deletions

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -15,7 +15,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -64,7 +64,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -73,7 +73,7 @@
"97248" "97248"
] ]
}, },
"execution_count": 33, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -97,7 +97,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -111,7 +111,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -129,7 +129,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 36, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -169,7 +169,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 37, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -190,18 +190,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 38, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"0.01 0.21632227671715168 0.6847807364903296\n", "0.01 0.21632227671897006 0.6847807364939246\n",
"0.1 0.2156867944836758 0.6829965387241808\n", "0.1 0.21568679448554554 0.6829965387278908\n",
"1 0.21316700811628655 0.6747810400313006\n", "1 0.21316700811782532 0.6747810400344023\n",
"10 0.2161776145305841 0.6681779252365153\n", "10 0.21617761453133164 0.6681779252378663\n",
"100 0.20723445731519957 0.5973124724751776\n" "100 0.20723445731522736 0.5973124724752429\n"
] ]
} }
], ],
@ -226,7 +226,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -236,7 +236,7 @@
"0.01 0.17702951629340366 0.538690243296189\n", "0.01 0.17702951629340366 0.538690243296189\n",
"0.1 0.177432503566242 0.5387345171140366\n", "0.1 0.177432503566242 0.5387345171140366\n",
"1 0.17743138596037397 0.538778156304091\n", "1 0.17743138596037397 0.538778156304091\n",
"10 0.17786269625555318 0.5396020974919651\n", "10 0.17786269625555318 0.539602097491965\n",
"100 0.17818192454918605 0.557911382661004\n" "100 0.17818192454918605 0.557911382661004\n"
] ]
} }
@ -257,6 +257,40 @@
"\n", "\n",
" print(C, mean_squared_error(Y_funny_test, pred_funny_test), mean_squared_error(Y_helpful_test, pred_helpful_test))" " print(C, mean_squared_error(Y_funny_test, pred_funny_test), mean_squared_error(Y_helpful_test, pred_helpful_test))"
] ]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-0.008666338118757945: you\n",
"-0.0033743830081447994: shit\n",
"0.002818066343305709: i\n",
"-0.0009495127061038011: 3\n",
"-0.0007321653144316716: it\n",
"0.0006758513650775456: bad\n",
"-0.0005113655228402811: nyan\n",
"-0.00045892382707229636: of\n",
"0.0003103226245059089: *review length*\n",
"-0.00026518360080170943: ͡°\n"
]
}
],
"source": [
"# Fit a Lasso model (alpha=0.01) on Y_helpful_train and report the\n",
"# coefficients with the largest absolute value.\n",
"TOP_K = 10  # how many features to print\n",
"\n",
"model = linear_model.Lasso(alpha=0.01, fit_intercept=True)\n",
"model.fit(X_train, Y_helpful_train)\n",
"\n",
"# argsort is ascending, so reverse it to get descending |coef| and keep TOP_K.\n",
"idxs = np.argsort(np.abs(model.coef_))[::-1][:TOP_K]\n",
"\n",
"for idx in idxs:\n",
"    # Any index past the end of `words` is labelled as the review-length feature\n",
"    # (assumes that is the only non-word column in X_train -- TODO confirm).\n",
"    label = words[idx] if idx < len(words) else \"*review length*\"\n",
"    print(f\"{model.coef_[idx]}: {label}\")"
]
} }
], ],
"metadata": { "metadata": {

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "9808cacf", "id": "9808cacf",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -16,7 +16,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"id": "494d6c25", "id": "494d6c25",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],