From 23f932ecf32c5ed3d34dce4c7138d20fcb2a9123 Mon Sep 17 00:00:00 2001 From: ltcptgeneral <35508619+ltcptgeneral@users.noreply.github.com> Date: Wed, 28 Nov 2018 16:35:47 -0600 Subject: [PATCH] analysis.py - v 1.0.7.000 changelog: - added tanh_regression (logistical regression) - bug fixes --- .../__pycache__/analysis.cpython-37.pyc | Bin 26015 -> 26781 bytes data analysis/analysis.py | 58 ++++++++++++++---- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/data analysis/__pycache__/analysis.cpython-37.pyc b/data analysis/__pycache__/analysis.cpython-37.pyc index 0dd6a0207f411e038a51d0da2109502e9dca6b74..d56baaa48b46dbd5aa923f90ef16f92b3e57091c 100644 GIT binary patch delta 7872 zcmbVRdvKe_amN84-XO&X!iPwTpD4b-2l$pH5u!+u63IlODUlLIh!OjWa)HIV$+B6*%jgz|LCa%Xx+q7|;PSWOUlTqrpj>nyPnoRpg zW;*Wf0R|`-j5LOX-|fA2_x5)8?!a5$CpZ6*S9w3qsT=%EK`AC!AR-$S$?^z9cbeu!S61N889 z;@?LH=@5+APmj=}P#&PebOg#?I!ZqcrH_7w9)t2AJx<4 zq^F=|fS#sPP#&gd=-z1{2I(yI)3Y#Sh=%A4JqOegO6hqT0O}|$n=p8kReOiG-MiP@ z-rf^@hd*X4>FGJ(u(!%VN);-v2BI;x5R8OZ6?twc8q&hCsI0~0Oe+m)K~>TC9b-x2 zOU69A!yZ~y=Pfe$29vqj)mvBznDibGMGI7^H&F>1Uf#8*3 zI1-$VD6En{k=Npup@Rw7jT8acNW7W%6H+1?(>=PGDzi)TH~63JxKQcAp-l+Q2rUS$ z03HkT^7ewpav56$AXnWM2!x|yEfCn`mkT~ui_>*WaB)$I(rwx7D)eS6*8}su$}bfD zIwWKZEwU7_Gp}H-DnS;y$o4{oJ%q3iVL!qF0Nu72Q^ROzorIH$%Gf8gl@uD*?(kQN zzBSnkjUFpIh<$ws+X|M1V0H)x^&<=*$Z0^%qW63JuZqw4wzVKh^0{}S?qtgR5}OZe zK}~s!pLN$etB~I=glhhryUd5?XKe`G2xuyH7(r+>h?IOU2#wH7UP#<^f0OXt74Bx- zN!=a_D06d4NK-vIwGm=;{^OG1iCoJV0v_lHD6Ji#1ECY43qZG~m~YWXsDgU`4E*C$ z{ATIWZoyfsBFoP{V$b76H+=M9TOZ5w6eM2wE-}xB7^v3GeXi7z~uUQPyiS}@*`pt_OC$i+s1sMspEomSv zhK-=6_jjEp-XZ_d!y@P0PtF57N*{0DeWop!adU47`p8lKr@QO?TQUr)pM%9G@E8eL z6(%}r<_UKfxaIqJ#yJ1GJ^sOrWRdK1dPXw8Z08n}JY0KiB+D&6#w@d6;;`7xCC>7e zx_`JQuL!>vX)1G22w@o!x&0y|ryotcT}qiHkW%nq$CTnAWh9^QYxOsNo2O(XKd`4< z<#P?wTRbJhVtY?n=l|7^eHIn_aECsDZ#R&XeLj-F5#6)w=5{y#TaGv(5RbYOPb$;?E#Gq@LR3N zvt?Le*i+AYzttrp6UlF0jD@2bI|Vg%2EcDtLoudgCmtJg{afOF@5{t8jzfF-)9p3u zTNJ}a9K?ijd2ANx5W*CI-?A(&fPTx0C^PcdIUI8yA%JiJA&8K}Q*`H+d*ofhTRQd< zg@-yGs}R|10;bKw7e05t4aYMkH#W&X>?pS+*T;8tHkaq%%j_`Ho|x@?hLAS?LD#E! zB0&+|bV*rZ9Uf5Gb26$V~0dk2y(zAy|_!oQH z_sG~O0C!q1x)@fqSRfb;Mpjg~vBn~_g#Wy!gIrA%?=_Y=IM5G5)LZV9+o?S!jY7ycHI}q{# zzyKM!(kwhL{N`nRP=NGiUsd@A4wTa%7VW}nV#ZgL(!qf`Ts_!6C3J9R>Aub`yVa>e0+@e`D zYaFI*kRijl$~8;e5Vw>Y)~uS1I^w{TI!mFK7%s=B44OS|j*|`JnoV=0D0^Ir+v3SR zb&Z8E2jT}yOnDRVRd)i!aRnr91ooyfgBw=hHpHd%QW)VOV;-Zhwk~PG=tYlNY^_d$ zI+@i;NGCLVCqD-qZy13!d<_QUY8B24z$1exstT_nuP+h{1tV&o7dx062CX{fCONt& z8XASauUeYc8&~C4nW2`eW{*d1mKnz3s_P0&_9kC=jV$FJ=nsDDTqG9M_I5K|l;0d= zEV!auv{(d!KIt8qz{&DhN5*3rE`WX304~hdk$eQ*$mXOEs8Pq$NW2e*G^$75Eg-b z$gBl(BAY|NIDjr>$(}<%cDfW$mlks6brG@&{yT@BtwYYaOuJQ1oDcOMaz2WDljh)G z>@UsEB?Zv^8vpbDOUEjK0_XALcsz`8!?(f5?0`Y*#wV=Wc2aH zfg9^1nu$W#VK~7LOeTnlZt!*|c)2-l+c3?T)(mT8Eidlav}0HjL)-}z1SU`}pz@Hq z{D)J9WeS`m3IXZ{P7p&z;5D{pTk&ZwXm6h1pxa(D(JeFsk7Fh1CNwSu^V8sMMh2}n+mE%@qSEiv+2gB8_A z&~e{N4H&GDT2lu5h;nTf@}CS=?CG9@vwbap*@wp0EO7_sGhzJ3HCsG?)2*{6*-WU_VgyanhCEL#|DNH!;=suDhshI3M(+cIM0q$Q1cIlnvQcE z^a29T5Z(^M)!wC0Vkp&)CG7cB88>u1y9UYeO;E}qHBDgO* zNWQU*7|G5gO)L=F_tss;0t|R)FG~Eu5s6qt44>q8j+P9Z0ETd-z+{~K2}s0rab`{k0FnpJ&c1yES*FOr^0b+8{U^4fW^?$#3zUU+A@n{LcC_AcBhP_09=V~pH-rv ziwi+^X^>wSDJ8f0^2p7Wr*IG^ERll6mi>mb=)?hcy3b&`MxOI^GAa z>e9vV{6$@w3&SsfXk^xaUF4B$J8nYr!~EuGDcMYXWAss}u?+`{6^XU|9#%2e&tG$Y zefa0A*AFFqsF$A`^O6bv%-AULC4M~CLWX`9My;CUxa=uOno(SctjGYfinvROTPA~j z``AxcLhc|xI^Ix}$)>nIxb*n>`uIq5dWFI>bP~&%Mv>19|J8UvLW+OI>EWL};qI+^Pd)ODCQzy|a6LHRO?7n#=sw^+E zzOVE0iNh8VO)v7`gtq}NqTt$^vY^5x47~;Jg>bZJx;YeEfn7)XA}DQPbq;@BRp8e;VG0@N3lQqR3DrBOzDVH^2R3*=vNsdoo2(-f z-^H=0obCv#>e6f|5>!=IhOHR8x_uII!4bwF>d{gj6Z<|klL_6lpw4Hln!N#i-{gl* zcab-^diq1li%^G0#`07bd5eE-s@jLTFd_4wkd0*yfB4pttc^+Se}qHiG>9d<4%HuU z>zM}fI&VAk^l1!FkBtfTKg6NJy#Iof$T-NHl_LBzfXAqt;XbmY2;H-I!*A1}SQK)J z_6{$Yt|C9>-svY{bH6d&mY4Q7j|8sv-_!2>1ISZO11hImfdOor`afvnB4{zd6+(iG zgp0UI0dYaTVQ=$3|LNm>I8g@xGkzXvv0b@%I^Y5vrGbd1TsFaBn>u~*=`RuX3z!-* z@y&WS|F!?vlrV;gl_0~61W8EHN#wpl;FWgSnGuptWbhF7E`Ratg(K+tpzPm6;kUrc z1ou3LkLmWn9Q;`!5LM=jGn^d22ROaQduNuwqIYImC+?z>#}I_3$ey87Cw=0lK*7yO zBv26q=xgi;2!D$prvVKw-pM;9-f^ysoZ_E8cf&t{ttoSe=U!R}tRcfShr!utm|BOb z(cTp-2c-%ts5cr$T&!VSot(mM>95brS?UnFzgHA zD)9rshYvyi;_Tu5*deaj>@B1)OMc`4{|U5ix&mTl>=GaZoq_5EFANn|Aq882XDk%I z6vY_6vEctO9|#RuegJW5G4X delta 7205 zcmbUm3vg7`b?^SOy9pr#NC<>YAlWR0&4&bH$Y)6cM2PZ9NXX-}Z1%n^E1TWqz6~a@ zG-}XMq>A3Es4&`b+8M2OIYjrwmbwtd+u`P9rD;4B&RJq+hKse_OHI#ZA#vkQ$RZJ^ZaU}$3Pd*f@pzMAlgEy zaFc;9p~c4xv{+s=X_AWQQffM8kQT$do0?(XJZY4cz^H^;U}S+&F^o#76-HJVErroC zYJ-stMsB*C+F^eQEu&e`meO+SfOZ-6(28Rd25C9gu}SBQ}E}bRZZD%h7naG4&7Qf-Iiv z+HGA5zGO@Jh-=;4BIqXJs~7>;2`ftd+T|wsB`}g)5hV~j6pTcJ15ue3@h`F~^R=z8 z%G?MDfQeL1&UvyRb49gKd0;erio0|7bMx|idnI6ct!x!Pom)|;!?gzjkys=V2%P30 z<=(Zt0#H?JaClgbQRYQ9D-f(iFiqz8T%TT;Izadle{=4aUa<+u=yztZRWPes6*O>^x9YxQf)?XSte$|gtZ`2D*``) zOv+;QW$w%0i`JaG7vK< z;8lyBzP3OT?5{bH2(K#Ihvs*SeY5)sqacg_rpObRC4ltj4Cx5Z{(k{wf^Bc5HZ49u z8ZrY)wS@x1GK&mpUYuP*jHG<-EbiZ!4#bTph|tQS3o6Mf2r8@tQFOM)a4O(W6(8=> zh0`VUrv0kgVpAdtx9SLD3@zouOJ6;Zjv?!;*&xlqJ|huKvtgT1+^a`UIX~n6$ySkd zuEyt@PAQbqaY zBWIe#cZeJdsJnrmDJ|q*R{TI;2Uub8d#Ojg$Bo37DzDs4NF%>(Rk<^5|2m#pRb7F# zk2gm((_kW~T8HGsfjDJlFsYj2qY251^MM5ObEoeHJr5h8P4cMk>}t#tUbAEwjz?k% z)&nEf2SBnYp*WNEdC3k-e@OLJJwmKo5w(&3r@C}vhGaO2NK8nV&4Sn;K+p?7vW^8} zaW*7b$3>gw$Mzu3UIhCP1Q6^;;75>|%~a=Mu{{`#9F{*M{8Y^vLiz7%ZY>hYZ3pZK z!x#Sk^b%a=;xg{$2WtzhntuFv?aD$wEbHj9ESR&T-l{!8h>!2Cdp=v_FQS?b%HwP+ zFRich=ydId9Z-+OSwg0&2~CKmWgdRCehKmON9!BOP5fN_GIEfARbRbKr$^p_h?0m0 zg0Wz9T!C^m9;JEwhBYh#dkU%gBKy?l?C>ujqQYsP*_%jU-ef&58u`2aOLxSXCIl?qM zNhT_XStsA#=(!qy6p3Q>8p z%pincZCs%j!3oIQpD?W>Sz!~r9Bem2FuI!R`o=UOY5sz>F*dB4;2FUVBO_$Yjvz?$ zbV;r z9K!?SwagF~qngPGP)JxplI$)7NKQ2el+mHtPNuNuX&!33w+tyCb{z^v6`%GNs$jw_ zGx;3h3)@?r6G+#O;1(WiU#PcoF0A~RpJ+c=Rs=n-k@X`grW3tdb7Af*U))icgOP-X zxgEaByS%;Q^n{c!QDf3b%?t4zZ|X4Y-FVcVbUdDwa3;-3OVU0G>xG6N5@JX?GiJ?G zv#yL;i#9v*RG(q&PM~e3)}$#PXfHHO8i6`HY9HSx=(~{qL?SzBPnwDhlSIczEN86F zWO(W76yrWa%tZ|N={LlTy9_7Ei2VwpOlY#?pez~8X6k(D>P%5CUA??0_k$>Yw0Odr zbW9Rz<{ca7lSN$GSTwN+)*8l3!8UWKZOYykd#;Ofpk9uq-nNSh8b`YNAh93K9&4IO zS@^O7e3=dY$xS+v_CoNWfGz>8N!z3a`J6P4JknI$o zcbxNJ&&%)_RtEUW&a#pPWC2-ZEFeZw44+bBC9cd->efy(*(;9zcGw#3>WODZANWh_ z#m-A?8%)FHf~AP|Yk^3ontDUQG3}DWOTe4OZbT$;CXtxf3K{#8>l^e3c^)j=xo`99 zR;?=FUvFMso2iPLH6X`A2Zn;|V2KG7!gFje+DR_41B8!}JZtnukv>yJ?9ZTkm%K=~8+Dl!~Z{zN6PhR>GLrvgn?C1U6oh#Et)h#fslDLQ( zy(&njk-ym;fHI=8$6vP?#xs6*NgH1D+_?HIfHBk_f(d_4dzL@m^H(T8@7j5NiAtzS zVmc;YWi^H6m^?PjT2AnP?`*RUBbj@6bFa^XRR~0KLLO3}1i=7-0swBMglY-JN8z-b zMcn83{k^O8k#ZbXj_~un9&((MzQHDO^6U|15PxTrp%xMrNNeCmN-qMq7lp_A(*V(L zhqnZKG4(`W8R>orF;F0VAaMTBxJi`3QB2!k?2K2y- z%6t5qJw@b0zG!dn7V*R4dnk%YB|%Nk+4ef1o{7;wG$D_CJnb3k(tWap8 zt7i!cJQ31BI&4E1Vn(F{!XXT%VUr*TV=CnhK}RIdEl zQ0=ZBL`vHkrH4}^EF$*5g6q?ER>@AunusTY(OStWVz^e5k!al9fUTMWic-W|X(9QP z_tPQBs_780)iJ^seGUIYp0@;hkU#O}gys{` z+~_6ZUk5K4?6CeG=2#a$IM_^N{^X#$ViG4#A{YlC*_4A(`0s}pdkCkNBft|6|G{Dn zS2`(nvGQhzcyqXbOX1>)hjDlcfe%6YRER8zxs+kz6V_uQ{xAiwj}crzApDCVtX2KO z)WQs?%?w?q<3zPL4aMnbRBmyg;gr*WXKvx&hCBF%1D-6a-C@tOJMA0z%?HZ=2X-8& Ap#T5? diff --git a/data analysis/analysis.py b/data analysis/analysis.py index b16be01f..55a56355 100644 --- a/data analysis/analysis.py +++ b/data analysis/analysis.py @@ -7,10 +7,13 @@ #number of easter eggs: 2 #setup: -__version__ = "1.0.6.005" +__version__ = "1.0.7.000" #changelog should be viewed using print(analysis.__changelog__) __changelog__ = """changelog: +1.0.7.000: + - added tanh_regression (logistical regression) + - bug fixes 1.0.6.005: - added z_normalize function to normalize dataset - bug fixes @@ -119,6 +122,7 @@ import numbers import numpy as np import random import scipy +from scipy.optimize import curve_fit from sklearn import * #import statistics <-- statistics.py functions have been integrated into analysis.py as of v 1.0.3.002 import time @@ -640,7 +644,7 @@ def log_regression(x, y, base): _rms = rms(vals, y) r2_d2 = r_squared(vals, y) - return [eq_str, _rms, r2_d2] + return eq_str, _rms, r2_d2 def exp_regression(x, y, base): @@ -660,7 +664,26 @@ def exp_regression(x, y, base): _rms = rms(vals, y) r2_d2 = r_squared(vals, y) - return [eq_str, _rms, r2_d2] + return eq_str, _rms, r2_d2 + +def tanh_regression(x, y): + + def tanh (x, a, b, c, d): + + return a * np.tanh(b * (x - c)) + d + + reg_eq = np.float64(curve_fit(tanh, np.array(x), np.array(y))[0]).tolist() + eq_str = str(reg_eq[0]) + " * np.tanh(" + str(reg_eq[1]) + "*(z - " + str(reg_eq[2]) + ")) + " + str(reg_eq[3]) + vals = [] + + for i in range(len(x)): + z = x[i] + exec("vals.append(" + eq_str + ")") + + _rms = rms(vals, y) + r2_d2 = r_squared(vals, y) + + return eq_str, _rms, r2_d2 def r_squared(predictions, targets): # assumes equal size inputs @@ -735,25 +758,36 @@ def optimize_regression(x, y, _range, resolution):#_range in poly regression is r2s = [] for i in range (0, _range + 1, 1): - eqs.append(poly_regression(x_train, y_train, i)[0]) - rmss.append(poly_regression(x_train, y_train, i)[1]) - r2s.append(poly_regression(x_train, y_train, i)[2]) + x, y, z = poly_regression(x_train, y_train, i) + eqs.append(x) + rmss.append(y) + r2s.append(z) for i in range (1, 100 * resolution + 1): try: - eqs.append(exp_regression(x_train, y_train, float(i / resolution))[0]) - rmss.append(exp_regression(x_train, y_train, float(i / resolution))[1]) - r2s.append(exp_regression(x_train, y_train, float(i / resolution))[2]) + x, y, z = exp_regression(x_train, y_train, float(i / resolution)) + eqs.append(x) + rmss.append(y) + r2s.append(z) except: pass for i in range (1, 100 * resolution + 1): try: - eqs.append(log_regression(x_train, y_train, float(i / resolution))[0]) - rmss.append(log_regression(x_train, y_train, float(i / resolution))[1]) - r2s.append(log_regression(x_train, y_train, float(i / resolution))[2]) + x, y, z = log_regression(x_train, y_train, float(i / resolution)) + eqs.append(x) + rmss.append(y) + r2s.append(z) except: pass + + x, y, z = tanh_regression(x_train, y_train) + + eqs.append(x) + rmss.append(y) + r2s.append(z) + + print (eqs[::-1]) for i in range (0, len(eqs), 1): #marks all equations where r2 = 1 as they 95% of the time overfit the data if r2s[i] == 1: