From 6bfc258e854508d34b8db3269f99dc7f0a6f1827 Mon Sep 17 00:00:00 2001 From: ltcptgeneral <35508619+ltcptgeneral@users.noreply.github.com> Date: Wed, 28 Nov 2018 10:17:18 -0600 Subject: [PATCH] analysis.py - v 1.0.6.002 changelog: - bug fixes --- .../__pycache__/analysis.cpython-37.pyc | Bin 25085 -> 25279 bytes data analysis/analysis.py | 119 ++++-------------- 2 files changed, 22 insertions(+), 97 deletions(-) diff --git a/data analysis/__pycache__/analysis.cpython-37.pyc b/data analysis/__pycache__/analysis.cpython-37.pyc index a3a86a9df77a4f467c0709838b3ffb68d9b5a366..fd12ca862f1b98354a4539efb610dfa9d5d9558e 100644 GIT binary patch delta 8657 zcmbVSdvILUdEayQy^>{FTFZLc*YZPJSz5~vj4k|-0Rw}L-_Hy3`fBgh>f(Lr+$&iU zGGbzkr4 zt`@lvw6ouNf8Y7O?{~iQSpDzcvTNUG9k(x96!YQlzczn#f6chh_j4-T_^U^zA78uw z%N^^djd~+)EX~wwext!ymJQ^BS}+&VLd=J9V>YaXWg5{UNSm_JTuh72wpD3WsB6wv z=N4&;WL-B^K+UPVANY@x$Mm5s4#yVpW(hj5B zs6pCk^cahgCXA#}i?qvFZ!AH&&geDjkaimzjC!O!#zrHKG-+%ymLgqmY&LE|+G}hv zZbiDm*lKJ;D*m2*WxDZ*FL7ovx*^${+>-3=-Kbk(pD#VE=QC!ukm-xfHT1=zJ?h|S zMjf)oO?$3>LtjkAUyqtDaBij@(@;|>J)2FXRHuN-yrWxr6%}e>NOguyb!ar7cC13) zV=+xadvEa24FluTdN!RZjG27Ma=O)X!&Z7&){SO$$Fx=FTtUs5x;@HK0MogiCg%mN z>Ul#gVr+ru^nBW!on47Dak285*$w!dqVz2Wrj)I?+pPSQ-x`LcB-3RHLrcg7B`Du3+azQYkBMIjPjw zMD^lFt7)7Y(u+kiZO4KnZ=r@QL~bRrmB=;_H&QIv7Qw`2R^=oEreQf(#GxfKr{XlA4WKPGVmdBTMx|3k>IXpp^hY?AH_NC9;9YMi4hV%e-uUDm9WGKNYJQ9y=kqN{!~v zY#!?ceC$T@voo*cP0ZdAfn`6Lo^vg~4RxRDU#X0TF3++Hh~;}|`n!nq6DiqUr6PId zU3_iwojBF_=f4HKr1;MauaCv)rqj3F)F%s~jT2PDO#Gs0ZE69Sd7z)ehV+2+|8EC7 zKZLwIGuHeiwsk)2+;BRDuxjNz-!42w!dso_Ydv*WNw>RD=Zy(gKurrthBm*Gx>PuW zK-M7MY@OU!0kTiM$2cyq;`^$Qt)VTTc>`o(?f z8ttZ#nw!bzKvMwht7cwSGi>vGG+$jF7ZfWZJbmO3FxZ%Q2Nm@`BdQV01tP zRy}l21<ytmIm`72OO%$Hfm;y;KRIFqi7dAXxIIlvt9pFpf&X{@=JXKsX{F5sqSF z$(koW6+$J+pBX|N5IR)`pAw6Oy+rVt6pyZ5dvo~AvG^SD34;Bd;$^i-w0At*rG?Y> z*pQVqwHVQq&bdBO88$)~^04@E$7*(3{AWiyds)MRAhOj`}pJ((*8>E%MToUEnx?YauSH z%WSh!g(w~dxj;br0x~`&0w@KPS|y+~vj*{g_X=^nJ1G|TtYLm}Yfo|$1yGzzgT;cC zclc41_z4g#XydGJmT_09DFX6S@kc$6wa|Ht9|p+U1bG2QW_byGQ*Us(8;YEwrr=6((`(D!p1eAkTu>V6%j1N z+KpW-KXYhfkTg57>06~RgY%t1vpqn>Br*gt?@;6!n3~A!e3%Gnn5g0w<#i&Yp;UoC zNV$F@XNhEqXKR$x_O@4_2tcW9S|dU*Ej43Jy`=npzfOGe6pV4_ih@ zHt$@{aJk2&>K>|yi_R_WYf66Uph}vz3rJbT2{&9UWG8UzMqp*H0HQC=n~$bESMG zUv&Mr5pj-YzKV!VWHFH%5LnTn{3wVU7}RZ33y!B~s{K@bjtJfT_ymzjA`cOnB2uP- zRJj_Jdq{;wWPocZ(5}?5PAEXC$Z3Cf>;FvCw1H`M?nBl&>F?_5w8n2Jwf9NbOEj?o(9c)95sy1t&g-io|{q$f-&V8?koF zH2hLu-?SFt8&fizB>cR9L2lT=?bmc{en0Ukvm6!46FFi_2MeE4t)x+qf3#Il?5E>_ zy${4-=M{YYVTkd8{nGRj(lWFqf@M~w(833ZkZsXKn(zxmNDDWRvPW|sQ06PNOahGF z3v60UUWjX8jpCQLO&lO!OMweHxm>(NDw#%dOT@H@IWmezjzV6G@|=xr1y3-Vcz%2H zK8enL>PisR!mvJ`R1mIYzfXoMUW;z$X@11t7sR?!A_@UCrikQ@+lI7&Zssn~HvuG2c}1MsB44gilQFG}#sL&dx_!1azhN{awTClK9oGHLO`Q_qTv< z>EFioiDLh5c3FIdptN~aKgbW zR3SSmeHb2}^&7*=NoM;Jo_+KY4|#fxeUW+ z>_OzPif|{Pt(Cqif=>;rfq!d)D5Iu<(WhByWC~l3Pdj3Kbw#Ox?lt(-U=L6juz_+2 zB^uqSh|{;fQ`*V+A;`?^J6c$j{p!E&x{O%i^Ouo=H<1&N2=Xxf9^c4&lm4Wwan($dJsIb+4>f@OGnvW#Z}BH6N2iX=L-D8Y{^!Yy zyX5hM_lcM8?KCw1V8M|=(fZ)yk3ZG|_ORukQa7~VxdO*d8NstDV2zV|$VO!1ba1tJ z8s+e)ZD#S{szvd{=;#@)=j3yb7N#K?M6f`5bkNkqiM>0TkT2v-JPq;!)}%9`g}etU zP5i~)wk2BN9A25V2p)bNdc)PkTYFpZ`ug8{6Sa_g&T`Vjbk5KrR`l-czw>vY(4=3T zQWFgx?Bw1YCS55c$>R;D7$o1~Dj)8oiEbC)-Pb-%3#x24Ah!Udgimd8M$efkJk#YJ zyk?rVRx2y=Wi>UjrZ5O3!6NqTkiy`Dc-OPWOuK|O?^w}K(#m5gCM)k&OT@r^$7-0$ zRHYM6m33lwEFs>#FMdlajcOx8ur3=fIjL0lG{^x02cYSTFqFl{{r_^~Qqz23*E%|` zB>b+D>~)#pvdYKk52M>NX5Jhx@||gM@xbj>GHm`H@oo~Y9!T7hK^D;pFWWZGQ)D1G zgcgt+Oyk7u@NZM!zXZ`DMO0fj+h}6_!8Mi7%@@(-uoynr!7hp?4&Dndzjd%>mki4h z%O0X9Y!h3MTovV}0{3SzbcnPhANvrl5Vn}C6@SpzFthhiH9PVgT4c<3W0q}?4yLoZ zZFADzyG2BgVpG^dzcz5PD~>KNAaU6d*PpX9WyV!$$W@T6xN>-ta@8lUAO5IHT11{1 zmWls3l2pFu6OBigZ>O1YDf;`!xgq%rhT}<4JY9Z3T`ClsLEQzxk9H_CKJmv#A3H8j zjS()T{2dJ#Ch}t$8}-PGB^k89usB?wP)?paM9~sJqoo5BTtkAO#IXlwJiZmvj;4n2 zqfaVtW`0=W7^BF~_|Gsn%>-+Ui)$wzV11(XRL_yuP|5caky9r*oWSqlM4eomu@1{H`?@vD5OoZ7E~!|HjNC+}lGQjmKvil@GoSdjHgMx=7QcW0-EW|}r00#Z zL|^DWq_w6eJT@ayPOy8DP<44h$y#o^Pv zS&8-#UkEoehL=Ic4U7-s4Q&oMu7V&Z)h+3K5@S9p85&2$*$-moN%8*aV-HXddF|of zquc<<4RpMNyz9qfRVCLGXov)0N$=7VyAg${9;+$tC2#q}H&V@r?a!yWr)6}IYxp0~ z%ISuO{|+$IR!+~#VCF{U56guDwv6Zy*S+7EWzcq`*bcZuO}vRl(jd9TmACa*0TiNNM>#OU0{1D11w=a0!2kT?j596c4;dw_}te$V<0#X>l@DnORr( zRFv_>FQ?ma4I9mxJL%?OUj_+m3-|*;F`~DMX+0ty*Eg&@770WaMcN|m_{Q-GMPiY< TNL6G{q$yI3mYw3dzWo0HN)->| delta 8581 zcmbVRX>c6Jb>5zxJpn8Zf*=4AAOL~{7Q{t@G;ac+4oVTlL%f&Aa z&l0;>0YzE6wXg5@UcdhOz3$$Zzr%j{cdX-3MMc}mzKlQSXC9Q-rUS8nN`tW=(zX)WAN82)t;Xx#*m`q=>3!4_+kpBeQ%AjiU5hoL)NJ}t@}bm>(niydk{_jwD7BaY zlmaNVn5||I<6F%(GlaCw++>Cy^<0i^GTY5ERJ5C$%`Hebn;m93(k*7ES%I{}jG7Uo zoo1I=i8N|%HLH+zncZeJ(yeBX*^9K>>@)k3_L$qu?MQpg0kcN@6}!1{?MYAc%4}#` zZ-4L3-v0i6gZn+6#F&vyTIpPJ@8Z07EFfyjp7(FSs_+fs)v|rn^~h#uku;E?#_L6W z`AsIp-PjaNTrRDw8d#$RF?tUa-38)DZK_yO5L$CXIdkZ$s6L4$}1bXsOwfD-9&nb^nyhF zyifeRvZYn}FsoFegu4vJ&@o_6_uFCSqRlT*#)#1A^ z?itZh{R37lep%g=P}(THN>mE*J|)7o44xR{yHVhKi0mb@kH~%y863~qDJPZ7N|q|s zTer-V^Cx1yX7PMA4LAT24e&wA4-r{ax~`=2Vd`{<$X!HA=0dG6h{xAl+P|t{nagMS zKv~AEDKn7^OZdu$G}j+!`IV5{?~RQDG(aAH)~xj1wyi=l6myL5SFIBeI>y0FfOaGLUCp zq@Pla=J9c&~cRn4dboyDwIfPf5U^O~3PCT2en{q?UeEUi30h`Kv2zmn*J&$a9d zV)=dGv^p+mRsID(n>M-lorbUe8t7BvKQ#LOTij^8ymQqxS$Q!DDz`kMY5j+S9Vzq? zVCTiKzg>K8{S$2Wa@fg0B0g^MRK|tP$|@4f17fu4{Jn*_-A{@r^Y}`jCjE+`%@0$T z!X{V(S()%P&mJiO*$4P7Ewl_|^D3%!E@4Q>suAC7{+m){`B!u-1u?-QpRy#2KR`O) z8BY!H@HROJ57Svai?E<3!Y!XZwS;bLz?J-3LHvgXRgd`3mUDe88Ghx(B0TLBGp(Cr zt9r2|Ej|L0bei*_1l?XmL@j}Oz)#0Mv2lji!&dz7L;Cbw+(Nx-! zVWM$^b7Q8|UHH-67Vmbpvxmj{Xd8P?9E^UYe%?FhJ?WWarvIvDzlGFyReOr@&xl*m zXywnChrZ`M6F)nLpV-kg-MBO$KppL`Vn`4>XZ#~2O?#32xbZ;pupkbGY^z<*(L&s=D>Zais(x0)Mv7E`bpi+8slMarU zsE<49$Rd65jAdkH0EwAG2IZV-Nq^jSaGsQ*_>{p>pFmY2t{7IhY240n$1< z4J*GIfV^n})bAnV(Sm@Ku2pHe)~MB?6l5AJXMPr@RP$?LEnn6mnl5hj^f6s@^=|QE zfH=|HJ3z3+;nz2wOJyA%Ly2Dkkv=<-<5rP$B|M0x*TwU_H})Q&p65W4_~G;B%W#D1 zByk=ksb>slOzQRocceC+lFY&Zz~wHc`datMz--(ePp4dKm5K=w3z}mUv4*O;rfK`N zAij&Q^-VF>FAi^ep{C%2ne<+@X836l*}kKrXxT8DD}DTVt9ogcWI`SlsqIbdF7ep* zy;nFYWH6nHkK`t^rVM3LW&MSSmS2x#BsA z)`4~GCt}aQHul)!)d3$(U}DFY3Z4kZze!T+G?6JH*Fct?nU-wJr>T902u&nf#%C!% zL4+ovH06&_P92`-C^t{!H$i0CI0qsVbaLNhyQR2$u`1S>x`#nR;+s2L%YZ8^0ILF? z7T?}^A6rK!I`>o$%bQk`AEAnBacEcD=7RZ^UnRW>O{wu285qx{XGXE;t>W=rn_&E} z?dm#M66VX$zZZvRyB!(e8*5(!TB9&tr&Y0DtsixD2*kBOxA@0B^`d9@D}DvYYVp6j z`xZWdfzfbLSXMe{*>UN`HH&`|&AgS!lSC?sL_lE4->3XjAW|POY)ks4d-T0?3_nPk`AW1X|!uC7j4 z@r}LB?i|WW=D*|W`4bi(CyT{;Z`7*E}cj{cB8v4G!d-A zf1IcjFYRkvkU_pZu3Sgd%gIB@fP?F_<=EUHmPKZwBANYniMcv#XysZ01@)nLK$T>A zoBWK)<~97h`03N#4$2OjW7nCfUH9JYxl}*zn`3hez6Q^nXAZR9V|v$k=DicmXFYRX zx9fV(yzXdU^v>zef|;YwY1bKizkZZ2c@pK)qP`-#XCRv^Lz5$clcQPjXNk~^q#m~? zGk7zYN&+S@Tj>-DUQf^z3%(5K)#@?Xu=t<-Gsl)>&z4UaX}j0G+jnw>;$lsnB$;F} z1=(m=2Ca!nLtVi`JYysB7^$ojFCJ)&DTFH25=>JWoQN{9Yj_m~ycuKKJpoXBi-!0^)JXxB zn5m(zox%y2G?R^E0A7v|_(yPfGaOqEhp04`vYnia+YKgYJ)#1#GC-wb0?mZK0NLr$(IecY?Sbpp%E*a7|R&^>c>ce+OUvN1sP;T zynwzcUcKiF-Jge?A`eHQ`aRYySs22PQrjRA6-J&HPv6^es-O|#AYGb9|egFOUaikneIjA&D>ARNWh|xiK6l?-qFZ?$AA*T~{ ziYHJGOxji&c$Fc%mO4g~8yWQ$g_wjPNjzQZiOCU*VtqQDga-J$IhfDXCCnB*H!9oRBgP+W zTcF)g6sKtYfMx=+tV|jiD~`v-tb-?7%a&EDB3o1wQ8l>{Kn|v|^FwkY592j5HD%ca zSh^=2@*K++9G+X<3QvfJqo*U-^)1+&CD(hI#MBf}0rH8SZ~_fRP0%E<>)w%d%zs5Vu>0BY_b z$ovbMy9$H2vBCE-9%73>J<-li;-4e;oG-&E+nyXrqz&8VWDWOD6FP<9;z}bsH)AK4Ef$6Z&~{Udo*d8uO#IQwUzI5fsTfl$zI3YhVjT^xI7UYR?0UslhR+!3 z$-E3wmMfhasiz-!$0d&|(OQ0b)TLyOKGeM;dQW#~J|@ndzHvsKc!OMtr`I(%JZ_|% z(uTP6TAF1ua#2nCa4ed%)Z|K-;|~(-Ola)9A$FZ<(%P9gbLO{hW9OgG^c_^LZHTO+ z=U3jL|9o(7Gp0wmHQ_q6`VrN|qHkhAD_JKEOocyu%KN ze>&fj?f@r#Y)NI}&uLq@z-T9NJ~x=EO+|p@wL~IBmfKmq2h@E#+rxe0^r^^z3%mQh$`~4qaE$;;T4vGUqEY-HT(Cl zdN<8|c~`z;NzhW~ePj;ugu45>g4-aw+_RuwP6~D&B3?n)&Wg!$J4<$2`kkC(q_-m& zk&D~zYRX)ZBu#qbw!O1pd@}n6Z3C-SU%nfvE#1X@;%k?$uyZ03?_W@;_4Ac*Kx23j z1f)Jag15dUAV>}Fs{{g$B&0^|w? zdr%?0cv~wKbO~b~AVGBM^I$9Hoh{X4qvZF?HYP4yX~ciVQ&+kd)Q(r%b2EuKj-E1L zT8UJ~NGk^=L!*<~gp#nBp7i^ZlLUnCK$noG`?cD~Rk;KDB+XCGO)U8P!jn!e3 zze;s<6jPB{?Gbl}D*sJUfRjsB3&qt2=%HdikpUtKYqV?J3!GY2WpOf9ky$r*T2O96 zr3~)JC2%rr9V|o1rpu?kP50_Pv2Em7>t)>TYJyF{&B69yBffr=YJ!pAW#r3)mBC^0 I>`3$f1JEw;-T(jq diff --git a/data analysis/analysis.py b/data analysis/analysis.py index 45c65527..607b2c0e 100644 --- a/data analysis/analysis.py +++ b/data analysis/analysis.py @@ -8,9 +8,12 @@ #setup: -__version__ = "1.0.6.001" +__version__ = "1.0.6.002" +#changelog should be viewed using print(analysis.__changelog__) __changelog__ = """changelog: +1.0.6.002: + - bug fixes 1.0.6.001: - corrected __all__ to contain all of the functions 1.0.6.000: @@ -62,7 +65,7 @@ __changelog__ = """changelog: - major bug fixes 1.0.0.xxx: - added loading csv - - added 1d, column, row basic stats""" #changelog should be viewed using print(analysis.__changelog__) + - added 1d, column, row basic stats""" __author__ = ( "Arthur Lu , " @@ -392,7 +395,6 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, ' data_t = [] for i in range (0, len(data) - 1, 1): - data_t.append(float(data[i])) _mean = mean(data_t) @@ -402,20 +404,15 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, ' except: _mode = None try: - _stdev = stdev(data_t) - + _stdev = stdev(data_t) except: - _stdev = None - try: _variance = variance(data_t) except: _variance = None - - out = [_mean, _median, _mode, _stdev, _variance] - return out + return [_mean, _median, _mode, _stdev, _variance] elif method == "column" or method == 1: @@ -442,10 +439,8 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, ' _variance = variance(c_data) except: _variance = None - - out = [_mean, _median, _mode, _stdev, _variance] - return out + return [_mean, _median, _mode, _stdev, _variance] elif method == "row" or method == 2: @@ -469,9 +464,8 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, ' except: _variance = None - out = [_mean, _median, _mode, _stdev, _variance] + return [_mean, _median, _mode, _stdev, _variance] - return out else: raise error("method error") @@ -482,17 +476,12 @@ def z_score(point, mean, stdev): #returns z score with inputs of point, mean and def stdev_z_split(mean, stdev, delta, low_bound, high_bound): #returns n-th percentile of spread given mean, standard deviation, lower z-score, and upper z-score z_split = [] - i = low_bound while True: - z_split.append(float((1 / (stdev * math.sqrt(2 * math.pi))) * math.e ** (-0.5 * (((i - mean) / stdev) ** 2)))) - i = i + delta - if i > high_bound: - break return z_split @@ -546,15 +535,12 @@ def histo_analysis(hist_data, delta, low_bound, high_bound): i = low_bound while True: - if i > high_bound: break try: pred_change = mean_derivative + i * stdev_derivative - - except: - + except: pred_change = mean_derivative predictions.append(float(hist_data[-1:][0]) + pred_change) @@ -566,21 +552,16 @@ def histo_analysis(hist_data, delta, low_bound, high_bound): def poly_regression(x, y, power): if x == "null": #if x is 'null', then x will be filled with integer points between 1 and the size of y - x = [] for i in range(len(y)): - print(i) - x.append(i+1) reg_eq = scipy.polyfit(x, y, deg = power) - eq_str = "" for i in range(0, len(reg_eq), 1): - if i < len(reg_eq)- 1: eq_str = eq_str + str(reg_eq[i]) + "*(z**" + str(len(reg_eq) - i - 1) + ")+" else: @@ -590,11 +571,9 @@ def poly_regression(x, y, power): for i in range(0, len(x), 1): z = x[i] - exec("vals.append(" + eq_str + ")") _rms = rms(vals, y) - r2_d2 = r_squared(vals, y) return [eq_str, _rms, r2_d2] @@ -604,23 +583,17 @@ def log_regression(x, y, base): x_fit = [] for i in range(len(x)): - x_fit.append(np.log(x[i]) / np.log(base)) #change of base for logs reg_eq = np.polyfit(x_fit, y, 1) # y = reg_eq[0] * log(x, base) + reg_eq[1] - eq_str = str(reg_eq[0]) + "* (np.log(z) / np.log(" + str(base) +"))+" + str(reg_eq[1]) - vals = [] for i in range(len(x)): - z = x[i] - exec("vals.append(" + eq_str + ")") _rms = rms(vals, y) - r2_d2 = r_squared(vals, y) return [eq_str, _rms, r2_d2] @@ -629,24 +602,18 @@ def exp_regression(x, y, base): y_fit = [] - for i in range(len(y)): - + for i in range(len(y)): y_fit.append(np.log(y[i]) / np.log(base)) #change of base for logs reg_eq = np.polyfit(x, y_fit, 1, w=np.sqrt(y_fit)) # y = base ^ (reg_eq[0] * x) * base ^ (reg_eq[1]) - eq_str = "(" + str(base) + "**(" + str(reg_eq[0]) + "*z))*(" + str(base) + "**(" + str(reg_eq[1]) + "))" - vals = [] for i in range(len(x)): - z = x[i] - exec("vals.append(" + eq_str + ")") _rms = rms(vals, y) - r2_d2 = r_squared(vals, y) return [eq_str, _rms, r2_d2] @@ -660,25 +627,17 @@ def r_squared(predictions, targets): # assumes equal size inputs def rms(predictions, targets): # assumes equal size inputs out = 0 - _sum = 0 - avg = 0 - for i in range(0, len(targets), 1): - _sum = (targets[i] - predictions[i]) ** 2 - avg = _sum/len(targets) - - out = math.sqrt(avg) - - return float(out) + return float(math.sqrt(_sum/len(targets))) def calc_overfit(equation, rms_train, r2_train, x_test, y_test): - #overfit = performance(train) - performance(test) where performance is r^2 - #overfir = error(train) - error(test) where error is rms + #performance overfit = performance(train) - performance(test) where performance is r^2 + #error overfit = error(train) - error(test) where error is rms; biased towards smaller values vals = [] @@ -696,107 +655,79 @@ def calc_overfit(equation, rms_train, r2_train, x_test, y_test): def strip_data(data, mode): if mode == "adam": #x is the row number, y are the data - pass if mode == "eve": #x are the data, y is the column number - pass else: - raise error("mode error") def optimize_regression(x, y, _range, resolution):#_range in poly regression is the range of powers tried, and in log/exp it is the inverse of the stepsize taken from -1000 to 1000 #usage not: for demonstration purpose only, performance is shit if type(resolution) != int: - raise error("resolution must be int") - x = x - y = y - x_train = [] - y_train = [] + x_train = x + y_train = y x_test = [] y_test = [] for i in range (0, math.floor(len(x) * 0.4), 1): - index = random.randint(0, len(x) - 1) x_test.append(x[index]) y_test.append(y[index]) - x.pop(index) - y.pop(index) - - x_train = x - y_train = y + x_train.pop(index) + y_train.pop(index) #print(x_train, x_test) #print(y_train, y_test) eqs = [] - rmss = [] - r2s = [] for i in range (0, _range + 1, 1): - eqs.append(poly_regression(x_train, y_train, i)[0]) rmss.append(poly_regression(x_train, y_train, i)[1]) r2s.append(poly_regression(x_train, y_train, i)[2]) for i in range (1, 100 * resolution + 1): - try: - eqs.append(exp_regression(x_train, y_train, float(i / resolution))[0]) rmss.append(exp_regression(x_train, y_train, float(i / resolution))[1]) r2s.append(exp_regression(x_train, y_train, float(i / resolution))[2]) - except: - pass for i in range (1, 100 * resolution + 1): - try: - eqs.append(log_regression(x_train, y_train, float(i / resolution))[0]) rmss.append(log_regression(x_train, y_train, float(i / resolution))[1]) r2s.append(log_regression(x_train, y_train, float(i / resolution))[2]) - except: - pass for i in range (0, len(eqs), 1): #marks all equations where r2 = 1 as they 95% of the time overfit the data - if r2s[i] == 1: - eqs[i] = "" rmss[i] = "" r2s[i] = "" while True: #removes all equations marked for removal - - try: - + try: eqs.remove('') rmss.remove('') r2s.remove('') - except: - break overfit = [] for i in range (0, len(eqs), 1): - overfit.append(calc_overfit(eqs[i], rmss[i], r2s[i], x_test, y_test)) return eqs, rmss, r2s, overfit @@ -808,8 +739,7 @@ def basic_analysis(filepath): #assumes that rows are the independent variable an column = [] - for i in range(0, row, 1): - + for i in range(0, row, 1): column.append(len(data[i])) column_max = max(column) @@ -844,11 +774,9 @@ def generate_data(filename, x, y, low, high): file = open(filename, "w") for i in range (0, y, 1): - temp = "" for j in range (0, x - 1, 1): - temp = str(random.uniform(low, high)) + "," + temp temp = temp + str(random.uniform(low, high)) @@ -906,18 +834,15 @@ def debug(): print("--------------------------------") print(poly_regression([1, 2, 3, 4, 5], [1, 2, 4, 8, 16], 2)) - print(log_regression([1, 2, 3, 4], [2, 4, 8, 16], 2.717)) - print(exp_regression([1, 2, 3, 4], [2, 4, 8, 16], 2.717)) x, y, z = optimize_regression([0, 1, 2, 3, 4], [1, 2, 4, 7, 19], 10, 100) for i in range(0, len(x), 1): - print(str(x[i]) + " | " + str(y[i]) + " | " + str(z[i])) -#statistics def below------------------------------------------------------------------------------------------------------------------------------------------------------ +#statistics def below class StatisticsError(ValueError): pass @@ -1110,4 +1035,4 @@ def stdev(data, xbar=None): try: return var.sqrt() except AttributeError: - return math.sqrt(var) + return math.sqrt(var) \ No newline at end of file