From 367b2fe60d31a94a96b713124d4d78b36fa17039 Mon Sep 17 00:00:00 2001 From: ltcptgeneral <35508619+ltcptgeneral@users.noreply.github.com> Date: Tue, 27 Nov 2018 19:56:16 -0600 Subject: [PATCH] analysis.py - v 1.0.5.000 changelog: - added optimize_regression function, which is a sample function to find the optimal regressions - optimize_regression function filters out some overfit funtions (functions with r^2 = 1) - planned addition: overfit detection in the optimize_regression function --- .../__pycache__/analysis.cpython-37.pyc | Bin 23794 -> 23960 bytes data analysis/analysis.py | 30 ++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/data analysis/__pycache__/analysis.cpython-37.pyc b/data analysis/__pycache__/analysis.cpython-37.pyc index c1b475c5fc788fc5f0c795a1697a15f294199c8e..34f8d5f6d39d12ad2eefd109c136ceeb11bb3e4b 100644 GIT binary patch delta 1287 zcmZuwUuaWj6#ve5Z<5=Tgw%wtf0Ei(T$gD(#TS`oi(_uxw5FzN)1~Y|W8-sD5HukcYQL7@#D0RUtZs>X5i%9pqkn_-C48ef<^!L0IKF&vHbsA zgK8oSO5T1oq$YGuB7YrJoVCtZ^B=bcR5C!#sqO&o=Pr}=)SK!FVVZ5vkEQZtwM5m_ z2=vnLsU0niI;!Dd&N>##~LNPl6j)`$J-g4*D zn;>zM7s9KQpx&Tu-KgU_26)qbvS-nP>+a&-vW5hW_Wsnc!2F!>H5(4#g4_L07GRt{ zd3V--kA1jEiG6QDfr|Uw7ap-=QrHd~3beEf_{i$UOrC~nJ#!hUuI@|9N|n$42z?1$V7`36D!14QhUCt6hQ@4=x>Efbf!N6QTn3) zKDhMR!B%LY%LfY(A;@+|e_q)ztI`BEV4U`3t4>5x+1N_+*)7puR!L!K=egiUTFM@C zWWGrYHI)Wlj^0@%1)0L6PP#a7>D5MQFC*=x1tbP93Ye`&rH&6r#bFeY5u+urL>&!L zU!GU~pxHyWL(i%$Q|Y6_arm1q9KQ2Pi;&Zjz%cZ3vL^AalW3Sp3+{y@4z%4B{^txy z@ZkM5GTvw36LJTScC4Y`rj=|)YaxM6+jDPsm|dhLk+WRbP}lC_>s(vXI3?7xMk|&7 zWKaWS1RlGJHizLHvKkK-5`e% z%lyxJnF+o`g?wB$*fgDQhj#iVpM@CxoByzFo=JF~!3$JQjF-l60&j?`Qy@Z>!bst& zF$Hc3$hE|60wTb&xaM%nl|VVdrg1bv#ifyr0-XXPFBSwu9Nz!d;qM?#w})SR3f@T{ AJOBUy delta 1019 zcmZuwUr1A76u;-Y+niIkbmpA@XEa%~wAMr9pQeN++SF;ad8f8y738$O*-$X(si#Qc z5c(!0(o=urrXXT3^%4Y83Bh{m$%jZ&5k(UF&b3scJ^c2&=lsrh&i$S5j?Ka9ESPK3 z(oB;0ogUd%nr=^-qvdQ2)O+P4kUFFr(hnJK$gPsgfR8KP%$1DqDo>!UsuRkvyUN4( zbMdyr`Hux2=;8VZb`fTsrmJo=eJmS>3!OtZo5*P^4knt5y=nex%{!zP8D9N z)nshoo_@i-(zUscZ)Bfv6N1hVz3BJdr9feyzq)zflZz>D`vwat*|hm2nWbq;NA zFR6^XT{P*zxz7a)ELxCC6LME9wun7JJ2>${aPrju3~^Go170L_1J}a*uOryep2OB; z9Bhwg=tvo*?(%N;5(FiWZOd4w6hR?+6+c)Jm74{xg@`|-36f&BdLm)o%cm(+OkhEK zs8e}P^bo-u0ewTfj9`CD+D)cgu3b)TMy{EiOM?^;I0$G@+CDfL%$@itG!Dm+cQ}6o DTut`Y diff --git a/data analysis/analysis.py b/data analysis/analysis.py index 3172c8a0..b038ccc3 100644 --- a/data analysis/analysis.py +++ b/data analysis/analysis.py @@ -8,9 +8,13 @@ #setup: -__version__ = "1.0.4.001" +__version__ = "1.0.5.000" __changelog__ = """changelog: +1.0.5.000: + - added optimize_regression function, which is a sample function to find the optimal regressions + - optimize_regression function filters out some overfit funtions (functions with r^2 = 1) + - planned addition: overfit detection in the optimize_regression function 1.0.4.002: - added __changelog__ - updated debug function with log and exponential regressions @@ -678,7 +682,7 @@ def strip_data(data, mode): raise error("mode error") def optimize_regression(x, y, _range, resolution):#_range in poly regression is the range of powers tried, and in log/exp it is the inverse of the stepsize taken from -1000 to 1000 - +#usage not: for demonstration purpose only, performance is shit if type(resolution) != int: raise error("resolution must be int") @@ -718,8 +722,28 @@ def optimize_regression(x, y, _range, resolution):#_range in poly regression is except: pass + + for i in range (0, len(eqs), 1): #marks all equations where r2 = 1 as they 95% of the time overfit the data - return [eqs, rmss, r2s] + if r2s[i] == 1: + + eqs[i] = "" + rmss[i] = "" + r2s[i] = "" + + while True: #removes all equations marked for removal + + try: + + eqs.remove('') + rmss.remove('') + r2s.remove('') + + except: + + break + + return eqs, rmss, r2s def basic_analysis(filepath): #assumes that rows are the independent variable and columns are the dependant. also assumes that time flows from lowest column to highest column.