From 5fe5e5026c6286f9b99f9a8a4935182ce8380dea Mon Sep 17 00:00:00 2001 From: ltcptgeneral <35508619+ltcptgeneral@users.noreply.github.com> Date: Wed, 14 Nov 2018 15:48:49 -0600 Subject: [PATCH] commit-2018/11/14@15:49CST --- __pycache__/analysis.cpython-37.pyc | Bin 8544 -> 12815 bytes __pycache__/generate_data.cpython-37.pyc | Bin 483 -> 479 bytes __pycache__/statistics.cpython-37.pyc | Bin 0 -> 18155 bytes analysis.py | 273 +++++- analysis.pyc | Bin 0 -> 14201 bytes analysis_benchmark.py | 8 + analysis_test.py | 13 +- data.txt | 1049 +++++++++++++++++++++- generate_data.py | 2 +- statistics.py | 669 ++++++++++++++ 10 files changed, 1942 insertions(+), 72 deletions(-) create mode 100644 __pycache__/statistics.cpython-37.pyc create mode 100644 analysis.pyc create mode 100644 analysis_benchmark.py create mode 100644 statistics.py diff --git a/__pycache__/analysis.cpython-37.pyc b/__pycache__/analysis.cpython-37.pyc index 01dbc7295d363be9b999971ba170ffda3fadd817..3ec7dbdd2cf5a3d81f20035260a63ce55719955e 100644 GIT binary patch literal 12815 zcmdT~O^h7Jb?*P^nVnrOm#gK9q^P!HQ9Cq6Qt6c7XoF(Gw7=7s_O2lSMR;=y{h-++1ZML-}nFG7hn6)ONQ|?-VA?pC|t!AS53oE zhBB3qpE7QHMeG{Id!aKu9=csYnJ5pnk~7r=1A_YxsrQpCCU9Y zU-I%=S@OzSMe>=o8RYYw>YIi-u8eOQYqMW5)Pgdv7U( zc2HmGu3rzEN&6T$EH!*n)I@a8&hw>#}5o!e~3 zNw?mJ8lC;P9jC{(yPf^I4qG~m<90Vn%eo%#>^5|$Qd@7u>0I{K;R7FYMOSNPYMyUe zEaT_o*KZ+@SCRBV+8dU>gxor5^v#pTz)FleW@22o?${q1`dr_NzyKeb1G{g(!TvV- z|2F%KYnAf`SOp|{)Viy$#M@!Bz0nRqPoZ6)x?vnd-6RP2 zFw2^gx=B|zH&ZWb*T*rowAzmAjhl^jr?K7%(WexLNz#s5sol`6)Jejv?Q}lb4D$u4 z-|B8Pq8}RiG#>usiI=au`>O7e`tiGozI^HO6=w^*>sRsOvh@nZulb=ui}bdMG_iddt=RN8EZB!2bYV>!&RctfNgvV4)QXE2YH18 zgnWiVguF^YLO!czLAyE4_cu8l{+PSCWT^ra5=wz-VhOIBxGY>YE(e#}a`6IdwfB=T z7Y3V+IA|n^Zm;hqBz9vfj4uRjg=D)MgKfJJi@LiVYVz=fpb@E{(`~hz7lV4n>GdFK z+`z+e*x9%cJf(t$){Xt3yD@xSu}WTjxzMNZ8N7M)=;SAhZSwh=m%7`!9VKb0S(kCA zZnKWpGs8>MDzeeI*(rO>OdSqX);~+mfNrU)!u8!2^qdis|GC7y`7xm`N^{d=6MoFO z5JLCIQ0Rl&b)ye@*UdgCUbp(7dEM^2NF8B&bG7E`v*@cIWfCy?91}iY^YpJFOWkZH zHs@h;7L~#rT$#b-$o=6m$Y82?i(z`BfA2(L62qPc=3(OjdGsm@;1dXK-~2G+Aq!RZWUzNy6vZ>C|k9O zIqNTAsITCPv*}hLj@iF=yfEEt0L>CQ@>j;iwUxMql}iqLjk%J*r%Ma zj~9mxXh=;}rJrPSk;xaCTtbriLvc&Z_5@K=Csgg^P4tTS1q14B*_?wMK*?E?SMOMX z&V23wdQd!s%3ntTRJIPGaz-(ZmOFXw=DEjQ6q=pUOh1KB)k^wl zWcnEc zZ&hGs<2yK@bOxZlt6OcD1EJ=?sDBKrw^Ue&LYLLrTnf!VbyBaVm3n=vt9CmqSL<~c z1f8tqP=%yFkGJddOek#nH<$>?o?~e|I4KFqI3SWTUP9tq4u~dy%Xj>;@B2%>-$cvk z$NpDweO5?mm~T(C&EfIC=nH zM-Z4ncd>0a0D#XUpvW{{NXuZaY@D^Jd?|eL2Y7l6o5u@O`z&$uH}TaRH}&Hj|2Za4 zFj+y8mhyoYm{s_cO#Ka9Id~MXa)GB#ls6z4ZZp{IV7FU{lxDX`xePjHBh zRjap9oZzo2Jc_EU$w!hK-r>nnFiSqwjDapyK;JiFpK2oG+61R|ge^N=HV; zACedosCa;{9y~0{0H>=TqS0S-L_`z7;mP6>9K=I?1wHihOkQ9jw!svc`xMYesJV%2 z8ua-9zeAz#6+Cp9lbAb*rQAXEa|f}UJBXFsL7d4QMC`VRgGdwi0PKxyUTVL?V6O%5 zoX+7tjn3>IL`;U@lrM2qo2Ceb4EbPj`x|KZ>= zD{TZ7CkjLSC1S;0Qy42j*_}EJC%C&ki4(^QQ@9sUd>hjzP$aSe!yR!zhJ}qjZaJBS zjXpw#*&jm2V|_x#W223}RoUos`eVp=UpD$osmUNi%SN9oH5p`lQ#SfM)*J#ErHtPM zO2<1^VvvrhH61s`Bs+BPiY`lu#G~Or+^A$bd3PfRESwS3{(pszF+q+D9cRXzlW$^T z{u#%|ESI6W*y;jK*23JEBdAa&uY84GQ;7Iij3bR(=GDgU1_I9H8F827cNy2F~6^I6n@z`XT@BvZ363v)}DI zcPwg#zWXrRc$Tqq<(iQ=chG`EO%EkcN~QfdluJ@JuNl$22_?mokAeA)5m|2=(vHIe zJmnu~_Y3W1dA@w0y_~h4Jw9?xcQ&Di-Q8{Io7y^SlMQDrnEmWDH8`Cc<(`@Lqw_M^|J#F@lD#+-bV zEZxU^s(Y6bgR8@>{o7zR%FYhfgfIOK^R2WS5`$<8(#(I77p+O9q#P5aVQ@=jWf1X* zxV0JTP^yD=oaIS3SPyaHio^Y0sIuN{Y(xPLK7$S2-3l%RPX?E9V$QQR9D4`dC`k6V zLms~l53qxk_5GmP?d)zvL01R3(cN1$ImyM#x>0980G4*zO&*~K_~_87j)RpAJTNqw z;$SxpWyrx(wQM^XBjIW(M?;T<9S&4i|!(4UNiX_+Dx~rQ~Exysp&CK81jW?yZlhUegLcP?s=*TBYEQ z(cMhl4a8I=X@!v#&`RxQoI3n!>SbTW9iC2I`D*G9zgnA39WIjoJ-kHoJSVlgy9uHX zP+3p1Q=>Q7_9hehSNa-zdCfYzq_bIGkGncS_=TsXdQ#0%jXg;MB~iFnztP5 zl(l440NoZ;pO@v9XH~5H-kL+71)j2?mt`)o&t(7MbJE8Ci@27|GxzN`QnYkINMTX5 zjD3K3n#&N;(?+JAYUZlUh|w9_U2vrql8_ppC_`2zCrpR_k2qnz+rYVPbGThWQjCD; zecV7RxP2S${_B1B4s?UU)&;>xS&4PsS%j8dG)@~R!|viyDh6hrMv%%0W6|K1X+pFh zY-Ip@tnb`#bmdiJrveL)(GWOr<+-8$UHKRi!}!;aaHb#|9k1aIyg|Hj;vJo&)^O-m0&6cO#WnHruVuyv(Q_hUrW_VdPA|M>>$K1{TJ#Eb9f!$KSI? z;LOGm(!GW5gk~VxDRm&?s#SV})H3&81jdzfjH>|SDuQw623Fr3>QPlWX;8xo;)$+Z z1|@-c&rqX+!mgV6E^svQaJXGn<=eI@KMed#fP1v03-z<82S%#7zPo4$jU9C?FQd0A zFM`gC29DDmHJ{f5{nYWiT;UiHy=8n0^Sft3$~m1iHX^-gD%TWzJFAjfT&+4#_1 z)z4l+^%?R3-gH;%m-x^Lwie#>94i-~p(54LSpXE2RX6HAiukASN{}GVOMK_`FpQ|} zHDaYOzJgkW8D`%I_ch)Bk7+2iUjN&(-}=wjKKZEili;uP=AZt2RCsCF^wP)wi(k#3 zP?=g--^&%3O{pdiiqle?Psti8MNpMix6u^zo)q8^10|C@ZV2V_Bw99Rw}Pc{x8THMdjSX6n^+tX4BckwI+6ot zha-KjZ?L_?gezN{(OWDlCS+3KLLIRrQ6pWJwwdf8NoR5zFl;WnsW&$nzB9p$nmaRp zPwu0?C8!VepK~90kEl*K@J%GRC+?rMs@4dX`8;7$aQT@rEL;E^7<*^MkcJ5@M)Vwz z+PGt7!CO10z*aT`0^w#@@bPRYcCaHAJJ`qF`$q00p(b~bA|gGQSZbDy8;1VNz6 zq~=~~?$cZX(qvi_K60xb?xbG0gMeQ=!;htAFLmk++I98iy1G2cLsUOMD`+rq%mS=J zXfKqOta+&4yLkq?NeS2({ThOl|qC;Ua z`xt|WWzE*VkM3y+rg1AwV*L*8CdbB^R$w@0MjknrRwRG)=11Rs8#g?Zz-8ldM&~-< z+&mU<83qOd3>gi^6x?N>fbF?+7V3yxiJq`U9oeIxvu?qJv2L3fe;!RYw(zjcJKk)2 zAD`9@CIma!e4Ld26DFJxC?D@`A&B^9YpV6e^_=Gf$JQH(zKp4Y$XFHm(Z-{_D@6VW z6u7m5Z4?F^V&DS=1`q7LbKIGvFy{La5Ue{gwZ215EVdc;Br!1?&me$!fB<5IF1~o+ zxr+nrQ{p+03)V1@MznVcH4p*5N#gUJ+aMBzBoi|~|F&EBwx7QP0-n8N=LaCQ2capo zC^V%)z@1*joLaBE*`4kD`OjY(7CzFygK8*Pu5}iSCv>NlP3^75US>^Dhtm1knOum97EsF!&?Ls(Rzz+o8inbACjw5i* zLmK^u>{Wr~xZUX{o%TAy(QkARlNm__xdd-(N}i?&Xln3`NMc@*Q#4sd%nZQ@O(7S( zk!cW4s4z)6UF9xs>c0Rd76E=QQt~8f--1^-{qL0+)Opw6@@M8=!vDFsE57T09mh-y O{v-b5{u93EFZ>@V3Z8QS literal 8544 zcmd5?&5s<#74Prv>75;~?X_8d#B@wBo@g=lf*}|L7IFAWvH@GcWH2F`}X!na2yTg6r%M;j8@SD`i5F>$o1a$R zM>l6})#X~$kWNn~$nkOZ+uvX19H$amg@bg@8DNJSYH5}x$M`EeOSUUA9ly3fZhPBZO6B z`mp9WbCKn%w~;wvv33))mIuM>{kYj$6}4XcHuqJhn={ZM#h{C-*4KPDcfN;$n<^? z=CWjMCq=Iaw3cg-6$fqES&{cp>l`Jd)$%YUN04OB$gnfDHzCu^2t+TLN3Zx85(NM_ zr#4^FF|o*Eegg%FS=&X-xzO}d%=%DcF_UmLP)VeDZWOs$ z6?_05l={iTb}{F(6B-n+ExwA#l6^^ zmSjberY;GtcQti6L#+oWAr+SQQQ}cjx~#lJz$B-T?8P0=A{qN6Lvswract)x_voOp zyl559WeC4?&xq#N%%Tm+zcTHd}D7lhjBI6 ztMN+mBGQV#jC<@`_LQRY3G)L}3(z2Mz zSdto56ph&8z2OS(Nq;aio<-pe2oUHZ04+86r>Fp?tX*KrW+1Xln6ih~4&K1p<_(Cl z)f*6Ht2Y3piv0{@l6*O$f}OBzEFq@?W2Knn2|UVJzdN+uhb|r5G5LiO=eJ{gV=TOF z2v4fO%l`@rUp>MzY-~aHFOtSxgZd3HJPGO*H5oyAeRoJNm*C5x`L{6ei!?adWQpd4 zg`1$}U)K=aB*XurX?ngHK-YodJ6+vRz|Q0_x>T}na{ac!RuKZ;nUe%nt0PS+rF9+HWVC0|C=sSY;0ZHv;RIH;y z(H}d;Z=*okO2PJf+An8^*kvZ#ZJjpPx3DF;3|)Fzxv0E~T_s{7wHZYGdaAg)L~R3flb62yLjR`X#FC*RnJ|sgnZd+WkHySA0TvjhD z;f#t?FFH*asy|Ue?Nxfl;Z_Hw8kcI=INJ!0owU|9OYSUi_a|^?Uen%Jc zGup%6f--=+qu{PyNQ`#8rPSV6ulEZc>lh`M8pQh9eCsWs=3N;~j~y0x594&N8=vz;J892*IIRt^J(Jxhyc;Wl44CZo z;yh0xZz;g$4QH&IfyjGPV>|S4Xy+})bMm>@~D|J8nro{8MH|9JMXvrV(c>D$u1%3&q=h}>BXot0fnhSrW0dXwk z_-PH$_t6PET6S)oMUR8((H)&)9LWN2nXw`e40k3T@PL4=S!e}6BSssnd;a;UQEr~ux)`e zR04aUCE8VER{2n!RgNeqBcPx|KX9lX!H1ju%B(_njz(zF?ooyaG*aW5A>Egh>n=hU zI@ZV0AUS{eFb{@AQU3Zdwuq!7lD6Q`1NZ|Ra9@{6rmgidHNa_DIafO5?kDZ6){k!Z zOVJ<{nY+^KuH@ww%^5aS=*89(jBJ{9%P+bZBo>mif<-bkO?y&v@Ut}e9B8F4q#h(D zb|;JEPT&dQHVzA%q0%=h`Z6V)u6>Dwr`~)Dj-*yN5*LodWk+&isHN%%7KM9{LV(F` zg`sl}f`wywga8r(H^tO@a8`+hQ`tJca_ORW2u`X}pGF-HNHkJ&R$);aVoy)9JL4g4?bIh1j1%_2zyoC=e?EG(Z6Y@8sG0o0~^+;%ZL_`r=czEl}fuZRE43 zK|V_AfH!SxeT*L3Ppy?VJx;`Z0Hsj0r2vAj_kd4PxOs0FCBZo_Spl%p2d1SU2npb2 z&_R!&f`o{6^lQO}JPj?mLs3Zg;z!5c{^!e|ecb(v_j|eW!9U8v)1#)R@BACrmSf(r z1z0&&;R6b+oKT`UlhwdS!qAnuh@E=t;Bq}+_{;W=2?fk$dk9+nF+H2fb$?8e-@}Vr zPJgwqoEqd+6rGiVD4(T1Hkle^-?Gq|a+|s`0UarK*aA_SgrEj-9r+^{c#ur)1l~!= zO2~OwJ)zar8FdC}@zdrI(eY1{7tymr$Ve9*BA15v2`Ysl)&;yiOwVA`(+krxV0uPj zdX*SX7!}jw*rYJM6Kr}0OwWYrSuiyVhONW&aH>DFD|Kwih7QK734Q3M+R(gt9x|mw^bNqXGD9y n%y|-_4*nkS6sa~JUed=Y{IrJCc)W48@u*`u&*D67pL66tcVi@Y diff --git a/__pycache__/generate_data.cpython-37.pyc b/__pycache__/generate_data.cpython-37.pyc index a304ff14d3a405a70d34bbdcb6c0cc0d606772df..7ef4e1f1e9c1ecdf1f683dc20a4659bb075edf17 100644 GIT binary patch delta 63 zcmaFNe4m-uiI-b_UQ7Hd_=;%Nj|hJ!6<2S!@s4=9`o(E3qYO?1?QqmOOH3D{QbYNVe47 zpf1pos5GlmmQtBiO{%iZX3CyS7L&>MdmMfmG3(jKtD)5?6^`1 zrN;B(;@*4C`OXt>A3a*O@b}gyzy0jptCsa!o}_t#7U);!W0TOVs3T|a7DKep6!%6^FV`Qs0A>(8iVbzFV#p}jt#PNA6Gw6FJb>Z<*JvlKcS}7E7+e@E9$g*6+N6%XH*$io>$Xq4Esq{ zQAO-uP_L;G>|azrQYGxauU6Hp8pRVoP*rsVS6)(U>UA}TE3P`L=2ZblFROEELFI5X zrQT4BDvzUA)Ood}931^njbF7YZ+f>!Evx*t7dGl{7?qtVdv|>8ZTsPj zd&koauU+?NT(7P8R9kr(>*C*OprfFTH(%`QHhr)>d&pn8{N{l_*FA9QaTm&$lNq^v zf0z8Qt{b}?&Uf&JmhZLuI+w$@bR*hn`B-sRZciukthmYIRm(e`ZUyZ|eWUOBcav-8 zp{!}kS6I`*cYI*3O}83KHGM35v*Y*mZXVfa26uapy^oWmtD&wpb{g9|S;KdE@`#4p zI_T{B%Ct3Om~p`>%X86=yX)(^-;NqhP#CYC>>5RLCPSN9H^g%E0pXW06?|27->Gg_-Nu%?7j(2~vKyd_ z5DVtJAbMM&hbSnshqjN4pxND>0Ib*Uv^IS`FsqG(^fEg$LClfUyisx8S5#1s@ccRV zNAJA<{(B!?eeWuka&0mFt5#EZ3U`7|Q@NYInawWOP~jN!0C?T?_Rev;CX$&N>b>WXC)4*l?y>J2m=1qgXl6{A zO(aal3&H9~Be3WEydV>Hgr>uVY^l1qc6MccZE107X-W9+JrQ^#Kw!5!ji3`Y_cFy# zkr#gU+A|_)b*Q0TL6^w&Lb(};9`%~#(60wj`JUeHw17AO3%S2VK$$YkJk@7l;I?-s zXed|v{dPtpO6Dmr^``sTJdjuZ10v0T%y#95f#;6r@22F2xSaPEAx~R@h(mz$*yz9za#h+@Vkg# z_^;R~d*6wyuKmFN+}Y3V=ezlCu3_&Nx`mteelc>QT$JbC2hRS;y^o_p*NXBFoJ-b? zOK4x(AB~*uNVg<+=b|Ed8kM_KNpFsFR;_)fTkPf)`sn6PTHQi7f8M(N)9cplwdxBCw?gng#{*DFL7|5RwVKSNR-*vPaNs{gB2CJe7UqRPB1qNI~kE=n; zcSFEl9Uc|dMy#jVU~9|u1wrmMnoX{#z#kyS4CfX1?M|Z^&B6>J;mkxr_r9+KvmU+i z&il)-NAcEK9^G(XbLZ#g=AJxGG%_Pyl;$$8bQa(S&@{mbWA(vy3^116-GF3IhRIQE z>7bRd4lKyS}l6QK`oN#7XO-DGe z<}0Mx93Edv7Cjr6GEkR}Dwz=nbtxN^2mo};rRk};>ipvB@)UZKlZE-Ywd`bZZe>0@ znOmAaJ5{-n^^r;G(7==TSCbbHj`}ReO=9&A{e#9mN)<%Stb4tqH54fHFwAy9B6KRL zW4LD~QB#l>58?wo+c3=SHK8@g_*_PP6oB|ULFYQR{LA47bg@GIUc{zr-8c?#GqC9Z z8l9h7wsqT47NE$64$%Q2e^qxHtGTW#yE$`#Ca}*ocF8W<G-VDM z;BU|h+R?f-8cswuw4MEh-GyeoRnjkZF{rK`Ik!sT4E7eT&B-+k9mnUp75t_i?h744Nq8^~ue_EiXAxIN5DFX7&=@e5zW#u}N(m#n-qVdZV^%h= zijbL_kr|OCX$4v0^>uUokF2)ysb#j;t@bqTjzpy`2Q7Y{NZTmisvkyWGPttYH{=eO z7#-=_UyhlP+&ub~h=ACc*jpd_pp6NBxc5i}fi`zqtSm1I^b9zn9kd#4o>bsB zHFRPjiDK+S!s$7fABi3jwP;L&d!#ww6;`q_yJ$3k?b=b#`hea_6drsNp{QY>EBEK% ztbtDwyUlRXnLpV-!=Ywle7%2&Nz$) z`XMYtjNz^U=yK0960kAMxXPAAgqHzahT|N)$TyzlCUX%)!RYVf+P~vx0Jr4iPXW6e zXAJn|I6yIH!ajzxG8j$TW_U&x%K-N#P7Emp!zKi4Y@!&DWExDBQ}(T#o{sYSHkgUn z1r!6j6u>GfVHLJ7qWvH9{>_mGIOmfO>=(eEUnFd%z(AGU{Z}s9KmNRh=LLk9a<~+Ubn_6|}dyxj$oj`y=c2%60U>#knH3iA=pxjE~fVW)u7klnvvNi}Y%I z9p|CVH9TMhhA}AyhU0?v+S@P^ntr>I*V7m(l(e_m^w&pP-n}jD-;V9Z@RHEBK#$?q zXdWKJ#&V82Y(wP*$Ipp`F)+7RagFn|Z#g&y z$s-3!hqH6;&%6(_2a#SX06NwZ+!mr4e1KKw(NYNHwXw~@?B6uMKkPf-Bg#{+eao`0lj<{q%PbIKCxIiQ}LB{MVXbhdA15+!)-)L@7>%^>0MoaW;9%`&h502gAr`NXNh^|(FJ zo9ejj0s-`gc>3${w!aO{i`cKM?*Ss#(I_OR+=Ci{pFc?=LegOiVlj+WV@%#6ymuqH ztM7sHNWTD3X8H|h+%4#JMraWq_u6}KYDrtf*kOaWjfW;oO_Lczr4F6NE-u2~qYf2# z8=~as20Pur6i)SKSH|2hyN~g3CZZ%_#*)`?k{}r^uj7V@`(QwdEQM9(Hxq$;w1g1x z!1d2_`)Nc3EE`^q3b<43LS$b6BR`Otdk-*-M6;21lfdI}x&-bl4eUNW#z@q35~W)- zA-cquL!7(kX@3)+(R6uv8r`j?uHphc?90>e6^$DSHMy8Njj4w?J&z~QaOkaxNA_5c z(7aMyz@9VM=NBT;lX-&TFD7$Ll<)Kz1P##UO!97oKn{srT+ISoq974wF@XTFK2y() zI^q6ljt4P-+nok7EiOF0rtiUgWCB8*SIJpaM)W1Lj`K|>wc z?|TrjKJHILsuEo801xmr?ZPHew1R+KoAoJ$wJ?RGVE{T{M z2OUNmvhHY}hUgBFH1@)1o@pqtBwiu|n1CIveRLXZMTqWsyJ)wI%m~oy?hY>i!n|$b z78DNBvQYyAyt~t=?+Ahs=1k}SmZLc2A&?HoFia)Q67KrG3XvK#=ScwQDg#1rmc$Uf z11|>}2+^H9 zOrh?-juc}XL$4$w<0_ey2VS8NLNQkEbqRu)L`0HeciD+dM<&AsYanvTah^fK6XAlb zpxFq1uLlw-F(nh9W>y%ckvxif857K=WxA65M%I-X%3^iJbomL>3Bm!xB)u!yOqzo1 zFdZ*u=4}R@2wCO%6-38`Lb6EYmmrX^ZZEWY2(HKgVOJPulpHaroeZO3W`~ZwRa*JMlbP9;SRXqhIVijaX&!EW2ZiWcSJnoZ-Jpj{D7i2Cl?`0mnI*VHloai6y=V6rRqkM!&EKK!xqoP6@+yvflzkA)< zn_yd&f5AFs?H7``E2FZ%Oaf^mT{rE7|TbUSbgq#TOIY`zcE(>!#_aL`F(kfiL}rg z?GDOb1V0D-TGr?FD%s$0nmQC9cUFp$6r`j;UR@nVfUFR184t^kz>hLGX}BzfDP$9= z#)UT^^|r9Egxd|mBl*X(q1hz$wP}Hzbk_-?pv3g=Rq+Al`sPU6IT=#bea|RW7@sgm z6Qe&V1i<^mfDf5v#>g>fKdM(<;NX7Cvo0soGcWfBh` zt`S0)B%*ZG8#$;oL{+ou2P}pGE(9&eLqr;uV8b@SJl;gqgDb`QR5|FG6HoE*Li%t5 zTEdNKEx4PR{Pi8bev1r-slYD8#CryoNspz03K>GJmW~dq>@nYJtS~Ls{v&M|K{CO` z`;%GnBeDFD_jI@6e4N&m@(^Zf#>S8oj&xku+=FYY#rj^sRf&vB%n(meDI#B*tXt32 zj&s50&Dee)Vas{^7S^5mR%~w#he$^8_}>$LiP+YpQJrFL=a`ikM_AdxZ`_&8mvMzD z;sKSKMcd!T;|VGw?5x;Y1N$I1nXXDDT@dR@#z&w z<-gQ0h{K%NP<#g~8cblHRSsSUHD_RDH>~d12!5XG?lGE*=J)0Ph6`69sjLAEuP$E4w<4$d&ny-JKT;4{xaGTeYbM6upKZD~l zM%yFqN7(@Q7$;eBe3&BRcKw8rMPo5jvfzRf;)%=R4f*1U*wD^1vQvfq-)T2)ci5}) z-$0}T1={XuWr8nwVShP&c>e_b1upxbx4`tnPuS@Jqw!r{9Wol3vHdMMUyR0MC2%(R z8{D4D><-!- ztN?`TARV8gsBZ*8yw90&ft>-{<2flQU>a@>mI?xgMFb8XLPlU`7>U8NIc9bd=6jCll)Lx!c0-3l1NN0Q9Qk` zHz*^_t07&>h76F+3IwVoqytG2yiV;0e$64gEv+B{*TBCAi%RcpdKxwmJ~d)}E8_5} zIV}7xvX#Y~6Jcg`s&VpLD2QPU2fFZ}JbRrefEC$*Y!OXU!<==cG_#CILGCjn>7dyS z88&fr5lRoC=k#j_OvlVF>sQ8`t!0J|u&3b4WL~&8AmerCoOe-Twg7p2mQ;2{n z#|0FzBAlIU`TY{hO}LfIJcMc8t87fiAvMUxGMuk2uA)eLac%DGve{<(F8crnK~G>u z|6=u1q4#0YVP6u+6o^q7jfcM`sMSbp;2O;DM}9Xz(um>f4ty4nULKejz6^N$+#YKL zOZ8tkvupAKcjgi^d=-2w<4+Mg2O%okpG=ec5Yqj()P%YCVNi}yTG;tOW9|}ZJ zoi02hq(z?$_H+DHHN|&Ad-7eF!FnnJqYfA-=s|v_PX;ibAGZ=ap)#Kq07z0rXY}#CQapXC)nm){lpC>F97U7l<=JF3bDB1LnQ; z`OeUMVX3+{|E;mSZ$N_|gP5;E=|sRR6Occ_xXC=e|U2 zJE{IR^@RV}Z*v=8RR@TGCBc*-Gd#fuR-g~U3rhr&4>OIK5I|D#p|_N*ryp)n8z-Na zO4L>gE|LwA!+iX{1kIi(S!nY$=+@Px`Q_z>`Lk;atA;XH;0;mH#rg2Ij-)tP&*MBz zLDqRM2lyLaS2{3%%5|0G#%LMQ1%9iLX~yqnP5=4vYkG)I99Y+k;fUK$y|CspP{gu` z#7|t=x$4=qm9?{r3#*IE3#$vu^Sy=T8t5%9d|*NWUQ ztEnk*nn7C4c$G2H$ckE)jr^s#7<~(r*=BL+Jc4BS)Z@YjXc2Obd4~*$dJ25igMX+S w=D&&JM6rCXG*Wu0 high_bound: + break - pred_change = mean_derivative + i * stdev_derivative + try: + pred_change = mean_derivative + i * stdev_derivative + + except: + + pred_change = mean_derivative predictions.append(float(hist_data[-1:][0]) + pred_change) i = i + delta - if i > high_bound: - - break - return predictions + +def poly_regression(x, y, power): + + if x == "null": + + x = [] + + for i in range(len(y)): + + x.append(i) + + reg_eq = scipy.polyfit(x, y, deg = power) + + print(reg_eq) + + eq_str = "" + + for i in range(0, len(reg_eq), 1): + + if i < len(reg_eq)- 1: + eq_str = eq_str + str(reg_eq[i]) + "*(z**" + str(len(reg_eq) - i - 1) + ")+" + else: + eq_str = eq_str + str(reg_eq[i]) + "*(z**" + str(len(reg_eq) - i - 1) + ")" + + vals = [] + + for i in range(0, len(x), 1): + print(x[i]) + z = x[i] + + exec("vals.append(" + eq_str + ")") + + print(vals) + + _rms = rms(vals, y) + + r2_d2 = r_squared(vals, y) + + return [eq_str, _rms, r2_d2] + +def r_squared(predictions, targets): # assumes equal size inputs + + out = metrics.r2_score(targets, predictions) + + return out + +def rms(predictions, targets): # assumes equal size inputs + + out = 0 + + _sum = 0 + + avg = 0 + + for i in range(0, len(targets), 1): + + _sum = (targets[i] - predictions[i]) ** 2 + + avg = _sum/len(targets) + + out = math.sqrt(avg) + + return float(out) + +def basic_analysis(filepath): #assumes that rows are the independent variable and columns are the dependant. also assumes that time flows from lowest column to highest column. + + data = load_csv(filepath) + row = len(data) + + column = [] + + for i in range(0, row, 1): + + column.append(len(data[i])) + + column_max = max(column) + row_b_stats = [] + row_histo = [] + + for i in range(0, row, 1): + row_b_stats.append(basic_stats(data, "row", i)) + row_histo.append(histo_analysis(data[i], 0.67449, -0.67449, 0.67449)) + + column_b_stats = [] + + for i in range(0, column_max, 1): + column_b_stats.append(basic_stats(data, "column", i)) + + return[row_b_stats, column_b_stats, row_histo] diff --git a/analysis.pyc b/analysis.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c992d6962f45a623892d601c984f61ab49e9bdd GIT binary patch literal 14201 zcmd^GTWlOx89p<+-iu@B>NGKF$TaE2Ns~60R#lVI^b(pBQL7yX(z;ED@yvKV>3YVV znRV>SMo2AEA1WabNIW1ef+xfSycG$lNKoDoLgE1=9*|IZppQsA@KV0-KQp^yXPq>5 zQmE?noISTWm;ZkL^Ka!}!-H@C?1Ps*mH$=n{RE!$-$;D?8>+3;MdnP^Hq@jcwaA-l z(k${7HCZY0RW(^H@&jtJR^;nyvR>pHYOkOaW9p;|~yNh^|8B^{7dXzxvQ0O1*eVbG8%8Bm=jKafyWYCvH^fs;eZ zo){zGJWj`l;1!f}vl!|81z_!pU>`VU-joy=MII{{0K!Noil>X=%2hJ-$x@%vG#I)2>e?yGZ8E(OU`9&u99gzdnde}FJcp45!=BR{EPp|d zAQVUf<;pimLPUihOm$iZISvO?#(?1wU3|LCc>)D+BJklLBj|BZ6bv~iR;3BW0m+EO znh<^;7u?%hF@Q}1wMn4%4kkO12xE-LQJ}l+VigyqVM2x^MEFg3kd8~xU`GuLWH!J+ z4ZKxQ!M(_*{B2S zo8x072!WM|8h36ki2QzFvv;Fo31mc8B!naYjX*~lj7|Zzs34;yA)?6H1mNsDG2MfB za)50&ijT()H1Uv>gvbAk40wE{t`^TyA|nrJt3d|fKz_VYQRglnR4a|VcDbQO0a?oM zg9-(_I)9M=mjGW1_lipVoCu)`mh0-V2uKk{z_zdDbV(_aw}Bq@d*91R6;h>W8z|b| z$*K1WXgGfl5>LPPC$(IYSLWC-QlT?PM2q8y z0Ct?&*zdFh$60q#_6Xjz_c6JX374iJd>FajLM3-IHjm9EeFjf@2a=ju1F6c#Y*w4i zW}|sWvmrZ*dRBsmp7KX%K85F#WA6L|t1a!s8jIrSm={M`5{tcQ6(-%`4Vps1^fb-P zTW<`lvXR!SJ;=V06&rAYa5&Y36$Ee*Tp&qZu;! zI{p=3iPr=N4((Zii$!}@Ry+3I#2|37XwS-)YdkAdYEN^icOk(@>w8vOaD?SPmEWLm zckQ_yaU+jPkc6{A79_bxrO@KH)~9mRI#qV5l;-SfL2&au^2l8!aQuX`_AMby64(#$ z^h^gD=6p!00(Oe6kK!f)wu=)#-|wy3rlUoctBnO|<&m&wEQn`7ww2lspk;$Pk;eIt z@#LbR@OgOG?CBgj>7&ZVJszd~^3dT}ouLd)7?@fr*e6s2Qv7Y+6)Hrz4|{|k(k@-){wDzEEpAYBS(GGsn(Z-Yq7 z3MMW=RD0j93(VW0&fmi-)>G$Q>Lr;kuj5{jy--Py>*e(nYL^H$eafN^|2-F)>u~y2 z`|9xSl()}ga2#`gmusQ??s^R9FR0@)0_9EPD4ybdOStff)80Q^wfK*wzIh39yHs}h zwxpP2Euo3?^OoskdYEskPVO#uU7f5C@SD`ef8{>3q5ePB#_Mjkrv}6oy-79v_uh8b zSHr(WC#{CHI6HrVw+Og4z%2sY$Rgra-y&#r4Dr`f$46M8IzGbx(k()p4rC4jr|VjE z{8n*`fT7l^#X{!Mf_Wi-%is;Ql8uawih1G}INsgQf?z7>b+ItIC+Lhep^`xNp+0N5W1w7v>? zce5eaCA79^u0GA4d?gc;28_=uL&NySooYp{5+Mi3zj74|QGlzYOTJPsya?8w7~?48 z`tT53ZHG~SQF1qnT$Ca(Ad*ZwOfwz-kuJyyD+ydbNMrzUK~9J5V9w2E^tm!!U|B2& zW*oWr-`JT%MsLTi?|A9L7UGCUBxUS2_8A`6Ka8i#Y$K?_UI7N)k0M&f>_)zzRw7^0 z2c$<;dh9e>O087Y;u*Pot>%}nl$ir+FCMvl-LF<~t%f^l1lq_26z=gy(Fgy={0r(5 zK*U>7xxR)VWf7Nex>#3ByQBt^SJxQoQZ%d#!vG$DqsUM%=3SbUwnS2++@(?OGFXh# zEO%*27v8~Q7F?I%uDHp#kRs{_+o^HC=@Em_8IZjXncVvzV^>zTsKry9{M_aHx!0wA zYI(5OuD(&sw|i;+b6B~7^hG?y&Ie`H-JR=VdFQ(3x;xihuDf&H z3%8g70)mp(8SCmykOWd~g{l6|VrwcuxDdjvUJrcTn~iQ{g;8crC-JOx+&X5RwETck z8VL5X;>gOD<^skJ6>(bD*wm8c#qG{)WW|YvLK0s+EMZ6(ERNbs7NR`bp~vts3zHVE zt<)Nu#tVfxom!nVkR@9u4(l)`tegwNcn{_iM4j2GAhGx!H)oyTl#)An#+vR#IN)H3 zNsx7t2w-@5HgF^C4441%E$~P)M5Z0J7Tg4tUT_)WRjCtxh%^s!pm@>~ionGDA^V6h z0W_$g+xS5L)K-|nb?8Y4ZT!EFjpg@S1(A{VLY+AP~b58nVWo2f1W*_VRC`Vv+OGya@Z@Qto0=8q;Zl3zSad&vqAAr zIFzVyJ0<*Es8cYlY6#BTYu3y|=00;9LiH+$x~Ld~{5C7*pqZD=-KO+2YUVK7_Tg!) z?O(jd@1noC-#Bo?exhRt!LApRaaj0lkMFwGtK=&~a z>_J*^z(>y%!DxeO4|NyNM!M9BY%kF?e?g2B^hek-s1JrA$Q4Y>a4qGQ6hc;p1qbI42ci$ZI-0!dE#ZJCufe zl=T!Y5d4wvCO%Z>LI@pzC|#apqc{srTiFbh8UuT$f*_)DO#%;{prp_xS3xw3UkjG( zv#epEWxw;;uYd5uf8K4qXZ<{x`R4oG%-N#ptd2n^)vi%Xws0r4KrZt+BIVli_#Pvv z3kzahh_t4bpsj(!LAZdmF9f1kvK_1H3hlweJ@pzQw2ST%0j^oCS7}djmIjjcY|hx^ zdVn@Bv1ul6vR`IrZiG?+C=(@x7nx->Un{Ed60`C?r3MNc6cNN6VGuioapS~o|4qsX z84>zy6aiU>jAQ0b;|_BG&sKz?%g5Yn zw1mikr65)I;weOhux?LOK;BeWi3)AfYEf}YiwY4-xu}3aF9M!Ysv@ESfr*GWU0VxZ z5_q3$l0bKyDIkO(Hx>ipA=z(0IzZx3N&G=QA1Sr8Q%)+VZBV8R7H1hcq6YO@rr8s5 z5RV5izlaR5GzA+6o#2{LllRz;M%BWB{^Fe3MF*wqRx&pG z@N6RUDhVi%?WB|J6Rx`klNc=}oO=GS~LrxPvNFPy&qBdDyF?NbMcfj1L)hAI& zMiugJVi1@v&}=}w_Gtk1%*zikpwAXh6!r{sVIDAzBRf=?});E!r?zBA@eXi^7B zV-)%E1`HXJFQTv~t{v;Fj8&7dgzi9$7!FJ(q*#{*Fp8ceSL|q)*Lgt3Zj{CrdnzA${x2YIR6w!TRSUo05i6*4%Vs|bFtxsTF&=Jz=l!!q=3V;< zRBMarapd%M<{{>&n4oYW1l>hVL-QyI^VnzD;R{SSs*It9HwC&r(l~fchs4t0(g#S9 z{y641g8YnRkax~IY=WNj$)FIZzau?K$P?&k$gEXIaWLR9p=OR6qsHB!s8M6T*_*GF zqplYFn~dq&=eR=#^V*NHffOevz&LK}Uzng9enUi~KbjMZ6ox@y)6rzhbsa52o3StU zaV8Hj5m*ajPOx~H38bB}31j;zlG1`uQa2N(CpzjeoFEZf{J3c2dojWwg3wDJv$>> mean([-1.0, 2.5, 3.25, 5.75]) +2.625 + + +Calculate the standard median of discrete data: + +>>> median([2, 3, 4, 5]) +3.5 + + +Calculate the median, or 50th percentile, of data grouped into class intervals +centred on the data values provided. E.g. if your data points are rounded to +the nearest whole number: + +>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS +2.8333333333... + +This should be interpreted in this way: you have two data points in the class +interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in +the class interval 3.5-4.5. The median of these data points is 2.8333... + + +Calculating variability or spread +--------------------------------- + +================== ============================================= +Function Description +================== ============================================= +pvariance Population variance of data. +variance Sample variance of data. +pstdev Population standard deviation of data. +stdev Sample standard deviation of data. +================== ============================================= + +Calculate the standard deviation of sample data: + +>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS +4.38961843444... + +If you have previously calculated the mean, you can pass it as the optional +second argument to the four "spread" functions to avoid recalculating it: + +>>> data = [1, 2, 2, 4, 4, 4, 5, 6] +>>> mu = mean(data) +>>> pvariance(data, mu) +2.5 + + +Exceptions +---------- + +A single exception is defined: StatisticsError is a subclass of ValueError. + +""" + +__all__ = [ 'StatisticsError', + 'pstdev', 'pvariance', 'stdev', 'variance', + 'median', 'median_low', 'median_high', 'median_grouped', + 'mean', 'mode', 'harmonic_mean', + ] + +import collections +import math +import numbers + +from fractions import Fraction +from decimal import Decimal +from itertools import groupby +from bisect import bisect_left, bisect_right + + + +# === Exceptions === + +class StatisticsError(ValueError): + pass + + +# === Private utilities === + +def _sum(data, start=0): + """_sum(data [, start]) -> (type, sum, count) + + Return a high-precision sum of the given numeric data as a fraction, + together with the type to be converted to and the count of items. + + If optional argument ``start`` is given, it is added to the total. + If ``data`` is empty, ``start`` (defaulting to 0) is returned. + + + Examples + -------- + + >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75) + (, Fraction(11, 1), 5) + + Some sources of round-off error will be avoided: + + # Built-in sum returns zero. + >>> _sum([1e50, 1, -1e50] * 1000) + (, Fraction(1000, 1), 3000) + + Fractions and Decimals are also supported: + + >>> from fractions import Fraction as F + >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) + (, Fraction(63, 20), 4) + + >>> from decimal import Decimal as D + >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] + >>> _sum(data) + (, Fraction(6963, 10000), 4) + + Mixed types are currently treated as an error, except that int is + allowed. + """ + count = 0 + n, d = _exact_ratio(start) + partials = {d: n} + partials_get = partials.get + T = _coerce(int, type(start)) + for typ, values in groupby(data, type): + T = _coerce(T, typ) # or raise TypeError + for n,d in map(_exact_ratio, values): + count += 1 + partials[d] = partials_get(d, 0) + n + if None in partials: + # The sum will be a NAN or INF. We can ignore all the finite + # partials, and just look at this special one. + total = partials[None] + assert not _isfinite(total) + else: + # Sum all the partial sums using builtin sum. + # FIXME is this faster if we sum them in order of the denominator? + total = sum(Fraction(n, d) for d, n in sorted(partials.items())) + return (T, total, count) + + +def _isfinite(x): + try: + return x.is_finite() # Likely a Decimal. + except AttributeError: + return math.isfinite(x) # Coerces to float first. + + +def _coerce(T, S): + """Coerce types T and S to a common type, or raise TypeError. + + Coercion rules are currently an implementation detail. See the CoerceTest + test class in test_statistics for details. + """ + # See http://bugs.python.org/issue24068. + assert T is not bool, "initial type T is bool" + # If the types are the same, no need to coerce anything. Put this + # first, so that the usual case (no coercion needed) happens as soon + # as possible. + if T is S: return T + # Mixed int & other coerce to the other type. + if S is int or S is bool: return T + if T is int: return S + # If one is a (strict) subclass of the other, coerce to the subclass. + if issubclass(S, T): return S + if issubclass(T, S): return T + # Ints coerce to the other type. + if issubclass(T, int): return S + if issubclass(S, int): return T + # Mixed fraction & float coerces to float (or float subclass). + if issubclass(T, Fraction) and issubclass(S, float): + return S + if issubclass(T, float) and issubclass(S, Fraction): + return T + # Any other combination is disallowed. + msg = "don't know how to coerce %s and %s" + raise TypeError(msg % (T.__name__, S.__name__)) + + +def _exact_ratio(x): + """Return Real number x to exact (numerator, denominator) pair. + + >>> _exact_ratio(0.25) + (1, 4) + + x is expected to be an int, Fraction, Decimal or float. + """ + try: + # Optimise the common case of floats. We expect that the most often + # used numeric type will be builtin floats, so try to make this as + # fast as possible. + if type(x) is float or type(x) is Decimal: + return x.as_integer_ratio() + try: + # x may be an int, Fraction, or Integral ABC. + return (x.numerator, x.denominator) + except AttributeError: + try: + # x may be a float or Decimal subclass. + return x.as_integer_ratio() + except AttributeError: + # Just give up? + pass + except (OverflowError, ValueError): + # float NAN or INF. + assert not _isfinite(x) + return (x, None) + msg = "can't convert type '{}' to numerator/denominator" + raise TypeError(msg.format(type(x).__name__)) + + +def _convert(value, T): + """Convert value to given numeric type T.""" + if type(value) is T: + # This covers the cases where T is Fraction, or where value is + # a NAN or INF (Decimal or float). + return value + if issubclass(T, int) and value.denominator != 1: + T = float + try: + # FIXME: what do we do if this overflows? + return T(value) + except TypeError: + if issubclass(T, Decimal): + return T(value.numerator)/T(value.denominator) + else: + raise + + +def _counts(data): + # Generate a table of sorted (value, frequency) pairs. + table = collections.Counter(iter(data)).most_common() + if not table: + return table + # Extract the values with the highest frequency. + maxfreq = table[0][1] + for i in range(1, len(table)): + if table[i][1] != maxfreq: + table = table[:i] + break + return table + + +def _find_lteq(a, x): + 'Locate the leftmost value exactly equal to x' + i = bisect_left(a, x) + if i != len(a) and a[i] == x: + return i + raise ValueError + + +def _find_rteq(a, l, x): + 'Locate the rightmost value exactly equal to x' + i = bisect_right(a, x, lo=l) + if i != (len(a)+1) and a[i-1] == x: + return i-1 + raise ValueError + + +def _fail_neg(values, errmsg='negative value'): + """Iterate over values, failing if any are less than zero.""" + for x in values: + if x < 0: + raise StatisticsError(errmsg) + yield x + + +# === Measures of central tendency (averages) === + +def mean(data): + """Return the sample arithmetic mean of data. + + >>> mean([1, 2, 3, 4, 4]) + 2.8 + + >>> from fractions import Fraction as F + >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)]) + Fraction(13, 21) + + >>> from decimal import Decimal as D + >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")]) + Decimal('0.5625') + + If ``data`` is empty, StatisticsError will be raised. + """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 1: + raise StatisticsError('mean requires at least one data point') + T, total, count = _sum(data) + assert count == n + return _convert(total/n, T) + + +def harmonic_mean(data): + """Return the harmonic mean of data. + + The harmonic mean, sometimes called the subcontrary mean, is the + reciprocal of the arithmetic mean of the reciprocals of the data, + and is often appropriate when averaging quantities which are rates + or ratios, for example speeds. Example: + + Suppose an investor purchases an equal value of shares in each of + three companies, with P/E (price/earning) ratios of 2.5, 3 and 10. + What is the average P/E ratio for the investor's portfolio? + + >>> harmonic_mean([2.5, 3, 10]) # For an equal investment portfolio. + 3.6 + + Using the arithmetic mean would give an average of about 5.167, which + is too high. + + If ``data`` is empty, or any element is less than zero, + ``harmonic_mean`` will raise ``StatisticsError``. + """ + # For a justification for using harmonic mean for P/E ratios, see + # http://fixthepitch.pellucid.com/comps-analysis-the-missing-harmony-of-summary-statistics/ + # http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2621087 + if iter(data) is data: + data = list(data) + errmsg = 'harmonic mean does not support negative values' + n = len(data) + if n < 1: + raise StatisticsError('harmonic_mean requires at least one data point') + elif n == 1: + x = data[0] + if isinstance(x, (numbers.Real, Decimal)): + if x < 0: + raise StatisticsError(errmsg) + return x + else: + raise TypeError('unsupported type') + try: + T, total, count = _sum(1/x for x in _fail_neg(data, errmsg)) + except ZeroDivisionError: + return 0 + assert count == n + return _convert(n/total, T) + + +# FIXME: investigate ways to calculate medians without sorting? Quickselect? +def median(data): + """Return the median (middle value) of numeric data. + + When the number of data points is odd, return the middle data point. + When the number of data points is even, the median is interpolated by + taking the average of the two middle values: + + >>> median([1, 3, 5]) + 3 + >>> median([1, 3, 5, 7]) + 4.0 + + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + if n%2 == 1: + return data[n//2] + else: + i = n//2 + return (data[i - 1] + data[i])/2 + + +def median_low(data): + """Return the low median of numeric data. + + When the number of data points is odd, the middle value is returned. + When it is even, the smaller of the two middle values is returned. + + >>> median_low([1, 3, 5]) + 3 + >>> median_low([1, 3, 5, 7]) + 3 + + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + if n%2 == 1: + return data[n//2] + else: + return data[n//2 - 1] + + +def median_high(data): + """Return the high median of data. + + When the number of data points is odd, the middle value is returned. + When it is even, the larger of the two middle values is returned. + + >>> median_high([1, 3, 5]) + 3 + >>> median_high([1, 3, 5, 7]) + 5 + + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + return data[n//2] + + +def median_grouped(data, interval=1): + """Return the 50th percentile (median) of grouped continuous data. + + >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5]) + 3.7 + >>> median_grouped([52, 52, 53, 54]) + 52.5 + + This calculates the median as the 50th percentile, and should be + used when your data is continuous and grouped. In the above example, + the values 1, 2, 3, etc. actually represent the midpoint of classes + 0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in + class 3.5-4.5, and interpolation is used to estimate it. + + Optional argument ``interval`` represents the class interval, and + defaults to 1. Changing the class interval naturally will change the + interpolated 50th percentile value: + + >>> median_grouped([1, 3, 3, 5, 7], interval=1) + 3.25 + >>> median_grouped([1, 3, 3, 5, 7], interval=2) + 3.5 + + This function does not check whether the data points are at least + ``interval`` apart. + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + elif n == 1: + return data[0] + # Find the value at the midpoint. Remember this corresponds to the + # centre of the class interval. + x = data[n//2] + for obj in (x, interval): + if isinstance(obj, (str, bytes)): + raise TypeError('expected number but got %r' % obj) + try: + L = x - interval/2 # The lower limit of the median interval. + except TypeError: + # Mixed type. For now we just coerce to float. + L = float(x) - float(interval)/2 + + # Uses bisection search to search for x in data with log(n) time complexity + # Find the position of leftmost occurrence of x in data + l1 = _find_lteq(data, x) + # Find the position of rightmost occurrence of x in data[l1...len(data)] + # Assuming always l1 <= l2 + l2 = _find_rteq(data, l1, x) + cf = l1 + f = l2 - l1 + 1 + return L + interval*(n/2 - cf)/f + + +def mode(data): + """Return the most common data point from discrete or nominal data. + + ``mode`` assumes discrete data, and returns a single value. This is the + standard treatment of the mode as commonly taught in schools: + + >>> mode([1, 1, 2, 3, 3, 3, 3, 4]) + 3 + + This also works with nominal (non-numeric) data: + + >>> mode(["red", "blue", "blue", "red", "green", "red", "red"]) + 'red' + + If there is not exactly one most common value, ``mode`` will raise + StatisticsError. + """ + # Generate a table of sorted (value, frequency) pairs. + table = _counts(data) + if len(table) == 1: + return table[0][0] + elif table: + raise StatisticsError( + 'no unique mode; found %d equally common values' % len(table) + ) + else: + raise StatisticsError('no mode for empty data') + + +# === Measures of spread === + +# See http://mathworld.wolfram.com/Variance.html +# http://mathworld.wolfram.com/SampleVariance.html +# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance +# +# Under no circumstances use the so-called "computational formula for +# variance", as that is only suitable for hand calculations with a small +# amount of low-precision data. It has terrible numeric properties. +# +# See a comparison of three computational methods here: +# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ + +def _ss(data, c=None): + """Return sum of square deviations of sequence data. + + If ``c`` is None, the mean is calculated in one pass, and the deviations + from the mean are calculated in a second pass. Otherwise, deviations are + calculated from ``c`` as given. Use the second case with care, as it can + lead to garbage results. + """ + if c is None: + c = mean(data) + T, total, count = _sum((x-c)**2 for x in data) + # The following sum should mathematically equal zero, but due to rounding + # error may not. + U, total2, count2 = _sum((x-c) for x in data) + assert T == U and count == count2 + total -= total2**2/len(data) + assert not total < 0, 'negative sum of square deviations: %f' % total + return (T, total) + + +def variance(data, xbar=None): + """Return the sample variance of data. + + data should be an iterable of Real-valued numbers, with at least two + values. The optional argument xbar, if given, should be the mean of + the data. If it is missing or None, the mean is automatically calculated. + + Use this function when your data is a sample from a population. To + calculate the variance from the entire population, see ``pvariance``. + + Examples: + + >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5] + >>> variance(data) + 1.3720238095238095 + + If you have already calculated the mean of your data, you can pass it as + the optional second argument ``xbar`` to avoid recalculating it: + + >>> m = mean(data) + >>> variance(data, m) + 1.3720238095238095 + + This function does not check that ``xbar`` is actually the mean of + ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or + impossible results. + + Decimals and Fractions are supported: + + >>> from decimal import Decimal as D + >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) + Decimal('31.01875') + + >>> from fractions import Fraction as F + >>> variance([F(1, 6), F(1, 2), F(5, 3)]) + Fraction(67, 108) + + """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 2: + raise StatisticsError('variance requires at least two data points') + T, ss = _ss(data, xbar) + return _convert(ss/(n-1), T) + + +def pvariance(data, mu=None): + """Return the population variance of ``data``. + + data should be an iterable of Real-valued numbers, with at least one + value. The optional argument mu, if given, should be the mean of + the data. If it is missing or None, the mean is automatically calculated. + + Use this function to calculate the variance from the entire population. + To estimate the variance from a sample, the ``variance`` function is + usually a better choice. + + Examples: + + >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25] + >>> pvariance(data) + 1.25 + + If you have already calculated the mean of the data, you can pass it as + the optional second argument to avoid recalculating it: + + >>> mu = mean(data) + >>> pvariance(data, mu) + 1.25 + + This function does not check that ``mu`` is actually the mean of ``data``. + Giving arbitrary values for ``mu`` may lead to invalid or impossible + results. + + Decimals and Fractions are supported: + + >>> from decimal import Decimal as D + >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) + Decimal('24.815') + + >>> from fractions import Fraction as F + >>> pvariance([F(1, 4), F(5, 4), F(1, 2)]) + Fraction(13, 72) + + """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 1: + raise StatisticsError('pvariance requires at least one data point') + T, ss = _ss(data, mu) + return _convert(ss/n, T) + + +def stdev(data, xbar=None): + """Return the square root of the sample variance. + + See ``variance`` for arguments and other details. + + >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75]) + 1.0810874155219827 + + """ + var = variance(data, xbar) + try: + return var.sqrt() + except AttributeError: + return math.sqrt(var) + + +def pstdev(data, mu=None): + """Return the square root of the population variance. + + See ``pvariance`` for arguments and other details. + + >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75]) + 0.986893273527251 + + """ + var = pvariance(data, mu) + try: + return var.sqrt() + except AttributeError: + return math.sqrt(var)