mirror of
				https://github.com/titanscouting/tra-analysis.git
				synced 2025-10-24 18:09:20 +00:00 
			
		
		
		
	testing release 1.2 of analysis.py
This commit is contained in:
		
										
											Binary file not shown.
										
									
								
							| @@ -7,10 +7,16 @@ | |||||||
| #    current benchmark of optimization: 1.33 times faster | #    current benchmark of optimization: 1.33 times faster | ||||||
| # setup: | # setup: | ||||||
|  |  | ||||||
| __version__ = "1.1.13.009" | __version__ = "1.2.0.000" | ||||||
|  |  | ||||||
| # changelog should be viewed using print(analysis.__changelog__) | # changelog should be viewed using print(analysis.__changelog__) | ||||||
| __changelog__ = """changelog: | __changelog__ = """changelog: | ||||||
|  |     1.2.0.000: | ||||||
|  |         - cleaned up wild card imports with scipy and sklearn | ||||||
|  |         - added CorrelationTests class | ||||||
|  |         - added StatisticalTests class | ||||||
|  |         - added several correlation tests to CorrelationTests | ||||||
|  |         - added several statistical tests to StatisticalTests | ||||||
|     1.1.13.009: |     1.1.13.009: | ||||||
|         - moved elo, glicko2, trueskill functions under class Metrics |         - moved elo, glicko2, trueskill functions under class Metrics | ||||||
|     1.1.13.008: |     1.1.13.008: | ||||||
| @@ -288,9 +294,9 @@ import numba | |||||||
| from numba import jit | from numba import jit | ||||||
| import numpy as np | import numpy as np | ||||||
| import scipy | import scipy | ||||||
| from scipy import * | from scipy import optimize, stats | ||||||
| import sklearn | import sklearn | ||||||
| from sklearn import * | from sklearn import preprocessing, pipeline, linear_model, metrics, cluster, decomposition, tree, neighbors, naive_bayes, svm, model_selection, ensemble | ||||||
| from analysis import trueskill as Trueskill | from analysis import trueskill as Trueskill | ||||||
|  |  | ||||||
| class error(ValueError): | class error(ValueError): | ||||||
| @@ -698,3 +704,206 @@ def random_forest_regressor(data, outputs, test_size, n_estimators="warn", crite | |||||||
|     predictions = kernel.predict(data_test) |     predictions = kernel.predict(data_test) | ||||||
|  |  | ||||||
|     return kernel, RegressionMetrics(predictions, outputs_test) |     return kernel, RegressionMetrics(predictions, outputs_test) | ||||||
|  |  | ||||||
class CorrelationTests:
    """Namespace of thin wrappers around ``scipy.stats`` correlation tests.

    Each wrapper forwards its arguments to a single SciPy function and
    repacks the leading elements of the result into a plain dict. The methods
    are static, so they behave the same whether they are called on the class
    (``CorrelationTests.pearson(x, y)``) or on an instance.
    """

    @staticmethod
    def anova_oneway(*args): #expects arrays of samples
        """Run ``scipy.stats.f_oneway`` over the given sample arrays.

        Returns a dict with keys ``"F-value"`` and ``"p-value"``.
        """
        results = scipy.stats.f_oneway(*args)
        return {"F-value": results[0], "p-value": results[1]}

    @staticmethod
    def pearson(x, y):
        """Run ``scipy.stats.pearsonr`` on ``x`` and ``y``.

        Returns a dict with keys ``"r-value"`` and ``"p-value"``.
        """
        results = scipy.stats.pearsonr(x, y)
        return {"r-value": results[0], "p-value": results[1]}

    @staticmethod
    def spearman(a, b = None, axis = 0, nan_policy = 'propagate'):
        """Run ``scipy.stats.spearmanr``; all keywords are forwarded unchanged.

        Returns a dict with keys ``"r-value"`` and ``"p-value"``.
        """
        results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
        return {"r-value": results[0], "p-value": results[1]}

    @staticmethod
    def point_biserial(x, y):
        """Run ``scipy.stats.pointbiserialr`` on ``x`` and ``y``.

        Returns a dict with keys ``"r-value"`` and ``"p-value"``.
        """
        results = scipy.stats.pointbiserialr(x, y)
        return {"r-value": results[0], "p-value": results[1]}

    @staticmethod
    def kendall(x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
        """Run ``scipy.stats.kendalltau`` on ``x`` and ``y``.

        ``initial_lexsort`` is only forwarded when the caller sets it.

        Returns a dict with keys ``"tau"`` and ``"p-value"``.
        """
        extra = {}
        if initial_lexsort is not None:
            # NOTE(review): SciPy deprecated this keyword as unused and newer
            # releases appear to reject it; forwarding it only on request keeps
            # the default call working on both old and new SciPy -- confirm.
            extra["initial_lexsort"] = initial_lexsort
        results = scipy.stats.kendalltau(x, y, nan_policy = nan_policy, method = method, **extra)
        return {"tau": results[0], "p-value": results[1]}

    @staticmethod
    def kendall_weighted(x, y, rank = True, weigher = None, additive = True):
        """Run ``scipy.stats.weightedtau``; all keywords are forwarded unchanged.

        Returns a dict with keys ``"tau"`` and ``"p-value"`` taken from the
        first two elements of SciPy's result.
        """
        # NOTE(review): SciPy's docs suggest weightedtau has no p-value and
        # reports NaN in that slot -- confirm before relying on "p-value".
        results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
        return {"tau": results[0], "p-value": results[1]}

    @staticmethod
    def mgc(x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
        """Run ``scipy.stats.multiscale_graphcorr``.

        Every keyword, including the ``None`` default of ``compute_distance``,
        is forwarded as given.

        Returns a dict with keys ``"k-value"``, ``"p-value"`` and ``"data"``
        holding the first three elements of SciPy's result.
        """
        results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
        return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value
|  |  | ||||||
class StatisticalTests:
    """Namespace of thin wrappers around ``scipy.stats`` hypothesis tests.

    Each wrapper forwards its arguments to a single SciPy function and
    repacks the leading elements of the result into a plain dict. The methods
    are static, so they behave the same whether they are called on the class
    or on an instance.
    """

    @staticmethod
    def ttest_onesample(a, popmean, axis = 0, nan_policy = 'propagate'):
        """Run ``scipy.stats.ttest_1samp``; returns ``"t-value"`` and ``"p-value"``."""
        results = scipy.stats.ttest_1samp(a, popmean, axis = axis, nan_policy = nan_policy)
        return {"t-value": results[0], "p-value": results[1]}

    @staticmethod
    def ttest_independent(a, b, equal = True, nan_policy = 'propagate'):
        """Run ``scipy.stats.ttest_ind``; ``equal`` is passed as ``equal_var``.

        Returns a dict with keys ``"t-value"`` and ``"p-value"``.
        """
        results = scipy.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy)
        return {"t-value": results[0], "p-value": results[1]}

    @staticmethod
    def ttest_statistic(o1, o2, equal = True):
        """Run ``scipy.stats.ttest_ind_from_stats`` on two summary mappings.

        ``o1`` and ``o2`` are each indexed with the keys ``"mean"``, ``"std"``
        and ``"nobs"``; ``equal`` is passed as ``equal_var``.

        Returns a dict with keys ``"t-value"`` and ``"p-value"``.
        """
        results = scipy.stats.ttest_ind_from_stats(o1["mean"], o1["std"], o1["nobs"], o2["mean"], o2["std"], o2["nobs"], equal_var = equal)
        return {"t-value": results[0], "p-value": results[1]}

    @staticmethod
    def ttest_related(a, b, axis = 0, nan_policy = 'propagate'):
        """Run ``scipy.stats.ttest_rel``; returns ``"t-value"`` and ``"p-value"``."""
        results = scipy.stats.ttest_rel(a, b, axis = axis, nan_policy = nan_policy)
        return {"t-value": results[0], "p-value": results[1]}

    @staticmethod
    def ks_fitness(rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'):
        """Run ``scipy.stats.kstest``; returns ``"ks-value"`` and ``"p-value"``."""
        results = scipy.stats.kstest(rvs, cdf, args = args, N = N, alternative = alternative, mode = mode)
        return {"ks-value": results[0], "p-value": results[1]}

    @staticmethod
    def chisquare(f_obs, f_exp = None, ddof = None, axis = 0):
        """Run ``scipy.stats.chisquare``.

        ``ddof = None`` is treated as 0 before it is forwarded.

        Returns a dict with keys ``"chisquared-value"`` and ``"p-value"``.
        """
        # SciPy does arithmetic on ddof, so None must not be passed through.
        ddof = 0 if ddof is None else ddof
        results = scipy.stats.chisquare(f_obs, f_exp = f_exp, ddof = ddof, axis = axis)
        return {"chisquared-value": results[0], "p-value": results[1]}

    @staticmethod
    def powerdivergence(f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None):
        """Run ``scipy.stats.power_divergence``.

        ``ddof = None`` is treated as 0 before it is forwarded.

        Returns a dict with keys ``"powerdivergence-value"`` and ``"p-value"``.
        """
        # SciPy does arithmetic on ddof, so None must not be passed through.
        ddof = 0 if ddof is None else ddof
        results = scipy.stats.power_divergence(f_obs, f_exp = f_exp, ddof = ddof, axis = axis, lambda_ = lambda_)
        return {"powerdivergence-value": results[0], "p-value": results[1]}

    @staticmethod
    def ks_twosample(x, y, alternative = 'two-sided', mode = 'auto'):
        """Run ``scipy.stats.ks_2samp``; returns ``"ks-value"`` and ``"p-value"``."""
        results = scipy.stats.ks_2samp(x, y, alternative = alternative, mode = mode)
        return {"ks-value": results[0], "p-value": results[1]}

    @staticmethod
    def es_twosample(x, y, t = (0.4, 0.8)):
        """Run ``scipy.stats.epps_singleton_2samp``; returns ``"es-value"`` and ``"p-value"``."""
        results = scipy.stats.epps_singleton_2samp(x, y, t = t)
        return {"es-value": results[0], "p-value": results[1]}

    @staticmethod
    def mw_rank(x, y, use_continuity = True, alternative = None):
        """Run ``scipy.stats.mannwhitneyu``.

        ``alternative`` is only forwarded when it is not ``None``, so SciPy's
        own default applies otherwise.

        Returns a dict with keys ``"u-value"`` and ``"p-value"``.
        """
        options = {"use_continuity": use_continuity}
        if alternative is not None:
            # NOTE(review): newer SciPy appears to reject an explicit
            # alternative=None, so it is left out instead -- confirm.
            options["alternative"] = alternative
        results = scipy.stats.mannwhitneyu(x, y, **options)
        return {"u-value": results[0], "p-value": results[1]}

    @staticmethod
    def mw_tiecorrection(rank_values):
        """Run ``scipy.stats.tiecorrect``; returns ``{"correction-factor": ...}``."""
        results = scipy.stats.tiecorrect(rank_values)
        return {"correction-factor": results}

    @staticmethod
    def rankdata(a, method = 'average'):
        """Return the result of ``scipy.stats.rankdata`` unchanged."""
        results = scipy.stats.rankdata(a, method = method)
        return results

    @staticmethod
    def wilcoxon_ranksum(a, b): # this seems to be superseded by Mann Whitney Wilcoxon U Test
        """Run ``scipy.stats.ranksums``; returns ``"u-value"`` and ``"p-value"``."""
        results = scipy.stats.ranksums(a, b)
        return {"u-value": results[0], "p-value": results[1]}

    @staticmethod
    def wilcoxon_signedrank(x, y = None, method = 'wilcox', correction = False, alternative = 'two-sided'):
        """Run ``scipy.stats.wilcoxon``.

        ``method`` selects the zero-difference handling and is passed to SciPy
        as ``zero_method`` (its default ``'wilcox'`` is a ``zero_method``
        value, not a value of SciPy's own ``method`` keyword).

        Returns a dict with keys ``"t-value"`` and ``"p-value"``.
        """
        results = scipy.stats.wilcoxon(x, y = y, zero_method = method, correction = correction, alternative = alternative)
        return {"t-value": results[0], "p-value": results[1]}

    @staticmethod
    def kw_htest(*args, nan_policy = 'propagate'):
        """Run ``scipy.stats.kruskal``; returns ``"h-value"`` and ``"p-value"``."""
        results = scipy.stats.kruskal(*args, nan_policy = nan_policy)
        return {"h-value": results[0], "p-value": results[1]}

    @staticmethod
    def friedman_chisquare(*args):
        """Run ``scipy.stats.friedmanchisquare``; returns ``"chisquared-value"`` and ``"p-value"``."""
        results = scipy.stats.friedmanchisquare(*args)
        return {"chisquared-value": results[0], "p-value": results[1]}

    @staticmethod
    def bm_wtest(x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'):
        """Run ``scipy.stats.brunnermunzel``; returns ``"w-value"`` and ``"p-value"``."""
        results = scipy.stats.brunnermunzel(x, y, alternative = alternative, distribution = distribution, nan_policy = nan_policy)
        return {"w-value": results[0], "p-value": results[1]}

    @staticmethod
    def combine_pvalues(pvalues, method = 'fisher', weights = None):
        """Run ``scipy.stats.combine_pvalues``; returns ``"combined-statistic"`` and ``"p-value"``."""
        results = scipy.stats.combine_pvalues(pvalues, method = method, weights = weights)
        return {"combined-statistic": results[0], "p-value": results[1]}

    @staticmethod
    def jb_fitness(x):
        """Run ``scipy.stats.jarque_bera``; returns ``"jb-value"`` and ``"p-value"``."""
        results = scipy.stats.jarque_bera(x)
        return {"jb-value": results[0], "p-value": results[1]}

    @staticmethod
    def ab_equality(x, y):
        """Run ``scipy.stats.ansari``; returns ``"ab-value"`` and ``"p-value"``."""
        results = scipy.stats.ansari(x, y)
        return {"ab-value": results[0], "p-value": results[1]}

    @staticmethod
    def bartlett_variance(*args):
        """Run ``scipy.stats.bartlett``; returns ``"t-value"`` and ``"p-value"``."""
        results = scipy.stats.bartlett(*args)
        return {"t-value": results[0], "p-value": results[1]}

    @staticmethod
    def levene_variance(*args, center = 'median', proportiontocut = 0.05):
        """Run ``scipy.stats.levene``; returns ``"w-value"`` and ``"p-value"``."""
        results = scipy.stats.levene(*args, center = center, proportiontocut = proportiontocut)
        return {"w-value": results[0], "p-value": results[1]}

    @staticmethod
    def sw_normality(x):
        """Run ``scipy.stats.shapiro``; returns ``"w-value"`` and ``"p-value"``."""
        results = scipy.stats.shapiro(x)
        return {"w-value": results[0], "p-value": results[1]}

    @staticmethod
    def shapiro(x):
        """Placeholder: ignores ``x`` and returns a fixed string.

        ``sw_normality`` is the method that actually calls
        ``scipy.stats.shapiro``.
        """
        return "destroyed by facts and logic"

    @staticmethod
    def ad_onesample(x, dist = 'norm'):
        """Run ``scipy.stats.anderson``.

        Returns a dict with keys ``"d-value"``, ``"critical-values"`` and
        ``"significance-value"`` holding the first three result elements.
        """
        results = scipy.stats.anderson(x, dist = dist)
        return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}

    @staticmethod
    def ad_ksample(samples, midrank = True):
        """Run ``scipy.stats.anderson_ksamp``.

        Returns a dict with keys ``"d-value"``, ``"critical-values"`` and
        ``"significance-value"`` holding the first three result elements.
        """
        results = scipy.stats.anderson_ksamp(samples, midrank = midrank)
        return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}

    @staticmethod
    def binomial(x, n = None, p = 0.5, alternative = 'two-sided'):
        """Run a binomial test and return ``{"p-value": ...}``.

        Uses ``scipy.stats.binom_test`` when the installed SciPy provides it,
        otherwise falls back to ``scipy.stats.binomtest``.
        """
        legacy = getattr(scipy.stats, "binom_test", None)
        if legacy is not None:
            return {"p-value": legacy(x, n = n, p = p, alternative = alternative)}
        # NOTE(review): binom_test is absent from newer SciPy. Its documented
        # input forms were a success count plus n, or a (successes, failures)
        # pair with n omitted; both are mapped onto binomtest here -- confirm.
        if n is None:
            successes, failures = x
            x, n = successes, successes + failures
        results = scipy.stats.binomtest(x, n, p = p, alternative = alternative)
        return {"p-value": results.pvalue}

    @staticmethod
    def fk_variance(*args, center = 'median', proportiontocut = 0.05):
        """Run ``scipy.stats.fligner``; returns ``"h-value"`` and ``"p-value"``."""
        results = scipy.stats.fligner(*args, center = center, proportiontocut = proportiontocut)
        return {"h-value": results[0], "p-value": results[1]} # unknown if the statistic is an h value

    @staticmethod
    def mood_mediantest(*args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'):
        """Run ``scipy.stats.median_test``.

        Returns a dict with keys ``"chisquared-value"``, ``"p-value"``,
        ``"m-value"`` and ``"table"`` holding the four result elements.
        """
        results = scipy.stats.median_test(*args, ties = ties, correction = correction, lambda_ = lambda_, nan_policy = nan_policy)
        return {"chisquared-value": results[0], "p-value": results[1], "m-value": results[2], "table": results[3]}

    @staticmethod
    def mood_equalscale(x, y, axis = 0):
        """Run ``scipy.stats.mood``; returns ``"z-score"`` and ``"p-value"``."""
        results = scipy.stats.mood(x, y, axis = axis)
        return {"z-score": results[0], "p-value": results[1]}

    @staticmethod
    def skewtest(a, axis = 0, nan_policy = 'propagate'):
        """Run ``scipy.stats.skewtest``; returns ``"z-score"`` and ``"p-value"``."""
        results = scipy.stats.skewtest(a, axis = axis, nan_policy = nan_policy)
        return {"z-score": results[0], "p-value": results[1]}

    @staticmethod
    def kurtosistest(a, axis = 0, nan_policy = 'propagate'):
        """Run ``scipy.stats.kurtosistest``; returns ``"z-score"`` and ``"p-value"``."""
        results = scipy.stats.kurtosistest(a, axis = axis, nan_policy = nan_policy)
        return {"z-score": results[0], "p-value": results[1]}

    @staticmethod
    def normaltest(a, axis = 0, nan_policy = 'propagate'):
        """Run ``scipy.stats.normaltest``; returns ``"z-score"`` and ``"p-value"``."""
        results = scipy.stats.normaltest(a, axis = axis, nan_policy = nan_policy)
        return {"z-score": results[0], "p-value": results[1]}
		Reference in New Issue
	
	Block a user