mirror of
				https://github.com/titanscouting/tra-analysis.git
				synced 2025-10-24 18:09:20 +00:00 
			
		
		
		
	analysis pkg v 1.0.0.12
analysis.py v 1.2.0.004
This commit is contained in:
		| @@ -7,10 +7,17 @@ | ||||
| #    current benchmark of optimization: 1.33 times faster | ||||
| # setup: | ||||
|  | ||||
| __version__ = "1.2.0.003" | ||||
| __version__ = "1.2.0.004" | ||||
|  | ||||
| # changelog should be viewed using print(analysis.__changelog__) | ||||
| __changelog__ = """changelog: | ||||
|     1.2.0.004: | ||||
|         - fixed __all__ to reflect the correct functions and classes | ||||
|         - fixed CorrelationTests and StatisticalTests class functions to require self invocation | ||||
|         - added missing math import | ||||
|         - fixed KNN class functions to require self invocation | ||||
|         - fixed Metrics class functions to require self invocation | ||||
|         - various spelling fixes in CorrelationTests and StatisticalTests | ||||
|     1.2.0.003: | ||||
|         - bug fixes with CorrelationTests and StatisticalTests | ||||
|         - moved glicko2 and trueskill to the metrics subpackage | ||||
| @@ -275,22 +282,19 @@ __all__ = [ | ||||
|     'z_normalize', | ||||
|     'histo_analysis', | ||||
|     'regression', | ||||
|     'elo', | ||||
|     'glicko2', | ||||
|     'trueskill', | ||||
|     'Metrics', | ||||
|     'RegressionMetrics', | ||||
|     'ClassificationMetrics', | ||||
|     'kmeans', | ||||
|     'pca', | ||||
|     'decisiontree', | ||||
|     'knn_classifier', | ||||
|     'knn_regressor', | ||||
|     'KNN', | ||||
|     'NaiveBayes', | ||||
|     'SVM', | ||||
|     'random_forest_classifier', | ||||
|     'random_forest_regressor', | ||||
|     'CorrelationTests', | ||||
|     'RegressionTests', | ||||
|     'StatisticalTests', | ||||
|     # all statistics functions left out due to integration in other functions | ||||
| ] | ||||
|  | ||||
| @@ -301,6 +305,7 @@ __all__ = [ | ||||
| import csv | ||||
| from analysis.metrics import elo as Elo | ||||
| from analysis.metrics import glicko2 as Glicko2 | ||||
| import math | ||||
| import numba | ||||
| from numba import jit | ||||
| import numpy as np | ||||
| @@ -467,11 +472,11 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array | ||||
|  | ||||
| class Metrics: | ||||
|  | ||||
|     def elo(starting_score, opposing_score, observed, N, K): | ||||
|     def elo(self, starting_score, opposing_score, observed, N, K): | ||||
|  | ||||
|         return Elo.calculate(starting_score, opposing_score, observed, N, K) | ||||
|  | ||||
|     def glicko2(starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations): | ||||
|     def glicko2(self, starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations): | ||||
|  | ||||
|         player = Glicko2.Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol) | ||||
|  | ||||
| @@ -479,7 +484,7 @@ class Metrics: | ||||
|  | ||||
|         return (player.rating, player.rd, player.vol) | ||||
|  | ||||
|     def trueskill(teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]] | ||||
|     def trueskill(self, teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]] | ||||
|  | ||||
|         team_ratings = [] | ||||
|  | ||||
| @@ -584,7 +589,7 @@ def decisiontree(data, labels, test_size = 0.3, criterion = "gini", splitter = " | ||||
|  | ||||
| class KNN: | ||||
|  | ||||
|     def knn_classifier(data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling | ||||
|     def knn_classifier(self, data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling | ||||
|  | ||||
|         data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) | ||||
|         model = sklearn.neighbors.KNeighborsClassifier() | ||||
| @@ -593,7 +598,7 @@ class KNN: | ||||
|  | ||||
|         return model, ClassificationMetrics(predictions, labels_test) | ||||
|  | ||||
|     def knn_regressor(data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None): | ||||
|     def knn_regressor(self, data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None): | ||||
|  | ||||
|         data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1) | ||||
|         model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs) | ||||
| @@ -716,203 +721,203 @@ def random_forest_regressor(data, outputs, test_size, n_estimators="warn", crite | ||||
|  | ||||
| class CorrelationTests: | ||||
|  | ||||
|     def anova_oneway(*args): #expects arrays of samples | ||||
|     def anova_oneway(self, *args): #expects arrays of samples | ||||
|  | ||||
|         results = scipy.stats.f_oneway(*args) | ||||
|         return {"F-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def pearson(x, y): | ||||
|     def pearson(self, x, y): | ||||
|  | ||||
|         results = scipy.stats.pearsonr(x, y) | ||||
|         return {"r-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def spearman(a, b = None, axis = 0, nan_policy = 'propagate'): | ||||
|     def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'): | ||||
|  | ||||
|         results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy) | ||||
|         return {"r-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def point_biserial(x,y): | ||||
|     def point_biserial(self, x,y): | ||||
|  | ||||
|         results = scipy.stats.pointbiserialr(x, y) | ||||
|         return {"r-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def kendall(x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'): | ||||
|     def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'): | ||||
|  | ||||
|         results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method) | ||||
|         return {"tau": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def kendall_weighted(x, y, rank = True, weigher = None, additive = True): | ||||
|     def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True): | ||||
|  | ||||
|         results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive) | ||||
|         return {"tau": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def mgc(x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None): | ||||
|     def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None): | ||||
|  | ||||
|         results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state) | ||||
|         return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value | ||||
|  | ||||
| class StatisticalTests: | ||||
|  | ||||
|     def ttest_onesample(a, popmean, axis = 0, nan_policy = 'propagate'): | ||||
|     def ttest_onesample(self, a, popmean, axis = 0, nan_policy = 'propagate'): | ||||
|  | ||||
|         results = scipy.stats.ttest_1samp(a, popmean, axis = axis, nan_policy = nan_policy) | ||||
|         return {"t-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def ttest_independent(a, b, equal = True, nan_policy = 'propagate'): | ||||
|     def ttest_independent(self, a, b, equal = True, nan_policy = 'propagate'): | ||||
|  | ||||
|         results = scipt.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy) | ||||
|         results = scipy.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy) | ||||
|         return {"t-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def ttest_statistic(o1, o2, equal = True): | ||||
|     def ttest_statistic(self, o1, o2, equal = True): | ||||
|  | ||||
|         results = scipy.stats.ttest_ind_from_stats(o1["mean"], o1["std"], o1["nobs"], o2["mean"], o2["std"], o2["nobs"], equal_var = equal) | ||||
|         return {"t-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def ttest_related(a, b, axis = 0, nan_policy='propagate'): | ||||
|     def ttest_related(self, a, b, axis = 0, nan_policy='propagate'): | ||||
|  | ||||
|         results = scipy.stats.ttest_rel(a, b, axis = axis, nan_policy = nan_policy) | ||||
|         return {"t-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def ks_fitness(rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'): | ||||
|     def ks_fitness(self, rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'): | ||||
|  | ||||
|         results = scipy.stats.kstest(rvs, cdf, args = args, N = N, alternative = alternative, mode = mode) | ||||
|         return {"ks-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def chisquare(f_obs, f_exp = None, ddof = None, axis = 0): | ||||
|     def chisquare(self, f_obs, f_exp = None, ddof = None, axis = 0): | ||||
|  | ||||
|         results = scipy.stats.chisquare(f_obs, f_exp = f_exp, ddof = ddof, axis = axis) | ||||
|         return {"chisquared-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def powerdivergence(f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None): | ||||
|     def powerdivergence(self, f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None): | ||||
|  | ||||
|         results = scipy.stats.power_divergence(f_obs, f_exp = f_exp, ddof = ddof, axis = axis, lambda_ = lambda_) | ||||
|         return {"powerdivergence-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def ks_twosample(x, y, alternative = 'two_sided', mode = 'auto'): | ||||
|     def ks_twosample(self, x, y, alternative = 'two_sided', mode = 'auto'): | ||||
|          | ||||
|         results = scipy.stats.ks_2samp(x, y, alternative = alternative, mode = mode) | ||||
|         return {"ks-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def es_twosample(x, y, t = (0.4, 0.8)): | ||||
|     def es_twosample(self, x, y, t = (0.4, 0.8)): | ||||
|  | ||||
|         results = scipy.stats.epps_singleton_2samp(x, y, t = t) | ||||
|         return {"es-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def mw_rank(x, y, use_continuity = True, alternative = None): | ||||
|     def mw_rank(self, x, y, use_continuity = True, alternative = None): | ||||
|  | ||||
|         results = scipy.stats.mannwhitneyu(x, y, use_continuity = use_continuity, alternative = alternative) | ||||
|         return {"u-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def mw_tiecorrection(rank_values): | ||||
|     def mw_tiecorrection(self, rank_values): | ||||
|  | ||||
|         results = scipy.stats.tiecorrect(rank_values) | ||||
|         return {"correction-factor": results} | ||||
|  | ||||
|     def rankdata(a, method = 'average'): | ||||
|     def rankdata(self, a, method = 'average'): | ||||
|  | ||||
|         results = scipy.stats.rankdata(a, method = method) | ||||
|         return results | ||||
|  | ||||
|     def wilcoxon_ranksum(a, b): # this seems to be superceded by Mann Whitney Wilcoxon U Test | ||||
|     def wilcoxon_ranksum(self, a, b): # this seems to be superceded by Mann Whitney Wilcoxon U Test | ||||
|  | ||||
|         results = scipy.stats.ranksums(a, b) | ||||
|         return {"u-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def wilcoxon_signedrank(x, y = None, method = 'wilcox', correction = False, alternative = 'two-sided'): | ||||
|     def wilcoxon_signedrank(self, x, y = None, zero_method = 'wilcox', correction = False, alternative = 'two-sided'): | ||||
|  | ||||
|         results = scipy.stats.wilcoxon(x, y = y, method = method, correction = correction, alternative = alternative) | ||||
|         results = scipy.stats.wilcoxon(x, y = y, zero_method = zero_method, correction = correction, alternative = alternative) | ||||
|         return {"t-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def kw_htest(*args, nan_policy = 'propagate'): | ||||
|     def kw_htest(self, *args, nan_policy = 'propagate'): | ||||
|  | ||||
|         results = scipy.stats.kruskal(*args, nan_policy = nan_policy) | ||||
|         return {"h-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def friedman_chisquare(*args): | ||||
|     def friedman_chisquare(self, *args): | ||||
|  | ||||
|         results = scipy.stats.friedmanchisquare(*args) | ||||
|         return {"chisquared-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def bm_wtest(x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'): | ||||
|     def bm_wtest(self, x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'): | ||||
|  | ||||
|         results = scipy.stats.brunnermunzel(x, y, alternative = alternative, distribution = distribution, nan_policy = nan_policy) | ||||
|         return {"w-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def combine_pvalues(pvalues, method = 'fisher', weights = None): | ||||
|     def combine_pvalues(self, pvalues, method = 'fisher', weights = None): | ||||
|  | ||||
|         results = scipy.stats.combine_pvalues(pvalues, method = method, weights = weights) | ||||
|         return {"combined-statistic": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def jb_fitness(x): | ||||
|     def jb_fitness(self, x): | ||||
|  | ||||
|         results = scipy.stats.jarque_bera(x) | ||||
|         return {"jb-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def ab_equality(x, y): | ||||
|     def ab_equality(self, x, y): | ||||
|  | ||||
|         results = scipy.stats.ansari(x, y) | ||||
|         return {"ab-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def bartlett_variance(*args): | ||||
|     def bartlett_variance(self, *args): | ||||
|  | ||||
|         results = scipy.stats.bartlett(*args) | ||||
|         return {"t-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def levene_variance(*args, center = 'median', proportiontocut = 0.05): | ||||
|     def levene_variance(self, *args, center = 'median', proportiontocut = 0.05): | ||||
|  | ||||
|         results = scipy.stats.levene(*args, center = center, proportiontocut = proportiontocut) | ||||
|         return {"w-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def sw_normality(x): | ||||
|     def sw_normality(self, x): | ||||
|  | ||||
|         results = scipy.stats.shapiro(x) | ||||
|         return {"w-value": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def shapiro(x): | ||||
|     def shapiro(self, x): | ||||
|  | ||||
|         return "destroyed by facts and logic" | ||||
|  | ||||
|     def ad_onesample(x, dist = 'norm'): | ||||
|     def ad_onesample(self, x, dist = 'norm'): | ||||
|  | ||||
|         results = scipy.stats.anderson(x, dist = dist) | ||||
|         return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]} | ||||
|      | ||||
|     def ad_ksample(samples, midrank = True): | ||||
|     def ad_ksample(self, samples, midrank = True): | ||||
|  | ||||
|         results = scipy.stats.anderson_ksamp(samples, midrank = midrank) | ||||
|         return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]} | ||||
|  | ||||
|     def binomial(x, n = None, p = 0.5, alternative = 'two-sided'): | ||||
|     def binomial(self, x, n = None, p = 0.5, alternative = 'two-sided'): | ||||
|  | ||||
|         results = scipy.stats.binom_test(x, n = n, p = p, alternative = alternative) | ||||
|         return {"p-value": results} | ||||
|  | ||||
|     def fk_variance(*args, center = 'median', proportiontocut = 0.05): | ||||
|     def fk_variance(self, *args, center = 'median', proportiontocut = 0.05): | ||||
|  | ||||
|         results = scipy.stats.fligner(*args, center = center, proportiontocut = proportiontocut) | ||||
|         return {"h-value": results[0], "p-value": results[1]} # unknown if the statistic is an h value | ||||
|  | ||||
|     def mood_mediantest(*args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'): | ||||
|     def mood_mediantest(self, *args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'): | ||||
|  | ||||
|         results = scipy.stats.median_test(*args, ties = ties, correction = correction, lambda_ = lambda_, nan_policy = nan_policy) | ||||
|         return {"chisquared-value": results[0], "p-value": results[1], "m-value": results[2], "table": results[3]} | ||||
|  | ||||
|     def mood_equalscale(x, y, axis = 0): | ||||
|     def mood_equalscale(self, x, y, axis = 0): | ||||
|  | ||||
|         results = scipy.stats.mood(x, y, axis = axis) | ||||
|         return {"z-score": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def skewtest(a, axis = 0, nan_policy = 'propogate'): | ||||
|     def skewtest(self, a, axis = 0, nan_policy = 'propogate'): | ||||
|  | ||||
|         results = scipy.stats.skewtest(a, axis = axis, nan_policy = nan_policy) | ||||
|         return {"z-score": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def kurtosistest(a, axis = 0, nan_policy = 'propogate'): | ||||
|     def kurtosistest(self, a, axis = 0, nan_policy = 'propogate'): | ||||
|  | ||||
|         results = scipy.stats.kurtosistest(a, axis = axis, nan_policy = nan_policy) | ||||
|         return {"z-score": results[0], "p-value": results[1]} | ||||
|  | ||||
|     def normaltest(a, axis = 0, nan_policy = 'propogate'): | ||||
|     def normaltest(self, a, axis = 0, nan_policy = 'propogate'): | ||||
|  | ||||
|         results = scipy.stats.normaltest(a, axis = axis, nan_policy = nan_policy) | ||||
|         return {"z-score": results[0], "p-value": results[1]} | ||||
		Reference in New Issue
	
	Block a user