mirror of
				https://github.com/titanscouting/tra-analysis.git
				synced 2025-10-25 02:19:20 +00:00 
			
		
		
		
	tests: New unit tests for submoduling (#66)
* feat: created kivy gui boilerplate
* migrated docker base image to debian
  Signed-off-by: ltcptgeneral <learthurgo@gmail.com>
* migrated to ubuntu
  Signed-off-by: ltcptgeneral <learthurgo@gmail.com>
* fixed issues
  Signed-off-by: ltcptgeneral <learthurgo@gmail.com>
* fix: docker build?
* fix: use ubuntu bionic
* fix: get kivy installed
* @ltcptgeneral can't spell
* optim dockerfile for not installing unused packages
* install basic stuff while building the container
* use prebuilt image for development
* install pylint on base image
* rename and use new kivy
* tests: added tests for Array and CorrelationTest Both are not working due to errors
* fix: Array no longer has *args and CorrelationTest functions no longer have self in the arguments
* use new thing
* use 20.04 base
* symlink pip3 to pip
* use pip instead of pip3
* tra_analysis v 2.1.0-alpha.2 SVM v 1.0.1 added unvalidated SVM unit tests
  Signed-off-by: ltcptgeneral <learthurgo@gmail.com>
* fixed version number
  Signed-off-by: ltcptgeneral <learthurgo@gmail.com>
* tests: added tests for ClassificationMetric
* partially fixed and commented out svm unit tests
* fixed some SVM unit tests
* added installing pytest to devcontainer.json
* fix: small fixes to KNN Namely, removing self from parameters and passing correct arguments to KNeighborsClassifier constructor
* fix, test: Added tests for KNN and NaiveBayes. Also made some small fixes in KNN, NaiveBayes, and RegressionMetric
* test: finished unit tests except for StatisticalTest Also made various small fixes and style changes
* StatisticalTest v 1.0.1
* fixed RegressionMetric unit test temporarily disabled CorrelationTest unit tests
* tra_analysis v 2.1.0-alpha.3
* readded __all__
* fix: floating point issues in unit tests for CorrelationTest

Co-authored-by: AGawde05 <agawde05@gmail.com>
Co-authored-by: ltcptgeneral <learthurgo@gmail.com>
Co-authored-by: Dev Singh <dev@devksingh.com>
Co-authored-by: jzpan1 <panzhenyu2014@gmail.com>
This commit is contained in:
		| @@ -397,7 +397,7 @@ from .RandomForest_obj import RandomForest | ||||
| from .RegressionMetric import RegressionMetric | ||||
| from .Sort_obj import Sort | ||||
| from .StatisticalTest_obj import StatisticalTest | ||||
| from .SVM import SVM | ||||
| from . import SVM | ||||
|  | ||||
| class error(ValueError): | ||||
| 	pass | ||||
|   | ||||
| @@ -27,55 +27,37 @@ class Array(): # tests on nd arrays independent of basic_stats | ||||
|  | ||||
| 		return str(self.array) | ||||
| 	 | ||||
| 	def elementwise_mean(self, *args, axis = 0): # expects arrays that are size normalized | ||||
| 		if len(*args) == 0: | ||||
| 			return np.mean(self.array, axis = axis) | ||||
| 		else: | ||||
| 			return np.mean([*args], axis = axis) | ||||
| 	def elementwise_mean(self, axis = 0): # expects arrays that are size normalized | ||||
|  | ||||
| 	def elementwise_median(self, *args, axis = 0): | ||||
| 		return np.mean(self.array, axis = axis) | ||||
|  | ||||
| 		if len(*args) == 0: | ||||
| 			return np.median(self.array, axis = axis) | ||||
| 		else: | ||||
| 			return np.median([*args], axis = axis) | ||||
| 	def elementwise_median(self, axis = 0): | ||||
|  | ||||
| 	def elementwise_stdev(self, *args, axis = 0): | ||||
| 		return np.median(self.array, axis = axis) | ||||
|  | ||||
| 		if len(*args) == 0: | ||||
| 			return np.std(self.array, axis = axis) | ||||
| 		else: | ||||
| 			return np.std([*args], axis = axis) | ||||
| 	def elementwise_stdev(self, axis = 0): | ||||
|  | ||||
| 	def elementwise_variance(self, *args, axis = 0): | ||||
| 		return np.std(self.array, axis = axis) | ||||
|  | ||||
| 		if len(*args) == 0: | ||||
| 			return np.var(self.array, axis = axis) | ||||
| 		else: | ||||
| 			return np.var([*args], axis = axis) | ||||
| 	def elementwise_variance(self, axis = 0): | ||||
|  | ||||
| 	def elementwise_npmin(self, *args, axis = 0): | ||||
| 		return np.var(self.array, axis = axis) | ||||
|  | ||||
| 		if len(*args) == 0: | ||||
| 			return np.amin(self.array, axis = axis) | ||||
| 		else: | ||||
| 			return np.amin([*args], axis = axis) | ||||
| 	def elementwise_npmin(self, axis = 0): | ||||
| 		return np.amin(self.array, axis = axis) | ||||
|  | ||||
| 	def elementwise_npmax(self, *args, axis = 0): | ||||
|  | ||||
| 		if len(*args) == 0: | ||||
| 			return np.amax(self.array, axis = axis) | ||||
| 		else: | ||||
| 			return np.amax([*args], axis = axis) | ||||
| 	def elementwise_npmax(self, axis = 0): | ||||
| 		return np.amax(self.array, axis = axis) | ||||
|  | ||||
| 	def elementwise_stats(self, *args, axis = 0): | ||||
| 	def elementwise_stats(self, axis = 0): | ||||
|  | ||||
| 		_mean = self.elementwise_mean(*args, axis = axis) | ||||
| 		_median = self.elementwise_median(*args, axis = axis) | ||||
| 		_stdev = self.elementwise_stdev(*args, axis = axis) | ||||
| 		_variance = self.elementwise_variance(*args, axis = axis) | ||||
| 		_min = self.elementwise_npmin(*args, axis = axis) | ||||
| 		_max = self.elementwise_npmax(*args, axis = axis) | ||||
| 		_mean = self.elementwise_mean(axis = axis) | ||||
| 		_median = self.elementwise_median(axis = axis) | ||||
| 		_stdev = self.elementwise_stdev(axis = axis) | ||||
| 		_variance = self.elementwise_variance(axis = axis) | ||||
| 		_min = self.elementwise_npmin(axis = axis) | ||||
| 		_max = self.elementwise_npmax(axis = axis) | ||||
|  | ||||
| 		return _mean, _median, _stdev, _variance, _min, _max | ||||
|  | ||||
|   | ||||
| @@ -22,37 +22,37 @@ __all__ = [ | ||||
| import scipy | ||||
| from scipy import stats | ||||
|  | ||||
| def anova_oneway(self, *args): #expects arrays of samples | ||||
| def anova_oneway(*args): #expects arrays of samples | ||||
|  | ||||
| 	results = scipy.stats.f_oneway(*args) | ||||
| 	return {"f-value": results[0], "p-value": results[1]} | ||||
|  | ||||
| def pearson(self, x, y): | ||||
| def pearson(x, y): | ||||
|  | ||||
| 	results = scipy.stats.pearsonr(x, y) | ||||
| 	return {"r-value": results[0], "p-value": results[1]} | ||||
|  | ||||
| def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'): | ||||
| def spearman(a, b = None, axis = 0, nan_policy = 'propagate'): | ||||
|  | ||||
| 	results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy) | ||||
| 	return {"r-value": results[0], "p-value": results[1]} | ||||
|  | ||||
| def point_biserial(self, x,y): | ||||
| def point_biserial(x, y): | ||||
|  | ||||
| 	results = scipy.stats.pointbiserialr(x, y) | ||||
| 	return {"r-value": results[0], "p-value": results[1]} | ||||
|  | ||||
| def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'): | ||||
| def kendall(x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'): | ||||
|  | ||||
| 	results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method) | ||||
| 	return {"tau": results[0], "p-value": results[1]} | ||||
|  | ||||
| def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True): | ||||
| def kendall_weighted(x, y, rank = True, weigher = None, additive = True): | ||||
|  | ||||
| 	results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive) | ||||
| 	return {"tau": results[0], "p-value": results[1]} | ||||
|  | ||||
| def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None): | ||||
| def mgc(x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None): | ||||
|  | ||||
| 	results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state) | ||||
| 	return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value | ||||
| @@ -14,29 +14,32 @@ __changelog__ = """changelog: | ||||
|  | ||||
| __author__ = ( | ||||
| 	"Arthur Lu <learthurgo@gmail.com>", | ||||
| 	"James Pan <zpan@imsa.edu>" | ||||
| ) | ||||
|  | ||||
| __all__ = [ | ||||
| 	'knn_classifier', | ||||
| 	'knn_regressor' | ||||
| ] | ||||
|  | ||||
| import sklearn | ||||
| from sklearn import model_selection, neighbors | ||||
| from . import ClassificationMetric, RegressionMetric | ||||
|  | ||||
| def knn_classifier(self, data, labels, n_neighbors, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling | ||||
| def knn_classifier(data, labels, n_neighbors = 5, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling | ||||
|  | ||||
| 	data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) | ||||
| 	model = sklearn.neighbors.KNeighborsClassifier() | ||||
| 	model = sklearn.neighbors.KNeighborsClassifier(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs) | ||||
| 	model.fit(data_train, labels_train) | ||||
| 	predictions = model.predict(data_test) | ||||
|  | ||||
| 	return model, ClassificationMetric(predictions, labels_test) | ||||
|  | ||||
| def knn_regressor(self, data, outputs, n_neighbors, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None): | ||||
| def knn_regressor(data, outputs, n_neighbors = 5, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None): | ||||
|  | ||||
| 	data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1) | ||||
| 	model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs) | ||||
| 	model.fit(data_train, outputs_train) | ||||
| 	predictions = model.predict(data_test) | ||||
|  | ||||
| 	return model, RegressionMetric(predictions, outputs_test) | ||||
| 	return model, RegressionMetric.RegressionMetric(predictions, outputs_test) | ||||
| @@ -16,13 +16,17 @@ __author__ = ( | ||||
| ) | ||||
|  | ||||
| __all__ = [ | ||||
| 	'gaussian', | ||||
| 	'multinomial' | ||||
| 	'bernoulli', | ||||
| 	'complement' | ||||
| ] | ||||
|  | ||||
| import sklearn | ||||
| from sklearn import model_selection, naive_bayes | ||||
| from . import ClassificationMetric, RegressionMetric | ||||
|  | ||||
| def guassian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09): | ||||
| def gaussian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09): | ||||
|  | ||||
| 	data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) | ||||
| 	model = sklearn.naive_bayes.GaussianNB(priors = priors, var_smoothing = var_smoothing) | ||||
|   | ||||
| @@ -39,4 +39,4 @@ def random_forest_regressor(data, outputs, test_size, n_estimators, criterion="m | ||||
| 	kernel.fit(data_train, outputs_train) | ||||
| 	predictions = kernel.predict(data_test) | ||||
|  | ||||
| 	return kernel, RegressionMetric(predictions, outputs_test) | ||||
| 	return kernel, RegressionMetric.RegressionMetric(predictions, outputs_test) | ||||
| @@ -16,8 +16,10 @@ __author__ = ( | ||||
| ) | ||||
|  | ||||
| __all__ = [ | ||||
| 	'RegressionMetric' | ||||
| ] | ||||
|  | ||||
| import numpy as np | ||||
| import sklearn | ||||
| from sklearn import metrics | ||||
|  | ||||
| @@ -37,4 +39,4 @@ class RegressionMetric(): | ||||
|  | ||||
| 	def rms(self, predictions, targets): | ||||
|  | ||||
| 		return math.sqrt(sklearn.metrics.mean_squared_error(targets, predictions)) | ||||
| 		return np.sqrt(sklearn.metrics.mean_squared_error(targets, predictions)) | ||||
| @@ -4,9 +4,12 @@ | ||||
| #    this should be imported as a python module using 'from tra_analysis import SVM' | ||||
| # setup: | ||||
|  | ||||
| __version__ = "1.0.0" | ||||
| __version__ = "1.0.1" | ||||
|  | ||||
| __changelog__ = """changelog: | ||||
| 	1.0.1: | ||||
| 		- removed unessasary self calls | ||||
| 		- removed classness | ||||
| 	1.0.0: | ||||
| 		- ported analysis.SVM() here | ||||
| """ | ||||
| @@ -22,58 +25,56 @@ import sklearn | ||||
| from sklearn import svm | ||||
| from . import ClassificationMetric, RegressionMetric | ||||
|  | ||||
| class SVM: | ||||
| class CustomKernel: | ||||
|  | ||||
| 	class CustomKernel: | ||||
| 	def __new__(cls, C, kernel, degre, gamma, coef0, shrinking, probability, tol, cache_size, class_weight, verbose, max_iter, decision_function_shape, random_state): | ||||
|  | ||||
| 		def __new__(cls, C, kernel, degre, gamma, coef0, shrinking, probability, tol, cache_size, class_weight, verbose, max_iter, decision_function_shape, random_state): | ||||
| 		return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state) | ||||
|  | ||||
| 			return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state) | ||||
| class StandardKernel: | ||||
|  | ||||
| 	class StandardKernel: | ||||
| 	def __new__(cls, kernel, C=1.0, degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', random_state=None): | ||||
|  | ||||
| 		def __new__(cls, kernel, C=1.0, degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', random_state=None): | ||||
| 		return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state) | ||||
|  | ||||
| 			return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state) | ||||
| class PrebuiltKernel: | ||||
|  | ||||
| 	class PrebuiltKernel: | ||||
| 	class Linear: | ||||
|  | ||||
| 		class Linear: | ||||
| 		def __new__(cls): | ||||
|  | ||||
| 			def __new__(cls): | ||||
| 			return sklearn.svm.SVC(kernel = 'linear') | ||||
|  | ||||
| 				return sklearn.svm.SVC(kernel = 'linear') | ||||
| 	class Polynomial: | ||||
|  | ||||
| 		class Polynomial: | ||||
| 		def __new__(cls, power, r_bias): | ||||
|  | ||||
| 			def __new__(cls, power, r_bias): | ||||
| 			return sklearn.svm.SVC(kernel = 'polynomial', degree = power, coef0 = r_bias) | ||||
|  | ||||
| 				return sklearn.svm.SVC(kernel = 'polynomial', degree = power, coef0 = r_bias) | ||||
| 	class RBF: | ||||
|  | ||||
| 		class RBF: | ||||
| 		def __new__(cls, gamma): | ||||
|  | ||||
| 			def __new__(cls, gamma): | ||||
| 			return sklearn.svm.SVC(kernel = 'rbf', gamma = gamma) | ||||
|  | ||||
| 				return sklearn.svm.SVC(kernel = 'rbf', gamma = gamma) | ||||
| 	class Sigmoid: | ||||
|  | ||||
| 		class Sigmoid: | ||||
| 		def __new__(cls, r_bias): | ||||
|  | ||||
| 			def __new__(cls, r_bias): | ||||
| 			return sklearn.svm.SVC(kernel = 'sigmoid', coef0 = r_bias) | ||||
|  | ||||
| 				return sklearn.svm.SVC(kernel = 'sigmoid', coef0 = r_bias) | ||||
| def fit(kernel, train_data, train_outputs): # expects *2d data, 1d labels or outputs | ||||
|  | ||||
| 	def fit(self, kernel, train_data, train_outputs): # expects *2d data, 1d labels or outputs | ||||
| 	return kernel.fit(train_data, train_outputs) | ||||
|  | ||||
| 		return kernel.fit(train_data, train_outputs) | ||||
| def eval_classification(kernel, test_data, test_outputs): | ||||
|  | ||||
| 	def eval_classification(self, kernel, test_data, test_outputs): | ||||
| 	predictions = kernel.predict(test_data) | ||||
|  | ||||
| 		predictions = kernel.predict(test_data) | ||||
| 	return ClassificationMetric(predictions, test_outputs) | ||||
|  | ||||
| 		return ClassificationMetric(predictions, test_outputs) | ||||
| def eval_regression(kernel, test_data, test_outputs): | ||||
|  | ||||
| 	def eval_regression(self, kernel, test_data, test_outputs): | ||||
| 	predictions = kernel.predict(test_data) | ||||
|  | ||||
| 		predictions = kernel.predict(test_data) | ||||
|  | ||||
| 		return RegressionMetric(predictions, test_outputs) | ||||
| 	return RegressionMetric(predictions, test_outputs) | ||||
| @@ -4,9 +4,11 @@ | ||||
| #    this should be imported as a python module using 'from tra_analysis import StatisticalTest' | ||||
| # setup: | ||||
|  | ||||
| __version__ = "1.0.0" | ||||
| __version__ = "1.0.1" | ||||
|  | ||||
| __changelog__ = """changelog: | ||||
| 	1.0.1: | ||||
| 		- fixed typo in __all__ | ||||
| 	1.0.0: | ||||
| 		- ported analysis.StatisticalTest() here | ||||
| 		- removed classness | ||||
| @@ -17,6 +19,39 @@ __author__ = ( | ||||
| ) | ||||
|  | ||||
| __all__ = [ | ||||
| 	'ttest_onesample', | ||||
| 	'ttest_independent', | ||||
| 	'ttest_statistic', | ||||
| 	'ttest_related', | ||||
| 	'ks_fitness', | ||||
| 	'chisquare', | ||||
| 	'powerdivergence' | ||||
| 	'ks_twosample', | ||||
| 	'es_twosample', | ||||
| 	'mw_rank', | ||||
| 	'mw_tiecorrection', | ||||
| 	'rankdata', | ||||
| 	'wilcoxon_ranksum', | ||||
| 	'wilcoxon_signedrank', | ||||
| 	'kw_htest', | ||||
| 	'friedman_chisquare', | ||||
| 	'bm_wtest', | ||||
| 	'combine_pvalues', | ||||
| 	'jb_fitness', | ||||
| 	'ab_equality', | ||||
| 	'bartlett_variance', | ||||
| 	'levene_variance', | ||||
| 	'sw_normality', | ||||
| 	'shapiro', | ||||
| 	'ad_onesample', | ||||
| 	'ad_ksample', | ||||
| 	'binomial', | ||||
| 	'fk_variance', | ||||
| 	'mood_mediantest', | ||||
| 	'mood_equalscale', | ||||
| 	'skewtest', | ||||
| 	'kurtosistest', | ||||
| 	'normaltest' | ||||
| ] | ||||
|  | ||||
| import scipy | ||||
|   | ||||
| @@ -7,20 +7,24 @@ | ||||
| #    current benchmark of optimization: 1.33 times faster | ||||
| # setup: | ||||
|  | ||||
| __version__ = "2.1.0-alpha.1" | ||||
| __version__ = "2.1.0-alpha.3" | ||||
|  | ||||
| # changelog should be viewed using print(analysis.__changelog__) | ||||
| __changelog__ = """changelog: | ||||
|     2.1.0-alpha.1: | ||||
|         - moved multiple submodules under analysis to their own modules/files | ||||
|         - added header, __version__, __changelog__, __author__, __all__ (unpopulated) | ||||
| 	2.1.0-alpha.3: | ||||
| 		- fixed indentation in meta data | ||||
| 	2.1.0-alpha.2: | ||||
| 		- updated SVM import | ||||
| 	2.1.0-alpha.1: | ||||
| 		- moved multiple submodules under analysis to their own modules/files | ||||
| 		- added header, __version__, __changelog__, __author__, __all__ (unpopulated) | ||||
| """ | ||||
|  | ||||
| __author__ = ( | ||||
| 	"Arthur Lu <learthurgo@gmail.com>", | ||||
|     "Jacob Levine <jlevine@imsa.edu>", | ||||
|     "Dev Singh <dev@devksingh.com>", | ||||
|     "James Pan <zpan@imsa.edu>" | ||||
| 	"Jacob Levine <jlevine@imsa.edu>", | ||||
| 	"Dev Singh <dev@devksingh.com>", | ||||
| 	"James Pan <zpan@imsa.edu>" | ||||
| ) | ||||
|  | ||||
| __all__ = [ | ||||
| @@ -37,4 +41,4 @@ from . import RandomForest | ||||
| from .RegressionMetric import RegressionMetric | ||||
| from . import Sort | ||||
| from . import StatisticalTest | ||||
| from .SVM import SVM | ||||
| from . import SVM | ||||
		Reference in New Issue
	
	Block a user