From f72d8457a74bfce7167b2bae679f7e512f29a92f Mon Sep 17 00:00:00 2001
From: zpan1 <72054510+zpan1@users.noreply.github.com>
Date: Tue, 26 Jan 2021 21:46:29 -0600
Subject: [PATCH] tests: New unit tests for submoduling (#66)

* feat: created kivy gui boilerplate

* migrated docker base image to debian

Signed-off-by: ltcptgeneral

* migrated to ubuntu

Signed-off-by: ltcptgeneral

* fixed issues

Signed-off-by: ltcptgeneral

* fix: docker build?

* fix: use ubuntu bionic

* fix: get kivy installed

* @ltcptgeneral can't spell

* optim dockerfile for not installing unused packages

* install basic stuff while building the container

* use prebuilt image for development

* install pylint on base image

* rename and use new kivy

* tests: added tests for Array and CorrelationTest

Both are not working due to errors

* fix: Array no longer has *args and CorrelationTest functions no longer have self in the arguments

* use new thing

* use 20.04 base

* symlink pip3 to pip

* use pip instead of pip3

* tra_analysis v 2.1.0-alpha.2
  SVM v 1.0.1
  added unvalidated SVM unit tests

Signed-off-by: ltcptgeneral

* fixed version number

Signed-off-by: ltcptgeneral

* tests: added tests for ClassificationMetric

* partially fixed and commented out svm unit tests

* fixed some SVM unit tests

* added installing pytest to devcontainer.json

* fix: small fixes to KNN

Namely, removing self from parameters and passing correct arguments to KNeighborsClassifier constructor

* fix, test: Added tests for KNN and NaiveBayes.

Also made some small fixes in KNN, NaiveBayes, and RegressionMetric

* test: finished unit tests except for StatisticalTest

Also made various small fixes and style changes

* StatisticalTest v 1.0.1

* fixed RegressionMetric unit test

temporarily disabled CorrelationTest unit tests

* tra_analysis v 2.1.0-alpha.3

* readded __all__

* fix: floating point issues in unit tests for CorrelationTest

Co-authored-by: AGawde05
Co-authored-by: ltcptgeneral
Co-authored-by: Dev Singh
Co-authored-by: jzpan1
---
 .devcontainer/Dockerfile                     |   9 +-
 .devcontainer/dev-dockerfile                 |   2 +
 .devcontainer/devcontainer.json              |   4 +-
 analysis-master/test_analysis.py             | 103 +++++++++++++++++-
 analysis-master/tra_analysis/Analysis.py     |   2 +-
 analysis-master/tra_analysis/Array.py        |  56 ++++------
 .../tra_analysis/CorrelationTest.py          |  14 +--
 analysis-master/tra_analysis/KNN.py          |  11 +-
 analysis-master/tra_analysis/NaiveBayes.py   |   6 +-
 analysis-master/tra_analysis/RandomForest.py |   2 +-
 .../tra_analysis/RegressionMetric.py         |   4 +-
 analysis-master/tra_analysis/SVM.py          |  61 ++++++-----
 .../tra_analysis/StatisticalTest.py          |  37 ++++++-
 analysis-master/tra_analysis/__init__.py     |  20 ++--
 data-analysis/design.kv                      |  46 ++++++++
 data-analysis/main.py                        |  41 +++++++
 16 files changed, 322 insertions(+), 96 deletions(-)
 create mode 100644 .devcontainer/dev-dockerfile
 create mode 100644 data-analysis/design.kv
 create mode 100644 data-analysis/main.py
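The dev container defined below installs pytest alongside the analysis dependencies, so the new unit tests can be run from the command line or programmatically. A minimal, hypothetical invocation (the test path mirrors this repository layout; adjust if it differs):

    # Hypothetical programmatic test run; pytest is installed by the devcontainer postCreateCommand below.
    import pytest

    # Collect and run the new submodule tests added in analysis-master/test_analysis.py.
    exit_code = pytest.main(["-v", "analysis-master/test_analysis.py"])
    print("pytest exit code:", exit_code)
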
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 4cb82cc3..160a9b91 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,2 +1,7 @@
-FROM python:3.8
-WORKDIR ~/
\ No newline at end of file
+FROM ubuntu:20.04
+WORKDIR /
+RUN apt-get -y update
+RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
+RUN apt-get install -y python3 python3-dev git python3-pip python3-kivy python-is-python3 libgl1-mesa-dev build-essential
+RUN ln -s $(which pip3) /usr/bin/pip
+RUN pip install pymongo pandas numpy scipy scikit-learn matplotlib pylint kivy
\ No newline at end of file
diff --git a/.devcontainer/dev-dockerfile b/.devcontainer/dev-dockerfile
new file mode 100644
index 00000000..74659d49
--- /dev/null
+++ b/.devcontainer/dev-dockerfile
@@ -0,0 +1,2 @@
+FROM titanscout2022/tra-analysis-base:latest
+WORKDIR /
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 3020c639..e670608d 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,7 +1,7 @@
 {
 	"name": "TRA Analysis Development Environment",
 	"build": {
-		"dockerfile": "Dockerfile",
+		"dockerfile": "dev-dockerfile",
 	},
 	"settings": {
 		"terminal.integrated.shell.linux": "/bin/bash",
@@ -24,5 +24,5 @@
 		"ms-python.python",
 		"waderyan.gitblame"
 	],
-	"postCreateCommand": "apt install vim -y ; pip install -r data-analysis/requirements.txt ; pip install -r analysis-master/requirements.txt ; pip install --no-cache-dir pylint ; pip install --no-cache-dir tra-analysis"
+	"postCreateCommand": "/usr/bin/pip3 install -r /workspaces/red-alliance-analysis/data-analysis/requirements.txt && /usr/bin/pip3 install -r /workspaces/red-alliance-analysis/analysis-master/requirements.txt && /usr/bin/pip3 install --no-cache-dir pylint && pip3 install pytest"
 }
\ No newline at end of file
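Since the prebuilt titanscout2022/tra-analysis-base image is expected to ship the packages from the RUN pip install line above, a rough smoke test (hypothetical, not part of this patch) is to import them inside the container and print their versions:

    # Hypothetical container smoke test; the package list mirrors the Dockerfile above.
    import importlib

    for name in ["pymongo", "pandas", "numpy", "scipy", "sklearn", "matplotlib", "kivy"]:
        module = importlib.import_module(name)
        print(name, getattr(module, "__version__", "unknown"))
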
diff --git a/analysis-master/test_analysis.py b/analysis-master/test_analysis.py
index 0cb41003..5676ca08 100644
--- a/analysis-master/test_analysis.py
+++ b/analysis-master/test_analysis.py
@@ -1,3 +1,7 @@
+import numpy as np
+import sklearn
+from sklearn import metrics
+
 from tra_analysis import Analysis as an
 from tra_analysis import Array
 from tra_analysis import ClassificationMetric
@@ -12,13 +16,27 @@ from tra_analysis import StatisticalTest
 from tra_analysis import SVM
 
 def test_():
+	test_data_linear = [1, 3, 6, 7, 9]
+	test_data_linear2 = [2, 2, 5, 7, 13]
+	test_data_array = Array(test_data_linear)
+
 	x_data_circular = []
 	y_data_circular = []
+
 	y_data_ccu = [1, 3, 7, 14, 21]
 	y_data_ccd = [1, 5, 7, 8.5, 8.66]
+
 	test_data_scrambled = [-32, 34, 19, 72, -65, -11, -43, 6, 85, -17, -98, -26, 12, 20, 9, -92, -40, 98, -78, 17, -20, 49, 93, -27, -24, -66, 40, 84, 1, -64, -68, -25, -42, -46, -76, 43, -3, 30, -14, -34, -55, -13, 41, -30, 0, -61, 48, 23, 60, 87, 80, 77, 53, 73, 79, 24, -52, 82, 8, -44, 65, 47, -77, 94, 7, 37, -79, 36, -94, 91, 59, 10, 97, -38, -67, 83, 54, 31, -95, -63, 16, -45, 21, -12, 66, -48, -18, -96, -90, -21, -83, -74, 39, 64, 69, -97, 13, 55, 27, -39]
 	test_data_sorted = [-98, -97, -96, -95, -94, -92, -90, -83, -79, -78, -77, -76, -74, -68, -67, -66, -65, -64, -63, -61, -55, -52, -48, -46, -45, -44, -43, -42, -40, -39, -38, -34, -32, -30, -27, -26, -25, -24, -21, -20, -18, -17, -14, -13, -12, -11, -3, 0, 1, 6, 7, 8, 9, 10, 12, 13, 16, 17, 19, 20, 21, 23, 24, 27, 30, 31, 34, 36, 37, 39, 40, 41, 43, 47, 48, 49, 53, 54, 55, 59, 60, 64, 65, 66, 69, 72, 73, 77, 79, 80, 82, 83, 84, 85, 87, 91, 93, 94, 97, 98]
+
+	test_data_2D_pairs = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
+	test_data_2D_positive = np.array([[23, 51], [21, 32], [15, 25], [17, 31]])
+	test_output = np.array([1, 3, 4, 5])
+	test_labels_2D_pairs = np.array([1, 1, 2, 2])
+	validation_data_2D_pairs = np.array([[-0.8, -1], [0.8, 1.2]])
+	validation_labels_2D_pairs = np.array([1, 2])
+
 	assert an.basic_stats(test_data_linear) == {"mean": 5.2, "median": 6.0, "standard-deviation": 2.85657137141714, "variance": 8.16, "minimum": 1.0, "maximum": 9.0}
 	assert an.z_score(3.2, 6, 1.5) == -1.8666666666666665
 	assert an.z_normalize([test_data_linear], 1).tolist() == [[0.07537783614444091, 0.22613350843332272, 0.45226701686664544, 0.5276448530110863, 0.6784005252999682]]
@@ -30,6 +48,58 @@ def test_():
 	assert an.Metric().elo(1500, 1500, [1, 0], 400, 24) == 1512.0
 	assert an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0]) == (1478.864307445517, 195.99122679202452, 0.05999602937563585)
 	#assert an.Metric().trueskill([[(25, 8.33), (24, 8.25), (32, 7.5)], [(25, 8.33), (25, 8.33), (21, 6.5)]], [1, 0]) == [(metrics.trueskill.Rating(mu=21.346, sigma=7.875), metrics.trueskill.Rating(mu=20.415, sigma=7.808), metrics.trueskill.Rating(mu=29.037, sigma=7.170)), (metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=23.225, sigma=6.287))]
+
+	assert test_data_array.elementwise_mean() == 5.2
+	assert test_data_array.elementwise_median() == 6.0
+	assert test_data_array.elementwise_stdev() == 2.85657137141714
+	assert test_data_array.elementwise_variance() == 8.16
+	assert test_data_array.elementwise_npmin() == 1
+	assert test_data_array.elementwise_npmax() == 9
+	assert test_data_array.elementwise_stats() == (5.2, 6.0, 2.85657137141714, 8.16, 1, 9)
+
+	classif_metric = ClassificationMetric(test_data_linear2, test_data_linear)
+	assert classif_metric[0].all() == metrics.confusion_matrix(test_data_linear, test_data_linear2).all()
+	assert classif_metric[1] == metrics.classification_report(test_data_linear, test_data_linear2)
+
+	assert all(np.isclose(list(CorrelationTest.anova_oneway(test_data_linear, test_data_linear2).values()), [0.05825242718446602, 0.8153507906592907], rtol=1e-10))
+	assert all(np.isclose(list(CorrelationTest.pearson(test_data_linear, test_data_linear2).values()), [0.9153061540753287, 0.02920895440940868], rtol=1e-10))
+	assert all(np.isclose(list(CorrelationTest.spearman(test_data_linear, test_data_linear2).values()), [0.9746794344808964, 0.004818230468198537], rtol=1e-10))
+	assert all(np.isclose(list(CorrelationTest.point_biserial(test_data_linear, test_data_linear2).values()), [0.9153061540753287, 0.02920895440940868], rtol=1e-10))
+	assert all(np.isclose(list(CorrelationTest.kendall(test_data_linear, test_data_linear2).values()), [0.9486832980505137, 0.022977401503206086], rtol=1e-10))
+	assert all(np.isclose(list(CorrelationTest.kendall_weighted(test_data_linear, test_data_linear2).values()), [0.9750538072369643, np.nan], rtol=1e-10, equal_nan=True))
+
+	assert Fit.CircleFit(x=[0,0,-1,1], y=[1, -1, 0, 0]).LSC() == (0.0, 0.0, 1.0, 0.0)
+
+	model, metric = KNN.knn_classifier(test_data_2D_pairs, test_labels_2D_pairs, 2)
+	assert isinstance(model, sklearn.neighbors.KNeighborsClassifier)
+	assert np.array([[0,0], [2,0]]).all() == metric[0].all()
+	assert ' precision recall f1-score support\n\n 1 0.00 0.00 0.00 0.0\n 2 0.00 0.00 0.00 2.0\n\n accuracy 0.00 2.0\n macro avg 0.00 0.00 0.00 2.0\nweighted avg 0.00 0.00 0.00 2.0\n' == metric[1]
+	model, metric = KNN.knn_regressor(test_data_2D_pairs, test_output, 2)
+	assert isinstance(model, sklearn.neighbors.KNeighborsRegressor)
+	assert (-25.0, 6.5, 2.5495097567963922) == metric
+
+	model, metric = NaiveBayes.gaussian(test_data_2D_pairs, test_labels_2D_pairs)
+	assert isinstance(model, sklearn.naive_bayes.GaussianNB)
+	assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
+	model, metric = NaiveBayes.multinomial(test_data_2D_positive, test_labels_2D_pairs)
+	assert isinstance(model, sklearn.naive_bayes.MultinomialNB)
+	assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
+	model, metric = NaiveBayes.bernoulli(test_data_2D_pairs, test_labels_2D_pairs)
+	assert isinstance(model, sklearn.naive_bayes.BernoulliNB)
+	assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
+	model, metric = NaiveBayes.complement(test_data_2D_positive, test_labels_2D_pairs)
+	assert isinstance(model, sklearn.naive_bayes.ComplementNB)
+	assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
+
+	model, metric = RandomForest.random_forest_classifier(test_data_2D_pairs, test_labels_2D_pairs, 0.3, 2)
+	assert isinstance(model, sklearn.ensemble.RandomForestClassifier)
+	assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
+	model, metric = RandomForest.random_forest_regressor(test_data_2D_pairs, test_labels_2D_pairs, 0.3, 2)
+	assert isinstance(model, sklearn.ensemble.RandomForestRegressor)
+	assert metric == (0.0, 1.0, 1.0)
+
+	assert RegressionMetric(test_data_linear, test_data_linear2) == (0.7705314009661837, 3.8, 1.9493588689617927)
+
 	assert all(a == b for a, b in zip(Sort.quicksort(test_data_scrambled), test_data_sorted))
 	assert all(a == b for a, b in zip(Sort.mergesort(test_data_scrambled), test_data_sorted))
 	assert all(a == b for a, b in zip(Sort.heapsort(test_data_scrambled), test_data_sorted))
@@ -41,4 +111,35 @@ def test_():
 	assert all(a == b for a, b in zip(Sort.bubblesort(test_data_scrambled), test_data_sorted))
 	assert all(a == b for a, b in zip(Sort.cyclesort(test_data_scrambled), test_data_sorted))
 	assert all(a == b for a, b in zip(Sort.cocktailsort(test_data_scrambled), test_data_sorted))
-	assert Fit.CircleFit(x=[0,0,-1,1], y=[1, -1, 0, 0]).LSC() == (0.0, 0.0, 1.0, 0.0)
\ No newline at end of file
+
+	assert Fit.CircleFit(x=[0,0,-1,1], y=[1, -1, 0, 0]).LSC() == (0.0, 0.0, 1.0, 0.0)
+
+	svm(test_data_2D_pairs, test_labels_2D_pairs, validation_data_2D_pairs, validation_labels_2D_pairs)
+
+def svm(data, labels, test_data, test_labels):
+
+	lin_kernel = SVM.PrebuiltKernel.Linear()
+	#ply_kernel = SVM.PrebuiltKernel.Polynomial(3, 0)
+	rbf_kernel = SVM.PrebuiltKernel.RBF('scale')
+	sig_kernel = SVM.PrebuiltKernel.Sigmoid(0)
+
+	lin_kernel = SVM.fit(lin_kernel, data, labels)
+	#ply_kernel = SVM.fit(ply_kernel, data, labels)
+	rbf_kernel = SVM.fit(rbf_kernel, data, labels)
+	sig_kernel = SVM.fit(sig_kernel, data, labels)
+
+	for i in range(len(test_data)):
+
+		assert lin_kernel.predict([test_data[i]]).tolist() == [test_labels[i]]
+
+	#for i in range(len(test_data)):
+
+	#	assert ply_kernel.predict([test_data[i]]).tolist() == [test_labels[i]]
+
+	for i in range(len(test_data)):
+
+		assert rbf_kernel.predict([test_data[i]]).tolist() == [test_labels[i]]
+
+	for i in range(len(test_data)):
+
+		assert sig_kernel.predict([test_data[i]]).tolist() == [test_labels[i]]
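The CorrelationTest assertions above wrap results in np.isclose instead of comparing with ==, which is what the "floating point issues" bullet in the commit message refers to. A standalone illustration (the computed values here are hypothetical, off only by rounding error):

    # Why the tests compare with np.isclose: scipy returns floats that rarely match literals bit-for-bit.
    import numpy as np

    expected = [0.9153061540753287, 0.02920895440940868]   # literals asserted in the test
    computed = [0.915306154075329, 0.029208954409409]      # hypothetical values differing by a few ulps

    print([a == b for a, b in zip(computed, expected)])      # [False, False]: exact equality is brittle
    print(np.isclose(computed, expected, rtol=1e-10).all())  # True: the tolerance used by the tests
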
diff --git a/analysis-master/tra_analysis/Analysis.py b/analysis-master/tra_analysis/Analysis.py
index cfe66455..287d6fbb 100644
--- a/analysis-master/tra_analysis/Analysis.py
+++ b/analysis-master/tra_analysis/Analysis.py
@@ -397,7 +397,7 @@ from .RandomForest_obj import RandomForest
 from .RegressionMetric import RegressionMetric
 from .Sort_obj import Sort
 from .StatisticalTest_obj import StatisticalTest
-from .SVM import SVM
+from . import SVM
 
 class error(ValueError):
 	pass
diff --git a/analysis-master/tra_analysis/Array.py b/analysis-master/tra_analysis/Array.py
index ab02c9a0..bd0b626a 100644
--- a/analysis-master/tra_analysis/Array.py
+++ b/analysis-master/tra_analysis/Array.py
@@ -27,55 +27,37 @@ class Array(): # tests on nd arrays independent of basic_stats
 
 		return str(self.array)
 
-	def elementwise_mean(self, *args, axis = 0): # expects arrays that are size normalized
-		if len(*args) == 0:
-			return np.mean(self.array, axis = axis)
-		else:
-			return np.mean([*args], axis = axis)
+	def elementwise_mean(self, axis = 0): # expects arrays that are size normalized
 
-	def elementwise_median(self, *args, axis = 0):
+		return np.mean(self.array, axis = axis)
 
-		if len(*args) == 0:
-			return np.median(self.array, axis = axis)
-		else:
-			return np.median([*args], axis = axis)
+	def elementwise_median(self, axis = 0):
 
-	def elementwise_stdev(self, *args, axis = 0):
+		return np.median(self.array, axis = axis)
 
-		if len(*args) == 0:
-			return np.std(self.array, axis = axis)
-		else:
-			return np.std([*args], axis = axis)
+	def elementwise_stdev(self, axis = 0):
 
-	def elementwise_variance(self, *args, axis = 0):
+		return np.std(self.array, axis = axis)
 
-		if len(*args) == 0:
-			return np.var(self.array, axis = axis)
-		else:
-			return np.var([*args], axis = axis)
+	def elementwise_variance(self, axis = 0):
 
-	def elementwise_npmin(self, *args, axis = 0):
+		return np.var(self.array, axis = axis)
 
-		if len(*args) == 0:
-			return np.amin(self.array, axis = axis)
-		else:
-			return np.amin([*args], axis = axis)
+	def elementwise_npmin(self, axis = 0):
+		return np.amin(self.array, axis = axis)
 
-	def elementwise_npmax(self, *args, axis = 0):
-		if len(*args) == 0:
-			return np.amax(self.array, axis = axis)
-		else:
-			return np.amax([*args], axis = axis)
+	def elementwise_npmax(self, axis = 0):
+		return np.amax(self.array, axis = axis)
 
-	def elementwise_stats(self, *args, axis = 0):
+	def elementwise_stats(self, axis = 0):
 
-		_mean = self.elementwise_mean(*args, axis = axis)
-		_median = self.elementwise_median(*args, axis = axis)
-		_stdev = self.elementwise_stdev(*args, axis = axis)
-		_variance = self.elementwise_variance(*args, axis = axis)
-		_min = self.elementwise_npmin(*args, axis = axis)
-		_max = self.elementwise_npmax(*args, axis = axis)
+		_mean = self.elementwise_mean(axis = axis)
+		_median = self.elementwise_median(axis = axis)
+		_stdev = self.elementwise_stdev(axis = axis)
+		_variance = self.elementwise_variance(axis = axis)
+		_min = self.elementwise_npmin(axis = axis)
+		_max = self.elementwise_npmax(axis = axis)
 
 		return _mean, _median, _stdev, _variance, _min, _max
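With *args removed, the element-wise helpers operate purely on the wrapped array, which is what the new Array tests exercise. A short usage sketch consistent with the new signatures (values match the test fixture [1, 3, 6, 7, 9]):

    # The refactored Array API: statistics come from the wrapped data, not from *args.
    from tra_analysis import Array

    arr = Array([1, 3, 6, 7, 9])
    print(arr.elementwise_mean())   # 5.2
    print(arr.elementwise_stats())  # (mean, median, stdev, variance, min, max) as a tuple
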
diff --git a/analysis-master/tra_analysis/CorrelationTest.py b/analysis-master/tra_analysis/CorrelationTest.py
index 1fd841ef..3de6ebbb 100644
--- a/analysis-master/tra_analysis/CorrelationTest.py
+++ b/analysis-master/tra_analysis/CorrelationTest.py
@@ -22,37 +22,37 @@ __all__ = [
 import scipy
 from scipy import stats
 
-def anova_oneway(self, *args): #expects arrays of samples
+def anova_oneway(*args): #expects arrays of samples
 
 	results = scipy.stats.f_oneway(*args)
 	return {"f-value": results[0], "p-value": results[1]}
 
-def pearson(self, x, y):
+def pearson(x, y):
 
 	results = scipy.stats.pearsonr(x, y)
 	return {"r-value": results[0], "p-value": results[1]}
 
-def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'):
+def spearman(a, b = None, axis = 0, nan_policy = 'propagate'):
 
 	results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
 	return {"r-value": results[0], "p-value": results[1]}
 
-def point_biserial(self, x,y):
+def point_biserial(x, y):
 
 	results = scipy.stats.pointbiserialr(x, y)
 	return {"r-value": results[0], "p-value": results[1]}
 
-def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
+def kendall(x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
 
 	results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method)
 	return {"tau": results[0], "p-value": results[1]}
 
-def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True):
+def kendall_weighted(x, y, rank = True, weigher = None, additive = True):
 
 	results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
 	return {"tau": results[0], "p-value": results[1]}
 
-def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
+def mgc(x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
 
 	results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
 	return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value
\ No newline at end of file
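With self removed, the correlation helpers are called directly off the module and each returns a small dict, as the new tests assume. For example:

    # Module-level correlation calls; the dict keys follow the return statements above.
    from tra_analysis import CorrelationTest

    x = [1, 3, 6, 7, 9]
    y = [2, 2, 5, 7, 13]
    print(CorrelationTest.pearson(x, y))       # {"r-value": ..., "p-value": ...}
    print(CorrelationTest.anova_oneway(x, y))  # {"f-value": ..., "p-value": ...}
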
diff --git a/analysis-master/tra_analysis/KNN.py b/analysis-master/tra_analysis/KNN.py
index ed9ca9df..d594c72f 100644
--- a/analysis-master/tra_analysis/KNN.py
+++ b/analysis-master/tra_analysis/KNN.py
@@ -14,29 +14,32 @@ __changelog__ = """changelog:
 
 __author__ = (
 	"Arthur Lu ",
+	"James Pan "
 )
 
 __all__ = [
+	'knn_classifier',
+	'knn_regressor'
 ]
 
 import sklearn
 from sklearn import model_selection, neighbors
 from . import ClassificationMetric, RegressionMetric
 
-def knn_classifier(self, data, labels, n_neighbors, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
+def knn_classifier(data, labels, n_neighbors = 5, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
 
 	data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
-	model = sklearn.neighbors.KNeighborsClassifier()
+	model = sklearn.neighbors.KNeighborsClassifier(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
 	model.fit(data_train, labels_train)
 	predictions = model.predict(data_test)
 
 	return model, ClassificationMetric(predictions, labels_test)
 
-def knn_regressor(self, data, outputs, n_neighbors, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
+def knn_regressor(data, outputs, n_neighbors = 5, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
 
 	data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
 	model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
 	model.fit(data_train, outputs_train)
 	predictions = model.predict(data_test)
 
-	return model, RegressionMetric(predictions, outputs_test)
\ No newline at end of file
+	return model, RegressionMetric.RegressionMetric(predictions, outputs_test)
\ No newline at end of file
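knn_classifier now forwards its keyword arguments to KNeighborsClassifier (previously they were accepted but ignored) and defaults n_neighbors to 5. Called the way the new test calls it, it returns the fitted model plus a ClassificationMetric:

    # KNN usage matching the new module-level signature; the arrays mirror the test fixtures.
    import numpy as np
    from tra_analysis import KNN

    data = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    labels = np.array([1, 1, 2, 2])

    model, metric = KNN.knn_classifier(data, labels, 2)  # n_neighbors passed positionally, no self
    print(type(model).__name__)                          # KNeighborsClassifier
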
diff --git a/analysis-master/tra_analysis/NaiveBayes.py b/analysis-master/tra_analysis/NaiveBayes.py
index c666975a..304d731c 100644
--- a/analysis-master/tra_analysis/NaiveBayes.py
+++ b/analysis-master/tra_analysis/NaiveBayes.py
@@ -16,13 +16,17 @@ __author__ = (
 )
 
 __all__ = [
+	'gaussian',
+	'multinomial',
+	'bernoulli',
+	'complement'
 ]
 
 import sklearn
 from sklearn import model_selection, naive_bayes
 from . import ClassificationMetric, RegressionMetric
 
-def guassian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09):
+def gaussian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09):
 
 	data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
 	model = sklearn.naive_bayes.GaussianNB(priors = priors, var_smoothing = var_smoothing)
diff --git a/analysis-master/tra_analysis/RandomForest.py b/analysis-master/tra_analysis/RandomForest.py
index b39b56f7..6349c873 100644
--- a/analysis-master/tra_analysis/RandomForest.py
+++ b/analysis-master/tra_analysis/RandomForest.py
@@ -39,4 +39,4 @@ def random_forest_regressor(data, outputs, test_size, n_estimators, criterion="m
 	kernel.fit(data_train, outputs_train)
 	predictions = kernel.predict(data_test)
 
-	return kernel, RegressionMetric(predictions, outputs_test)
\ No newline at end of file
+	return kernel, RegressionMetric.RegressionMetric(predictions, outputs_test)
\ No newline at end of file
diff --git a/analysis-master/tra_analysis/RegressionMetric.py b/analysis-master/tra_analysis/RegressionMetric.py
index 37904a04..0b985493 100644
--- a/analysis-master/tra_analysis/RegressionMetric.py
+++ b/analysis-master/tra_analysis/RegressionMetric.py
@@ -16,8 +16,10 @@ __author__ = (
 )
 
 __all__ = [
+	'RegressionMetric'
 ]
 
+import numpy as np
 import sklearn
 from sklearn import metrics
 
@@ -37,4 +39,4 @@ class RegressionMetric():
 
 	def rms(self, predictions, targets):
 
-		return math.sqrt(sklearn.metrics.mean_squared_error(targets, predictions))
\ No newline at end of file
+		return np.sqrt(sklearn.metrics.mean_squared_error(targets, predictions))
\ No newline at end of file
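The misspelled guassian() becomes gaussian(), matching the name the new tests import, and like the other wrappers it returns the fitted estimator plus a ClassificationMetric. A short sketch using the same toy data as the tests:

    # Corrected NaiveBayes.gaussian in use; returns (model, ClassificationMetric).
    import numpy as np
    from tra_analysis import NaiveBayes

    data = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    labels = np.array([1, 1, 2, 2])

    model, metric = NaiveBayes.gaussian(data, labels)
    print(type(model).__name__)  # GaussianNB
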
diff --git a/analysis-master/tra_analysis/SVM.py b/analysis-master/tra_analysis/SVM.py
index ef568c5f..d1e2022b 100644
--- a/analysis-master/tra_analysis/SVM.py
+++ b/analysis-master/tra_analysis/SVM.py
@@ -4,9 +4,12 @@
 # this should be imported as a python module using 'from tra_analysis import SVM'
 # setup:
 
-__version__ = "1.0.0"
+__version__ = "1.0.1"
 
 __changelog__ = """changelog:
+	1.0.1:
+		- removed unnecessary self calls
+		- removed classness
 	1.0.0:
 		- ported analysis.SVM() here
 """
@@ -22,58 +25,56 @@ import sklearn
 from sklearn import svm
 from . import ClassificationMetric, RegressionMetric
 
-class SVM:
+class CustomKernel:
 
-	class CustomKernel:
+	def __new__(cls, C, kernel, degre, gamma, coef0, shrinking, probability, tol, cache_size, class_weight, verbose, max_iter, decision_function_shape, random_state):
 
-		def __new__(cls, C, kernel, degre, gamma, coef0, shrinking, probability, tol, cache_size, class_weight, verbose, max_iter, decision_function_shape, random_state):
+		return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state)
 
-			return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state)
+class StandardKernel:
 
-	class StandardKernel:
+	def __new__(cls, kernel, C=1.0, degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', random_state=None):
 
-		def __new__(cls, kernel, C=1.0, degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', random_state=None):
+		return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state)
 
-			return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state)
+class PrebuiltKernel:
 
-	class PrebuiltKernel:
+	class Linear:
 
-		class Linear:
+		def __new__(cls):
 
-			def __new__(cls):
+			return sklearn.svm.SVC(kernel = 'linear')
 
-				return sklearn.svm.SVC(kernel = 'linear')
+	class Polynomial:
 
-		class Polynomial:
+		def __new__(cls, power, r_bias):
 
-			def __new__(cls, power, r_bias):
+			return sklearn.svm.SVC(kernel = 'polynomial', degree = power, coef0 = r_bias)
 
-				return sklearn.svm.SVC(kernel = 'polynomial', degree = power, coef0 = r_bias)
+	class RBF:
 
-		class RBF:
+		def __new__(cls, gamma):
 
-			def __new__(cls, gamma):
+			return sklearn.svm.SVC(kernel = 'rbf', gamma = gamma)
 
-				return sklearn.svm.SVC(kernel = 'rbf', gamma = gamma)
+	class Sigmoid:
 
-		class Sigmoid:
+		def __new__(cls, r_bias):
 
-			def __new__(cls, r_bias):
+			return sklearn.svm.SVC(kernel = 'sigmoid', coef0 = r_bias)
 
-				return sklearn.svm.SVC(kernel = 'sigmoid', coef0 = r_bias)
+def fit(kernel, train_data, train_outputs): # expects *2d data, 1d labels or outputs
 
-	def fit(self, kernel, train_data, train_outputs): # expects *2d data, 1d labels or outputs
+	return kernel.fit(train_data, train_outputs)
 
-		return kernel.fit(train_data, train_outputs)
+def eval_classification(kernel, test_data, test_outputs):
 
-	def eval_classification(self, kernel, test_data, test_outputs):
+	predictions = kernel.predict(test_data)
 
-		predictions = kernel.predict(test_data)
+	return ClassificationMetric(predictions, test_outputs)
 
-		return ClassificationMetric(predictions, test_outputs)
+def eval_regression(kernel, test_data, test_outputs):
 
-	def eval_regression(self, kernel, test_data, test_outputs):
+	predictions = kernel.predict(test_data)
 
-		predictions = kernel.predict(test_data)
-
-		return RegressionMetric(predictions, test_outputs)
\ No newline at end of file
+	return RegressionMetric(predictions, test_outputs)
\ No newline at end of file
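With the wrapping SVM class removed, kernels and helpers are plain module members, which is the pattern the svm() helper in test_analysis.py relies on:

    # Module-level SVM workflow, mirroring the svm() helper added to the tests.
    import numpy as np
    from tra_analysis import SVM

    data = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    labels = np.array([1, 1, 2, 2])

    kernel = SVM.PrebuiltKernel.RBF('scale')      # builds sklearn.svm.SVC(kernel='rbf', gamma='scale')
    kernel = SVM.fit(kernel, data, labels)        # no self argument anymore
    print(kernel.predict([[0.8, 1.2]]).tolist())  # [2] on this toy data, per the validation labels
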
diff --git a/analysis-master/tra_analysis/StatisticalTest.py b/analysis-master/tra_analysis/StatisticalTest.py
index 8bc84fbe..37006566 100644
--- a/analysis-master/tra_analysis/StatisticalTest.py
+++ b/analysis-master/tra_analysis/StatisticalTest.py
@@ -4,9 +4,11 @@
 # this should be imported as a python module using 'from tra_analysis import StatisticalTest'
 # setup:
 
-__version__ = "1.0.0"
+__version__ = "1.0.1"
 
 __changelog__ = """changelog:
+	1.0.1:
+		- fixed typo in __all__
 	1.0.0:
 		- ported analysis.StatisticalTest() here
 		- removed classness
@@ -17,6 +19,39 @@ __author__ = (
 )
 
 __all__ = [
+	'ttest_onesample',
+	'ttest_independent',
+	'ttest_statistic',
+	'ttest_related',
+	'ks_fitness',
+	'chisquare',
+	'powerdivergence',
+	'ks_twosample',
+	'es_twosample',
+	'mw_rank',
+	'mw_tiecorrection',
+	'rankdata',
+	'wilcoxon_ranksum',
+	'wilcoxon_signedrank',
+	'kw_htest',
+	'friedman_chisquare',
+	'bm_wtest',
+	'combine_pvalues',
+	'jb_fitness',
+	'ab_equality',
+	'bartlett_variance',
+	'levene_variance',
+	'sw_normality',
+	'shapiro',
+	'ad_onesample',
+	'ad_ksample',
+	'binomial',
+	'fk_variance',
+	'mood_mediantest',
+	'mood_equalscale',
+	'skewtest',
+	'kurtosistest',
+	'normaltest'
 ]
 
 import scipy
diff --git a/analysis-master/tra_analysis/__init__.py b/analysis-master/tra_analysis/__init__.py
index 5342c0fd..7e99fd3f 100644
--- a/analysis-master/tra_analysis/__init__.py
+++ b/analysis-master/tra_analysis/__init__.py
@@ -7,20 +7,24 @@
 # current benchmark of optimization: 1.33 times faster
 # setup:
 
-__version__ = "2.1.0-alpha.1"
+__version__ = "2.1.0-alpha.3"
 
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
-	2.1.0-alpha.1:
-		- moved multiple submodules under analysis to their own modules/files
-		- added header, __version__, __changelog__, __author__, __all__ (unpopulated)
+	2.1.0-alpha.3:
+		- fixed indentation in meta data
+	2.1.0-alpha.2:
+		- updated SVM import
+	2.1.0-alpha.1:
+		- moved multiple submodules under analysis to their own modules/files
+		- added header, __version__, __changelog__, __author__, __all__ (unpopulated)
 """
 
 __author__ = (
 	"Arthur Lu ",
-    "Jacob Levine ",
-    "Dev Singh ",
-    "James Pan "
+	"Jacob Levine ",
+	"Dev Singh ",
+	"James Pan "
 )
 
 __all__ = [
@@ -37,4 +41,4 @@ from . import RandomForest
 from .RegressionMetric import RegressionMetric
 from . import Sort
 from . import StatisticalTest
-from .SVM import SVM
\ No newline at end of file
+from . import SVM
\ No newline at end of file
diff --git a/data-analysis/design.kv b/data-analysis/design.kv
new file mode 100644
index 00000000..26092f24
--- /dev/null
+++ b/data-analysis/design.kv
@@ -0,0 +1,46 @@
+<HomeScreen>:
+    GridLayout:
+        cols: 1
+        GridLayout:
+            cols: 1
+            padding: 15, 15
+            spacing: 20, 20
+            Label:
+                text: "User Login"
+                font_size: "20sp"
+            TextInput:
+                id: username
+                hint_text: "Username"
+            TextInput:
+                id: password
+                password: True
+                hint_text: "Password"
+            RelativeLayout:
+                Button:
+                    text: "Login"
+                    on_press: root.login(root.ids.username.text, root.ids.password.text)
+                    size_hint: 0.3, 0.5
+                    pos_hint: {"center_x": 0.5, "center_y": 0.6}
+            Label:
+                id: login_wrong
+                text: ""
+            GridLayout:
+                cols: 2
+                size_hint: 0.2, 0.2
+                padding: 10, 10
+                spacing: 10, 0
+                Button:
+                    text: "Forgot Password?"
+                    background_color: 1, 1, 1, 0
+                    opacity: 1 if self.state == "normal" else 0.5
+                    color: 0.1, 0.7, 1, 1
+                Button:
+                    text: "Sign Up"
+                    on_press: root.sign_up()
+                    background_color: 1, 1, 1 , 0
+                    opacity: 1 if self.state == "normal" else 0.5
+                    color: 0.1, 0.7, 1, 1
+
+<RootWidget>:
+    HomeScreen:
+        name: "home_screen"
diff --git a/data-analysis/main.py b/data-analysis/main.py
new file mode 100644
index 00000000..8bf4029b
--- /dev/null
+++ b/data-analysis/main.py
@@ -0,0 +1,41 @@
+from kivy.app import App
+from kivy.lang import Builder
+from kivy.uix.screenmanager import ScreenManager , Screen
+from kivy.animation import Animation
+from hoverable import HoverBehavior
+from kivy.uix.image import Image
+from kivy.uix.behaviors import ButtonBehavior
+import json
+from datetime import datetime
+import glob
+from pathlib import Path
+import random
+
+import superscript as ss
+
+Builder.load_file('design.kv')
+
+class HomeScreen(Screen):
+	# def sign_up(self):
+	# 	self.manager.transition.direction = "left"
+	# 	self.manager.current = "sign_up_screen"
+
+	# def login(self, uname, pword):
+	# 	with open ("users.json") as file:
+	# 		users = json.load(file)
+	# 	if uname in users and users[uname]["password"] == pword:
+	# 		self.manager.transition.direction = "left"
+	# 		self.manager.current = "login_screen_success"
+	# 	else:
+	# 		self.ids.login_wrong.text = "Incorrect Username or Password"
+
+
+class RootWidget(ScreenManager):
+	pass
+
+class MainApp(App):
+	def build(self):
+		return RootWidget()
+
+if __name__ == "__main__":
+	MainApp().run()
\ No newline at end of file