Merge branch 'submoduling' into equation

This commit is contained in:
Arthur Lu 2021-01-26 20:18:23 -08:00 committed by GitHub
commit 177a30b456
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 353 additions and 114 deletions

View File

@ -1,2 +1,7 @@
FROM python:3.8 FROM ubuntu:20.04
WORKDIR ~/ WORKDIR /
RUN apt-get -y update
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
RUN apt-get install -y python3 python3-dev git python3-pip python3-kivy python-is-python3 libgl1-mesa-dev build-essential
RUN ln -s $(which pip3) /usr/bin/pip
RUN pip install pymongo pandas numpy scipy scikit-learn matplotlib pylint kivy

View File

@ -0,0 +1,2 @@
FROM titanscout2022/tra-analysis-base:latest
WORKDIR /

View File

@ -1,7 +1,7 @@
{ {
"name": "TRA Analysis Development Environment", "name": "TRA Analysis Development Environment",
"build": { "build": {
"dockerfile": "Dockerfile", "dockerfile": "dev-dockerfile",
}, },
"settings": { "settings": {
"terminal.integrated.shell.linux": "/bin/bash", "terminal.integrated.shell.linux": "/bin/bash",
@ -24,5 +24,5 @@
"ms-python.python", "ms-python.python",
"waderyan.gitblame" "waderyan.gitblame"
], ],
"postCreateCommand": "apt install vim -y ; pip install -r data-analysis/requirements.txt ; pip install -r analysis-master/requirements.txt ; pip install --no-cache-dir pylint ; pip install --no-cache-dir tra-analysis" "postCreateCommand": "/usr/bin/pip3 install -r /workspaces/red-alliance-analysis/data-analysis/requirements.txt && /usr/bin/pip3 install -r /workspaces/red-alliance-analysis/analysis-master/requirements.txt && /usr/bin/pip3 install --no-cache-dir pylint && pip3 install pytest"
} }

View File

@ -1,16 +1,42 @@
from tra_analysis import analysis as an import numpy as np
from tra_analysis import metrics import sklearn
from tra_analysis import fits from sklearn import metrics
from tra_analysis.equation.parser import BNF
from tra_analysis import Analysis as an
from tra_analysis import Array
from tra_analysis import ClassificationMetric
from tra_analysis import CorrelationTest
from tra_analysis import Fit
from tra_analysis import KNN
from tra_analysis import NaiveBayes
from tra_analysis import RandomForest
from tra_analysis import RegressionMetric
from tra_analysis import Sort
from tra_analysis import StatisticalTest
from tra_analysis import SVM
def test_(): def test_():
test_data_linear = [1, 3, 6, 7, 9] test_data_linear = [1, 3, 6, 7, 9]
test_data_linear2 = [2, 2, 5, 7, 13]
test_data_array = Array(test_data_linear)
x_data_circular = [] x_data_circular = []
y_data_circular = [] y_data_circular = []
y_data_ccu = [1, 3, 7, 14, 21] y_data_ccu = [1, 3, 7, 14, 21]
y_data_ccd = [1, 5, 7, 8.5, 8.66] y_data_ccd = [1, 5, 7, 8.5, 8.66]
test_data_scrambled = [-32, 34, 19, 72, -65, -11, -43, 6, 85, -17, -98, -26, 12, 20, 9, -92, -40, 98, -78, 17, -20, 49, 93, -27, -24, -66, 40, 84, 1, -64, -68, -25, -42, -46, -76, 43, -3, 30, -14, -34, -55, -13, 41, -30, 0, -61, 48, 23, 60, 87, 80, 77, 53, 73, 79, 24, -52, 82, 8, -44, 65, 47, -77, 94, 7, 37, -79, 36, -94, 91, 59, 10, 97, -38, -67, 83, 54, 31, -95, -63, 16, -45, 21, -12, 66, -48, -18, -96, -90, -21, -83, -74, 39, 64, 69, -97, 13, 55, 27, -39] test_data_scrambled = [-32, 34, 19, 72, -65, -11, -43, 6, 85, -17, -98, -26, 12, 20, 9, -92, -40, 98, -78, 17, -20, 49, 93, -27, -24, -66, 40, 84, 1, -64, -68, -25, -42, -46, -76, 43, -3, 30, -14, -34, -55, -13, 41, -30, 0, -61, 48, 23, 60, 87, 80, 77, 53, 73, 79, 24, -52, 82, 8, -44, 65, 47, -77, 94, 7, 37, -79, 36, -94, 91, 59, 10, 97, -38, -67, 83, 54, 31, -95, -63, 16, -45, 21, -12, 66, -48, -18, -96, -90, -21, -83, -74, 39, 64, 69, -97, 13, 55, 27, -39]
test_data_sorted = [-98, -97, -96, -95, -94, -92, -90, -83, -79, -78, -77, -76, -74, -68, -67, -66, -65, -64, -63, -61, -55, -52, -48, -46, -45, -44, -43, -42, -40, -39, -38, -34, -32, -30, -27, -26, -25, -24, -21, -20, -18, -17, -14, -13, -12, -11, -3, 0, 1, 6, 7, 8, 9, 10, 12, 13, 16, 17, 19, 20, 21, 23, 24, 27, 30, 31, 34, 36, 37, 39, 40, 41, 43, 47, 48, 49, 53, 54, 55, 59, 60, 64, 65, 66, 69, 72, 73, 77, 79, 80, 82, 83, 84, 85, 87, 91, 93, 94, 97, 98] test_data_sorted = [-98, -97, -96, -95, -94, -92, -90, -83, -79, -78, -77, -76, -74, -68, -67, -66, -65, -64, -63, -61, -55, -52, -48, -46, -45, -44, -43, -42, -40, -39, -38, -34, -32, -30, -27, -26, -25, -24, -21, -20, -18, -17, -14, -13, -12, -11, -3, 0, 1, 6, 7, 8, 9, 10, 12, 13, 16, 17, 19, 20, 21, 23, 24, 27, 30, 31, 34, 36, 37, 39, 40, 41, 43, 47, 48, 49, 53, 54, 55, 59, 60, 64, 65, 66, 69, 72, 73, 77, 79, 80, 82, 83, 84, 85, 87, 91, 93, 94, 97, 98]
test_data_2D_pairs = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
test_data_2D_positive = np.array([[23, 51], [21, 32], [15, 25], [17, 31]])
test_output = np.array([1, 3, 4, 5])
test_labels_2D_pairs = np.array([1, 1, 2, 2])
validation_data_2D_pairs = np.array([[-0.8, -1], [0.8, 1.2]])
validation_labels_2D_pairs = np.array([1, 2])
assert an.basic_stats(test_data_linear) == {"mean": 5.2, "median": 6.0, "standard-deviation": 2.85657137141714, "variance": 8.16, "minimum": 1.0, "maximum": 9.0} assert an.basic_stats(test_data_linear) == {"mean": 5.2, "median": 6.0, "standard-deviation": 2.85657137141714, "variance": 8.16, "minimum": 1.0, "maximum": 9.0}
assert an.z_score(3.2, 6, 1.5) == -1.8666666666666665 assert an.z_score(3.2, 6, 1.5) == -1.8666666666666665
assert an.z_normalize([test_data_linear], 1).tolist() == [[0.07537783614444091, 0.22613350843332272, 0.45226701686664544, 0.5276448530110863, 0.6784005252999682]] assert an.z_normalize([test_data_linear], 1).tolist() == [[0.07537783614444091, 0.22613350843332272, 0.45226701686664544, 0.5276448530110863, 0.6784005252999682]]
@ -22,18 +48,102 @@ def test_():
assert an.Metric().elo(1500, 1500, [1, 0], 400, 24) == 1512.0 assert an.Metric().elo(1500, 1500, [1, 0], 400, 24) == 1512.0
assert an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0]) == (1478.864307445517, 195.99122679202452, 0.05999602937563585) assert an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0]) == (1478.864307445517, 195.99122679202452, 0.05999602937563585)
#assert an.Metric().trueskill([[(25, 8.33), (24, 8.25), (32, 7.5)], [(25, 8.33), (25, 8.33), (21, 6.5)]], [1, 0]) == [(metrics.trueskill.Rating(mu=21.346, sigma=7.875), metrics.trueskill.Rating(mu=20.415, sigma=7.808), metrics.trueskill.Rating(mu=29.037, sigma=7.170)), (metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=23.225, sigma=6.287))] #assert an.Metric().trueskill([[(25, 8.33), (24, 8.25), (32, 7.5)], [(25, 8.33), (25, 8.33), (21, 6.5)]], [1, 0]) == [(metrics.trueskill.Rating(mu=21.346, sigma=7.875), metrics.trueskill.Rating(mu=20.415, sigma=7.808), metrics.trueskill.Rating(mu=29.037, sigma=7.170)), (metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=23.225, sigma=6.287))]
assert all(a == b for a, b in zip(an.Sort().quicksort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().mergesort(test_data_scrambled), test_data_sorted)) assert test_data_array.elementwise_mean() == 5.2
assert all(a == b for a, b in zip(an.Sort().introsort(test_data_scrambled), test_data_sorted)) assert test_data_array.elementwise_median() == 6.0
assert all(a == b for a, b in zip(an.Sort().heapsort(test_data_scrambled), test_data_sorted)) assert test_data_array.elementwise_stdev() == 2.85657137141714
assert all(a == b for a, b in zip(an.Sort().insertionsort(test_data_scrambled), test_data_sorted)) assert test_data_array.elementwise_variance() == 8.16
assert all(a == b for a, b in zip(an.Sort().timsort(test_data_scrambled), test_data_sorted)) assert test_data_array.elementwise_npmin() == 1
assert all(a == b for a, b in zip(an.Sort().selectionsort(test_data_scrambled), test_data_sorted)) assert test_data_array.elementwise_npmax() == 9
assert all(a == b for a, b in zip(an.Sort().shellsort(test_data_scrambled), test_data_sorted)) assert test_data_array.elementwise_stats() == (5.2, 6.0, 2.85657137141714, 8.16, 1, 9)
assert all(a == b for a, b in zip(an.Sort().bubblesort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().cyclesort(test_data_scrambled), test_data_sorted)) classif_metric = ClassificationMetric(test_data_linear2, test_data_linear)
assert all(a == b for a, b in zip(an.Sort().cocktailsort(test_data_scrambled), test_data_sorted)) assert classif_metric[0].all() == metrics.confusion_matrix(test_data_linear, test_data_linear2).all()
assert fits.CircleFit(x=[0,0,-1,1], y=[1, -1, 0, 0]).LSC() == (0.0, 0.0, 1.0, 0.0) assert classif_metric[1] == metrics.classification_report(test_data_linear, test_data_linear2)
assert all(np.isclose(list(CorrelationTest.anova_oneway(test_data_linear, test_data_linear2).values()), [0.05825242718446602, 0.8153507906592907], rtol=1e-10))
assert all(np.isclose(list(CorrelationTest.pearson(test_data_linear, test_data_linear2).values()), [0.9153061540753287, 0.02920895440940868], rtol=1e-10))
assert all(np.isclose(list(CorrelationTest.spearman(test_data_linear, test_data_linear2).values()), [0.9746794344808964, 0.004818230468198537], rtol=1e-10))
assert all(np.isclose(list(CorrelationTest.point_biserial(test_data_linear, test_data_linear2).values()), [0.9153061540753287, 0.02920895440940868], rtol=1e-10))
assert all(np.isclose(list(CorrelationTest.kendall(test_data_linear, test_data_linear2).values()), [0.9486832980505137, 0.022977401503206086], rtol=1e-10))
assert all(np.isclose(list(CorrelationTest.kendall_weighted(test_data_linear, test_data_linear2).values()), [0.9750538072369643, np.nan], rtol=1e-10, equal_nan=True))
assert Fit.CircleFit(x=[0,0,-1,1], y=[1, -1, 0, 0]).LSC() == (0.0, 0.0, 1.0, 0.0)
model, metric = KNN.knn_classifier(test_data_2D_pairs, test_labels_2D_pairs, 2)
assert isinstance(model, sklearn.neighbors.KNeighborsClassifier)
assert np.array([[0,0], [2,0]]).all() == metric[0].all()
assert ' precision recall f1-score support\n\n 1 0.00 0.00 0.00 0.0\n 2 0.00 0.00 0.00 2.0\n\n accuracy 0.00 2.0\n macro avg 0.00 0.00 0.00 2.0\nweighted avg 0.00 0.00 0.00 2.0\n' == metric[1]
model, metric = KNN.knn_regressor(test_data_2D_pairs, test_output, 2)
assert isinstance(model, sklearn.neighbors.KNeighborsRegressor)
assert (-25.0, 6.5, 2.5495097567963922) == metric
model, metric = NaiveBayes.gaussian(test_data_2D_pairs, test_labels_2D_pairs)
assert isinstance(model, sklearn.naive_bayes.GaussianNB)
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
model, metric = NaiveBayes.multinomial(test_data_2D_positive, test_labels_2D_pairs)
assert isinstance(model, sklearn.naive_bayes.MultinomialNB)
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
model, metric = NaiveBayes.bernoulli(test_data_2D_pairs, test_labels_2D_pairs)
assert isinstance(model, sklearn.naive_bayes.BernoulliNB)
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
model, metric = NaiveBayes.complement(test_data_2D_positive, test_labels_2D_pairs)
assert isinstance(model, sklearn.naive_bayes.ComplementNB)
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
model, metric = RandomForest.random_forest_classifier(test_data_2D_pairs, test_labels_2D_pairs, 0.3, 2)
assert isinstance(model, sklearn.ensemble.RandomForestClassifier)
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
model, metric = RandomForest.random_forest_regressor(test_data_2D_pairs, test_labels_2D_pairs, 0.3, 2)
assert isinstance(model, sklearn.ensemble.RandomForestRegressor)
assert metric == (0.0, 1.0, 1.0)
assert RegressionMetric(test_data_linear, test_data_linear2)== (0.7705314009661837, 3.8, 1.9493588689617927)
assert all(a == b for a, b in zip(Sort.quicksort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.mergesort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.heapsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.introsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.insertionsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.timsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.selectionsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.shellsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.bubblesort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.cyclesort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(Sort.cocktailsort(test_data_scrambled), test_data_sorted))
assert Fit.CircleFit(x=[0,0,-1,1], y=[1, -1, 0, 0]).LSC() == (0.0, 0.0, 1.0, 0.0)
svm(test_data_2D_pairs, test_labels_2D_pairs, validation_data_2D_pairs, validation_labels_2D_pairs)
test_equation()
def svm(data, labels, test_data, test_labels):
    """Fit the prebuilt SVM kernels and check their predictions.

    Builds the linear, RBF ('scale') and sigmoid (0) prebuilt kernels, fits
    each one on ``data``/``labels`` through ``SVM.fit``, then asserts that
    every fitted kernel predicts ``test_labels[i]`` for ``test_data[i]``.

    The polynomial kernel (``SVM.PrebuiltKernel.Polynomial(3, 0)``) is
    deliberately not exercised here.
    """
    # Construct all kernels first, then fit them, in the order
    # linear -> RBF -> sigmoid.
    unfitted_kernels = (
        SVM.PrebuiltKernel.Linear(),
        SVM.PrebuiltKernel.RBF('scale'),
        SVM.PrebuiltKernel.Sigmoid(0),
    )
    fitted_kernels = [SVM.fit(kernel, data, labels) for kernel in unfitted_kernels]

    # Each kernel must classify every validation sample correctly.
    for model in fitted_kernels:
        for index, sample in enumerate(test_data):
            assert model.predict([sample]).tolist() == [test_labels[index]]
test_equation() test_equation()

View File

@ -397,7 +397,7 @@ from .RandomForest_obj import RandomForest
from .RegressionMetric import RegressionMetric from .RegressionMetric import RegressionMetric
from .Sort_obj import Sort from .Sort_obj import Sort
from .StatisticalTest_obj import StatisticalTest from .StatisticalTest_obj import StatisticalTest
from .SVM import SVM from . import SVM
class error(ValueError): class error(ValueError):
pass pass

View File

@ -27,55 +27,37 @@ class Array(): # tests on nd arrays independent of basic_stats
return str(self.array) return str(self.array)
def elementwise_mean(self, *args, axis = 0): # expects arrays that are size normalized def elementwise_mean(self, axis = 0): # expects arrays that are size normalized
if len(*args) == 0:
return np.mean(self.array, axis = axis) return np.mean(self.array, axis = axis)
else:
return np.mean([*args], axis = axis)
def elementwise_median(self, *args, axis = 0): def elementwise_median(self, axis = 0):
if len(*args) == 0:
return np.median(self.array, axis = axis) return np.median(self.array, axis = axis)
else:
return np.median([*args], axis = axis)
def elementwise_stdev(self, *args, axis = 0): def elementwise_stdev(self, axis = 0):
if len(*args) == 0:
return np.std(self.array, axis = axis) return np.std(self.array, axis = axis)
else:
return np.std([*args], axis = axis)
def elementwise_variance(self, *args, axis = 0): def elementwise_variance(self, axis = 0):
if len(*args) == 0:
return np.var(self.array, axis = axis) return np.var(self.array, axis = axis)
else:
return np.var([*args], axis = axis)
def elementwise_npmin(self, *args, axis = 0): def elementwise_npmin(self, axis = 0):
if len(*args) == 0:
return np.amin(self.array, axis = axis) return np.amin(self.array, axis = axis)
else:
return np.amin([*args], axis = axis)
def elementwise_npmax(self, *args, axis = 0):
if len(*args) == 0: def elementwise_npmax(self, axis = 0):
return np.amax(self.array, axis = axis) return np.amax(self.array, axis = axis)
else:
return np.amax([*args], axis = axis)
def elementwise_stats(self, *args, axis = 0): def elementwise_stats(self, axis = 0):
_mean = self.elementwise_mean(*args, axis = axis) _mean = self.elementwise_mean(axis = axis)
_median = self.elementwise_median(*args, axis = axis) _median = self.elementwise_median(axis = axis)
_stdev = self.elementwise_stdev(*args, axis = axis) _stdev = self.elementwise_stdev(axis = axis)
_variance = self.elementwise_variance(*args, axis = axis) _variance = self.elementwise_variance(axis = axis)
_min = self.elementwise_npmin(*args, axis = axis) _min = self.elementwise_npmin(axis = axis)
_max = self.elementwise_npmax(*args, axis = axis) _max = self.elementwise_npmax(axis = axis)
return _mean, _median, _stdev, _variance, _min, _max return _mean, _median, _stdev, _variance, _min, _max

View File

@ -22,37 +22,37 @@ __all__ = [
import scipy import scipy
from scipy import stats from scipy import stats
def anova_oneway(self, *args): #expects arrays of samples def anova_oneway(*args): #expects arrays of samples
results = scipy.stats.f_oneway(*args) results = scipy.stats.f_oneway(*args)
return {"f-value": results[0], "p-value": results[1]} return {"f-value": results[0], "p-value": results[1]}
def pearson(self, x, y): def pearson(x, y):
results = scipy.stats.pearsonr(x, y) results = scipy.stats.pearsonr(x, y)
return {"r-value": results[0], "p-value": results[1]} return {"r-value": results[0], "p-value": results[1]}
def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'): def spearman(a, b = None, axis = 0, nan_policy = 'propagate'):
results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy) results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
return {"r-value": results[0], "p-value": results[1]} return {"r-value": results[0], "p-value": results[1]}
def point_biserial(self, x,y): def point_biserial(x, y):
results = scipy.stats.pointbiserialr(x, y) results = scipy.stats.pointbiserialr(x, y)
return {"r-value": results[0], "p-value": results[1]} return {"r-value": results[0], "p-value": results[1]}
def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'): def kendall(x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method) results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method)
return {"tau": results[0], "p-value": results[1]} return {"tau": results[0], "p-value": results[1]}
def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True): def kendall_weighted(x, y, rank = True, weigher = None, additive = True):
results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive) results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
return {"tau": results[0], "p-value": results[1]} return {"tau": results[0], "p-value": results[1]}
def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None): def mgc(x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state) results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value

View File

@ -14,29 +14,32 @@ __changelog__ = """changelog:
__author__ = ( __author__ = (
"Arthur Lu <learthurgo@gmail.com>", "Arthur Lu <learthurgo@gmail.com>",
"James Pan <zpan@imsa.edu>"
) )
__all__ = [ __all__ = [
'knn_classifier',
'knn_regressor'
] ]
import sklearn import sklearn
from sklearn import model_selection, neighbors from sklearn import model_selection, neighbors
from . import ClassificationMetric, RegressionMetric from . import ClassificationMetric, RegressionMetric
def knn_classifier(self, data, labels, n_neighbors, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling def knn_classifier(data, labels, n_neighbors = 5, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.neighbors.KNeighborsClassifier() model = sklearn.neighbors.KNeighborsClassifier(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
model.fit(data_train, labels_train) model.fit(data_train, labels_train)
predictions = model.predict(data_test) predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test) return model, ClassificationMetric(predictions, labels_test)
def knn_regressor(self, data, outputs, n_neighbors, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None): def knn_regressor(data, outputs, n_neighbors = 5, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1) data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs) model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
model.fit(data_train, outputs_train) model.fit(data_train, outputs_train)
predictions = model.predict(data_test) predictions = model.predict(data_test)
return model, RegressionMetric(predictions, outputs_test) return model, RegressionMetric.RegressionMetric(predictions, outputs_test)

View File

@ -16,13 +16,17 @@ __author__ = (
) )
__all__ = [ __all__ = [
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, which would export a bogus 'multinomialbernoulli'
# name instead of the two real functions.
'gaussian',
'multinomial',
'bernoulli',
'complement'
] ]
import sklearn import sklearn
from sklearn import model_selection, naive_bayes from sklearn import model_selection, naive_bayes
from . import ClassificationMetric, RegressionMetric from . import ClassificationMetric, RegressionMetric
def guassian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09): def gaussian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.GaussianNB(priors = priors, var_smoothing = var_smoothing) model = sklearn.naive_bayes.GaussianNB(priors = priors, var_smoothing = var_smoothing)

View File

@ -39,4 +39,4 @@ def random_forest_regressor(data, outputs, test_size, n_estimators, criterion="m
kernel.fit(data_train, outputs_train) kernel.fit(data_train, outputs_train)
predictions = kernel.predict(data_test) predictions = kernel.predict(data_test)
return kernel, RegressionMetric(predictions, outputs_test) return kernel, RegressionMetric.RegressionMetric(predictions, outputs_test)

View File

@ -16,8 +16,10 @@ __author__ = (
) )
__all__ = [ __all__ = [
'RegressionMetric'
] ]
import numpy as np
import sklearn import sklearn
from sklearn import metrics from sklearn import metrics
@ -37,4 +39,4 @@ class RegressionMetric():
def rms(self, predictions, targets): def rms(self, predictions, targets):
return math.sqrt(sklearn.metrics.mean_squared_error(targets, predictions)) return np.sqrt(sklearn.metrics.mean_squared_error(targets, predictions))

View File

@ -4,9 +4,12 @@
# this should be imported as a python module using 'from tra_analysis import SVM' # this should be imported as a python module using 'from tra_analysis import SVM'
# setup: # setup:
__version__ = "1.0.0" __version__ = "1.0.1"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.1:
- removed unnecessary self calls
- removed classness
1.0.0: 1.0.0:
- ported analysis.SVM() here - ported analysis.SVM() here
""" """
@ -22,21 +25,19 @@ import sklearn
from sklearn import svm from sklearn import svm
from . import ClassificationMetric, RegressionMetric from . import ClassificationMetric, RegressionMetric
class SVM: class CustomKernel:
class CustomKernel:
def __new__(cls, C, kernel, degre, gamma, coef0, shrinking, probability, tol, cache_size, class_weight, verbose, max_iter, decision_function_shape, random_state): def __new__(cls, C, kernel, degre, gamma, coef0, shrinking, probability, tol, cache_size, class_weight, verbose, max_iter, decision_function_shape, random_state):
return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state) return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state)
class StandardKernel: class StandardKernel:
def __new__(cls, kernel, C=1.0, degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', random_state=None): def __new__(cls, kernel, C=1.0, degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', random_state=None):
return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state) return sklearn.svm.SVC(C = C, kernel = kernel, gamma = gamma, coef0 = coef0, shrinking = shrinking, probability = probability, tol = tol, cache_size = cache_size, class_weight = class_weight, verbose = verbose, max_iter = max_iter, decision_function_shape = decision_function_shape, random_state = random_state)
class PrebuiltKernel: class PrebuiltKernel:
class Linear: class Linear:
@ -62,17 +63,17 @@ class SVM:
return sklearn.svm.SVC(kernel = 'sigmoid', coef0 = r_bias) return sklearn.svm.SVC(kernel = 'sigmoid', coef0 = r_bias)
def fit(self, kernel, train_data, train_outputs): # expects *2d data, 1d labels or outputs def fit(kernel, train_data, train_outputs): # expects *2d data, 1d labels or outputs
return kernel.fit(train_data, train_outputs) return kernel.fit(train_data, train_outputs)
def eval_classification(self, kernel, test_data, test_outputs): def eval_classification(kernel, test_data, test_outputs):
predictions = kernel.predict(test_data) predictions = kernel.predict(test_data)
return ClassificationMetric(predictions, test_outputs) return ClassificationMetric(predictions, test_outputs)
def eval_regression(self, kernel, test_data, test_outputs): def eval_regression(kernel, test_data, test_outputs):
predictions = kernel.predict(test_data) predictions = kernel.predict(test_data)

View File

@ -1,5 +1,5 @@
# Titan Robotics Team 2022: Sort submodule # Titan Robotics Team 2022: Sort submodule
# Written by Arthur Lu # Written by Arthur Lu and James Pan
# Notes: # Notes:
# this should be imported as a python module using 'from tra_analysis import Sort' # this should be imported as a python module using 'from tra_analysis import Sort'
# setup: # setup:
@ -14,11 +14,14 @@ __changelog__ = """changelog:
__author__ = ( __author__ = (
"Arthur Lu <learthurgo@gmail.com>", "Arthur Lu <learthurgo@gmail.com>",
"James Pan <zpan@imsa.edu>"
) )
__all__ = [ __all__ = [
] ]
import numpy as np
def quicksort(a): def quicksort(a):
def sort(array): def sort(array):

View File

@ -4,9 +4,11 @@
# this should be imported as a python module using 'from tra_analysis import StatisticalTest' # this should be imported as a python module using 'from tra_analysis import StatisticalTest'
# setup: # setup:
__version__ = "1.0.0" __version__ = "1.0.1"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.1:
- fixed typo in __all__
1.0.0: 1.0.0:
- ported analysis.StatisticalTest() here - ported analysis.StatisticalTest() here
- removed classness - removed classness
@ -17,6 +19,39 @@ __author__ = (
) )
__all__ = [ __all__ = [
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, which would merge 'powerdivergence' and
# 'ks_twosample' into one non-existent name.
'ttest_onesample',
'ttest_independent',
'ttest_statistic',
'ttest_related',
'ks_fitness',
'chisquare',
'powerdivergence',
'ks_twosample',
'es_twosample',
'mw_rank',
'mw_tiecorrection',
'rankdata',
'wilcoxon_ranksum',
'wilcoxon_signedrank',
'kw_htest',
'friedman_chisquare',
'bm_wtest',
'combine_pvalues',
'jb_fitness',
'ab_equality',
'bartlett_variance',
'levene_variance',
'sw_normality',
'shapiro',
'ad_onesample',
'ad_ksample',
'binomial',
'fk_variance',
'mood_mediantest',
'mood_equalscale',
'skewtest',
'kurtosistest',
'normaltest'
] ]
import scipy import scipy

View File

@ -1,5 +1,5 @@
# Titan Robotics Team 2022: tra_analysis package # Titan Robotics Team 2022: tra_analysis package
# Written by Arthur Lu, Jacob Levine, and Dev Singh # Written by Arthur Lu, Jacob Levine, Dev Singh, and James Pan
# Notes: # Notes:
# this should be imported as a python package using 'import tra_analysis' # this should be imported as a python package using 'import tra_analysis'
# this should be included in the local directory or environment variable # this should be included in the local directory or environment variable
@ -7,10 +7,14 @@
# current benchmark of optimization: 1.33 times faster # current benchmark of optimization: 1.33 times faster
# setup: # setup:
__version__ = "2.1.0-alpha.1" __version__ = "2.1.0-alpha.3"
# changelog should be viewed using print(analysis.__changelog__) # changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog: __changelog__ = """changelog:
2.1.0-alpha.3:
- fixed indentation in meta data
2.1.0-alpha.2:
- updated SVM import
2.1.0-alpha.1: 2.1.0-alpha.1:
- moved multiple submodules under analysis to their own modules/files - moved multiple submodules under analysis to their own modules/files
- added header, __version__, __changelog__, __author__, __all__ (unpopulated) - added header, __version__, __changelog__, __author__, __all__ (unpopulated)
@ -20,6 +24,7 @@ __author__ = (
"Arthur Lu <learthurgo@gmail.com>", "Arthur Lu <learthurgo@gmail.com>",
"Jacob Levine <jlevine@imsa.edu>", "Jacob Levine <jlevine@imsa.edu>",
"Dev Singh <dev@devksingh.com>", "Dev Singh <dev@devksingh.com>",
"James Pan <zpan@imsa.edu>"
) )
__all__ = [ __all__ = [
@ -37,4 +42,4 @@ from . import RandomForest
from .RegressionMetric import RegressionMetric from .RegressionMetric import RegressionMetric
from . import Sort from . import Sort
from . import StatisticalTest from . import StatisticalTest
from .SVM import SVM from . import SVM

46
data-analysis/design.kv Normal file
View File

@ -0,0 +1,46 @@
<HomeScreen>:
GridLayout:
cols: 1
GridLayout:
cols: 1
padding: 15, 15
spacing: 20, 20
Label:
text: "User Login"
font_size: "20sp"
TextInput:
id: username
hint_text: "Username"
TextInput:
id: password
password: True
hint_text: "Password"
RelativeLayout:
Button:
text: "Login"
on_press: root.login(root.ids.username.text, root.ids.password.text)
size_hint: 0.3, 0.5
pos_hint: {"center_x": 0.5, "center_y": 0.6}
Label:
id: login_wrong
text: ""
GridLayout:
cols: 2
size_hint: 0.2, 0.2
padding: 10, 10
spacing: 10, 0
Button:
text: "Forgot Password?"
background_color: 1, 1, 1, 0
opacity: 1 if self.state == "normal" else 0.5
color: 0.1, 0.7, 1, 1
Button:
text: "Sign Up"
on_press: root.sign_up()
background_color: 1, 1, 1 , 0
opacity: 1 if self.state == "normal" else 0.5
color: 0.1, 0.7, 1, 1
<RootWidget>:
HomeScreen:
name: "home_screen"

41
data-analysis/main.py Normal file
View File

@ -0,0 +1,41 @@
from kivy.app import App
from kivy.lang import Builder
from kivy.uix.screenmanager import ScreenManager , Screen
from kivy.animation import Animation
from hoverable import HoverBehavior
from kivy.uix.image import Image
from kivy.uix.behaviors import ButtonBehavior
import json
from datetime import datetime
import glob
from pathlib import Path
import random
import superscript as ss
# Load the kv rules (<HomeScreen>, <RootWidget>) from design.kv when this
# module is imported, before any of the widget classes below are instantiated.
Builder.load_file('design.kv')
class HomeScreen(Screen):
    """Login screen whose widget tree is declared by the <HomeScreen> rule in design.kv.

    A class body consisting only of comments is a syntax error, so this
    docstring is what keeps the module importable while the handlers below
    remain disabled.

    NOTE(review): design.kv binds button presses to ``root.login(...)`` and
    ``root.sign_up()``. With both handlers commented out, pressing those
    buttons is expected to fail with AttributeError -- restore or replace
    the handlers before shipping. The disabled code also refers to screens
    ("sign_up_screen", "login_screen_success") and a users.json file that
    are not visible in this change.
    """

    # def sign_up(self):
    #     self.manager.transition.direction = "left"
    #     self.manager.current = "sign_up_screen"

    # def login(self, uname, pword):
    #     with open ("users.json") as file:
    #         users = json.load(file)
    #     if uname in users and users[uname]["password"] == pword:
    #         self.manager.transition.direction = "left"
    #         self.manager.current = "login_screen_success"
    #     else:
    #         self.ids.login_wrong.text = "Incorrect Username or Password"
class RootWidget(ScreenManager):
    """Root screen manager; adds nothing beyond ScreenManager itself."""
class MainApp(App):
    """Kivy application that uses a RootWidget as its root."""

    def build(self):
        """Create and return the application's root widget."""
        root_widget = RootWidget()
        return root_widget
# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app = MainApp()
    app.run()