mirror of
https://github.com/titanscouting/tra-analysis.git
synced 2024-11-10 06:54:44 +00:00
tests: New unit tests for submoduling (#66)
* feat: created kivy gui boilerplate * migrated docker base image to debian Signed-off-by: ltcptgeneral <learthurgo@gmail.com> * migrated to ubuntu Signed-off-by: ltcptgeneral <learthurgo@gmail.com> * fixed issues Signed-off-by: ltcptgeneral <learthurgo@gmail.com> * fix: docker build? * fix: use ubuntu bionic * fix: get kivy installed * @ltcptgeneral can't spell * optim dockerfile for not installing unused packages * install basic stuff while building the container * use prebuilt image for development * install pylint on base image * rename and use new kivy * tests: added tests for Array and CorrelationTest Both are not working due to errors * fix: Array no longer has *args and CorrelationTest functions no longer have self in the arguments * use new thing * use 20.04 base * symlink pip3 to pip * use pip instead of pip3 * tra_analysis v 2.1.0-alpha.2 SVM v 1.0.1 added unvalidated SVM unit tests Signed-off-by: ltcptgeneral <learthurgo@gmail.com> * fixed version number Signed-off-by: ltcptgeneral <learthurgo@gmail.com> * tests: added tests for ClassificationMetric * partially fixed and commented out svm unit tests * fixed some SVM unit tests * added installing pytest to devcontainer.json * fix: small fixes to KNN Namely, removing self from parameters and passing correct arguments to KNeighborsClassifier constructor * fix, test: Added tests for KNN and NaiveBayes. Also made some small fixes in KNN, NaiveBayes, and RegressionMetric * test: finished unit tests except for StatisticalTest Also made various small fixes and style changes * StatisticalTest v 1.0.1 * fixed RegressionMetric unit test temporarily disabled CorrelationTest unit tests * tra_analysis v 2.1.0-alpha.3 * readded __all__ * fix: floating point issues in unit tests for CorrelationTest Co-authored-by: AGawde05 <agawde05@gmail.com> Co-authored-by: ltcptgeneral <learthurgo@gmail.com> Co-authored-by: Dev Singh <dev@devksingh.com> Co-authored-by: jzpan1 <panzhenyu2014@gmail.com>
This commit is contained in:
parent
f111c1c865
commit
f72d8457a7
@ -1,2 +1,7 @@
|
||||
# Base image for TRA analysis development: Ubuntu 20.04 with Python 3, pip, Kivy
# and the scientific Python packages listed below.
FROM ubuntu:20.04
WORKDIR /

# Refresh the package index and install in the SAME layer, so a cached
# `apt-get update` layer can never be combined with a newer install line.
# tzdata is installed non-interactively because its configuration step prompts.
RUN apt-get -y update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata \
    && apt-get install -y python3 python3-dev git python3-pip python3-kivy python-is-python3 libgl1-mesa-dev build-essential

# Expose pip3 as `pip`; -f keeps the step from failing if /usr/bin/pip already exists.
RUN ln -sf "$(which pip3)" /usr/bin/pip

RUN pip install pymongo pandas numpy scipy scikit-learn matplotlib pylint kivy
|
2
.devcontainer/dev-dockerfile
Normal file
2
.devcontainer/dev-dockerfile
Normal file
@ -0,0 +1,2 @@
|
||||
FROM titanscout2022/tra-analysis-base:latest
|
||||
WORKDIR /
|
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "TRA Analysis Development Environment",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile",
|
||||
"dockerfile": "dev-dockerfile",
|
||||
},
|
||||
"settings": {
|
||||
"terminal.integrated.shell.linux": "/bin/bash",
|
||||
@ -24,5 +24,5 @@
|
||||
"ms-python.python",
|
||||
"waderyan.gitblame"
|
||||
],
|
||||
"postCreateCommand": "apt install vim -y ; pip install -r data-analysis/requirements.txt ; pip install -r analysis-master/requirements.txt ; pip install --no-cache-dir pylint ; pip install --no-cache-dir tra-analysis"
|
||||
"postCreateCommand": "/usr/bin/pip3 install -r /workspaces/red-alliance-analysis/data-analysis/requirements.txt && /usr/bin/pip3 install -r /workspaces/red-alliance-analysis/analysis-master/requirements.txt && /usr/bin/pip3 install --no-cache-dir pylint && pip3 install pytest"
|
||||
}
|
@ -1,3 +1,7 @@
|
||||
import numpy as np
|
||||
import sklearn
|
||||
from sklearn import metrics
|
||||
|
||||
from tra_analysis import Analysis as an
|
||||
from tra_analysis import Array
|
||||
from tra_analysis import ClassificationMetric
|
||||
@ -12,13 +16,27 @@ from tra_analysis import StatisticalTest
|
||||
from tra_analysis import SVM
|
||||
|
||||
def test_():
|
||||
|
||||
test_data_linear = [1, 3, 6, 7, 9]
|
||||
test_data_linear2 = [2, 2, 5, 7, 13]
|
||||
test_data_array = Array(test_data_linear)
|
||||
|
||||
x_data_circular = []
|
||||
y_data_circular = []
|
||||
|
||||
y_data_ccu = [1, 3, 7, 14, 21]
|
||||
y_data_ccd = [1, 5, 7, 8.5, 8.66]
|
||||
|
||||
test_data_scrambled = [-32, 34, 19, 72, -65, -11, -43, 6, 85, -17, -98, -26, 12, 20, 9, -92, -40, 98, -78, 17, -20, 49, 93, -27, -24, -66, 40, 84, 1, -64, -68, -25, -42, -46, -76, 43, -3, 30, -14, -34, -55, -13, 41, -30, 0, -61, 48, 23, 60, 87, 80, 77, 53, 73, 79, 24, -52, 82, 8, -44, 65, 47, -77, 94, 7, 37, -79, 36, -94, 91, 59, 10, 97, -38, -67, 83, 54, 31, -95, -63, 16, -45, 21, -12, 66, -48, -18, -96, -90, -21, -83, -74, 39, 64, 69, -97, 13, 55, 27, -39]
|
||||
test_data_sorted = [-98, -97, -96, -95, -94, -92, -90, -83, -79, -78, -77, -76, -74, -68, -67, -66, -65, -64, -63, -61, -55, -52, -48, -46, -45, -44, -43, -42, -40, -39, -38, -34, -32, -30, -27, -26, -25, -24, -21, -20, -18, -17, -14, -13, -12, -11, -3, 0, 1, 6, 7, 8, 9, 10, 12, 13, 16, 17, 19, 20, 21, 23, 24, 27, 30, 31, 34, 36, 37, 39, 40, 41, 43, 47, 48, 49, 53, 54, 55, 59, 60, 64, 65, 66, 69, 72, 73, 77, 79, 80, 82, 83, 84, 85, 87, 91, 93, 94, 97, 98]
|
||||
|
||||
test_data_2D_pairs = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
|
||||
test_data_2D_positive = np.array([[23, 51], [21, 32], [15, 25], [17, 31]])
|
||||
test_output = np.array([1, 3, 4, 5])
|
||||
test_labels_2D_pairs = np.array([1, 1, 2, 2])
|
||||
validation_data_2D_pairs = np.array([[-0.8, -1], [0.8, 1.2]])
|
||||
validation_labels_2D_pairs = np.array([1, 2])
|
||||
|
||||
assert an.basic_stats(test_data_linear) == {"mean": 5.2, "median": 6.0, "standard-deviation": 2.85657137141714, "variance": 8.16, "minimum": 1.0, "maximum": 9.0}
|
||||
assert an.z_score(3.2, 6, 1.5) == -1.8666666666666665
|
||||
assert an.z_normalize([test_data_linear], 1).tolist() == [[0.07537783614444091, 0.22613350843332272, 0.45226701686664544, 0.5276448530110863, 0.6784005252999682]]
|
||||
@ -30,6 +48,58 @@ def test_():
|
||||
assert an.Metric().elo(1500, 1500, [1, 0], 400, 24) == 1512.0
|
||||
assert an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0]) == (1478.864307445517, 195.99122679202452, 0.05999602937563585)
|
||||
#assert an.Metric().trueskill([[(25, 8.33), (24, 8.25), (32, 7.5)], [(25, 8.33), (25, 8.33), (21, 6.5)]], [1, 0]) == [(metrics.trueskill.Rating(mu=21.346, sigma=7.875), metrics.trueskill.Rating(mu=20.415, sigma=7.808), metrics.trueskill.Rating(mu=29.037, sigma=7.170)), (metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=23.225, sigma=6.287))]
|
||||
|
||||
assert test_data_array.elementwise_mean() == 5.2
|
||||
assert test_data_array.elementwise_median() == 6.0
|
||||
assert test_data_array.elementwise_stdev() == 2.85657137141714
|
||||
assert test_data_array.elementwise_variance() == 8.16
|
||||
assert test_data_array.elementwise_npmin() == 1
|
||||
assert test_data_array.elementwise_npmax() == 9
|
||||
assert test_data_array.elementwise_stats() == (5.2, 6.0, 2.85657137141714, 8.16, 1, 9)
|
||||
|
||||
classif_metric = ClassificationMetric(test_data_linear2, test_data_linear)
|
||||
assert classif_metric[0].all() == metrics.confusion_matrix(test_data_linear, test_data_linear2).all()
|
||||
assert classif_metric[1] == metrics.classification_report(test_data_linear, test_data_linear2)
|
||||
|
||||
assert all(np.isclose(list(CorrelationTest.anova_oneway(test_data_linear, test_data_linear2).values()), [0.05825242718446602, 0.8153507906592907], rtol=1e-10))
|
||||
assert all(np.isclose(list(CorrelationTest.pearson(test_data_linear, test_data_linear2).values()), [0.9153061540753287, 0.02920895440940868], rtol=1e-10))
|
||||
assert all(np.isclose(list(CorrelationTest.spearman(test_data_linear, test_data_linear2).values()), [0.9746794344808964, 0.004818230468198537], rtol=1e-10))
|
||||
assert all(np.isclose(list(CorrelationTest.point_biserial(test_data_linear, test_data_linear2).values()), [0.9153061540753287, 0.02920895440940868], rtol=1e-10))
|
||||
assert all(np.isclose(list(CorrelationTest.kendall(test_data_linear, test_data_linear2).values()), [0.9486832980505137, 0.022977401503206086], rtol=1e-10))
|
||||
assert all(np.isclose(list(CorrelationTest.kendall_weighted(test_data_linear, test_data_linear2).values()), [0.9750538072369643, np.nan], rtol=1e-10, equal_nan=True))
|
||||
|
||||
assert Fit.CircleFit(x=[0,0,-1,1], y=[1, -1, 0, 0]).LSC() == (0.0, 0.0, 1.0, 0.0)
|
||||
|
||||
model, metric = KNN.knn_classifier(test_data_2D_pairs, test_labels_2D_pairs, 2)
|
||||
assert isinstance(model, sklearn.neighbors.KNeighborsClassifier)
|
||||
assert np.array([[0,0], [2,0]]).all() == metric[0].all()
|
||||
assert ' precision recall f1-score support\n\n 1 0.00 0.00 0.00 0.0\n 2 0.00 0.00 0.00 2.0\n\n accuracy 0.00 2.0\n macro avg 0.00 0.00 0.00 2.0\nweighted avg 0.00 0.00 0.00 2.0\n' == metric[1]
|
||||
model, metric = KNN.knn_regressor(test_data_2D_pairs, test_output, 2)
|
||||
assert isinstance(model, sklearn.neighbors.KNeighborsRegressor)
|
||||
assert (-25.0, 6.5, 2.5495097567963922) == metric
|
||||
|
||||
model, metric = NaiveBayes.gaussian(test_data_2D_pairs, test_labels_2D_pairs)
|
||||
assert isinstance(model, sklearn.naive_bayes.GaussianNB)
|
||||
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
|
||||
model, metric = NaiveBayes.multinomial(test_data_2D_positive, test_labels_2D_pairs)
|
||||
assert isinstance(model, sklearn.naive_bayes.MultinomialNB)
|
||||
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
|
||||
model, metric = NaiveBayes.bernoulli(test_data_2D_pairs, test_labels_2D_pairs)
|
||||
assert isinstance(model, sklearn.naive_bayes.BernoulliNB)
|
||||
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
|
||||
model, metric = NaiveBayes.complement(test_data_2D_positive, test_labels_2D_pairs)
|
||||
assert isinstance(model, sklearn.naive_bayes.ComplementNB)
|
||||
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
|
||||
|
||||
model, metric = RandomForest.random_forest_classifier(test_data_2D_pairs, test_labels_2D_pairs, 0.3, 2)
|
||||
assert isinstance(model, sklearn.ensemble.RandomForestClassifier)
|
||||
assert metric[0].all() == np.array([[0, 0], [2, 0]]).all()
|
||||
model, metric = RandomForest.random_forest_regressor(test_data_2D_pairs, test_labels_2D_pairs, 0.3, 2)
|
||||
assert isinstance(model, sklearn.ensemble.RandomForestRegressor)
|
||||
assert metric == (0.0, 1.0, 1.0)
|
||||
|
||||
assert RegressionMetric(test_data_linear, test_data_linear2)== (0.7705314009661837, 3.8, 1.9493588689617927)
|
||||
|
||||
assert all(a == b for a, b in zip(Sort.quicksort(test_data_scrambled), test_data_sorted))
|
||||
assert all(a == b for a, b in zip(Sort.mergesort(test_data_scrambled), test_data_sorted))
|
||||
assert all(a == b for a, b in zip(Sort.heapsort(test_data_scrambled), test_data_sorted))
|
||||
@ -41,4 +111,35 @@ def test_():
|
||||
assert all(a == b for a, b in zip(Sort.bubblesort(test_data_scrambled), test_data_sorted))
|
||||
assert all(a == b for a, b in zip(Sort.cyclesort(test_data_scrambled), test_data_sorted))
|
||||
assert all(a == b for a, b in zip(Sort.cocktailsort(test_data_scrambled), test_data_sorted))
|
||||
|
||||
assert Fit.CircleFit(x=[0,0,-1,1], y=[1, -1, 0, 0]).LSC() == (0.0, 0.0, 1.0, 0.0)
|
||||
|
||||
svm(test_data_2D_pairs, test_labels_2D_pairs, validation_data_2D_pairs, validation_labels_2D_pairs)
|
||||
|
||||
def svm(data, labels, test_data, test_labels):
    """Fit the enabled prebuilt SVM kernels and assert each one predicts ``test_labels``.

    Args:
        data: training samples passed to ``SVM.fit``.
        labels: training labels passed to ``SVM.fit``.
        test_data: samples to predict, one at a time.
        test_labels: expected label for the sample at the same position.

    Raises:
        AssertionError: if any fitted kernel predicts a different label.
    """
    # NOTE(review): the polynomial kernel (SVM.PrebuiltKernel.Polynomial(3, 0))
    # was commented out in the original and stays excluded here; confirm why
    # before re-enabling it.
    kernels = (
        SVM.PrebuiltKernel.Linear(),
        SVM.PrebuiltKernel.RBF('scale'),
        SVM.PrebuiltKernel.Sigmoid(0),
    )

    # Fit everything first, then check, matching the original order of work.
    fitted = [SVM.fit(kernel, data, labels) for kernel in kernels]

    for model in fitted:
        for sample, expected in zip(test_data, test_labels):
            assert model.predict([sample]).tolist() == [expected]
|
||||
|
@ -397,7 +397,7 @@ from .RandomForest_obj import RandomForest
|
||||
from .RegressionMetric import RegressionMetric
|
||||
from .Sort_obj import Sort
|
||||
from .StatisticalTest_obj import StatisticalTest
|
||||
from .SVM import SVM
|
||||
from . import SVM
|
||||
|
||||
class error(ValueError):
    """ValueError subclass that adds no behavior of its own."""
|
||||
|
@ -27,55 +27,37 @@ class Array(): # tests on nd arrays independent of basic_stats
|
||||
|
||||
return str(self.array)
|
||||
|
||||
def elementwise_mean(self, axis = 0): # expects arrays that are size normalized
    """Return ``np.mean`` of ``self.array`` along ``axis``.

    The former ``*args`` branch is gone: it evaluated ``len(*args)``, which
    raises ``TypeError`` when no extra arguments are supplied.

    Args:
        axis: axis handed to ``np.mean`` (default 0).
    """
    return np.mean(self.array, axis = axis)
|
||||
|
||||
def elementwise_median(self, axis = 0):
    """Return ``np.median`` of ``self.array`` along ``axis``.

    The former ``*args`` branch is gone: it evaluated ``len(*args)``, which
    raises ``TypeError`` when no extra arguments are supplied.

    Args:
        axis: axis handed to ``np.median`` (default 0).
    """
    return np.median(self.array, axis = axis)
|
||||
|
||||
def elementwise_stdev(self, axis = 0):
    """Return ``np.std`` of ``self.array`` along ``axis``.

    ``np.std`` is called with its defaults, so this is the population
    standard deviation (``ddof=0``). The former ``*args`` branch is gone: it
    evaluated ``len(*args)``, which raises ``TypeError`` when no extra
    arguments are supplied.

    Args:
        axis: axis handed to ``np.std`` (default 0).
    """
    return np.std(self.array, axis = axis)
|
||||
|
||||
def elementwise_variance(self, axis = 0):
    """Return ``np.var`` of ``self.array`` along ``axis``.

    ``np.var`` is called with its defaults, so this is the population
    variance (``ddof=0``). The former ``*args`` branch is gone: it evaluated
    ``len(*args)``, which raises ``TypeError`` when no extra arguments are
    supplied.

    Args:
        axis: axis handed to ``np.var`` (default 0).
    """
    return np.var(self.array, axis = axis)
|
||||
|
||||
def elementwise_npmin(self, axis = 0):
    """Return ``np.amin`` of ``self.array`` along ``axis``.

    The former ``*args`` branch is gone: it evaluated ``len(*args)``, which
    raises ``TypeError`` when no extra arguments are supplied.

    Args:
        axis: axis handed to ``np.amin`` (default 0).
    """
    return np.amin(self.array, axis = axis)
|
||||
|
||||
def elementwise_npmax(self, axis = 0):
    """Return ``np.amax`` of ``self.array`` along ``axis``.

    The former ``*args`` branch is gone: it evaluated ``len(*args)``, which
    raises ``TypeError`` when no extra arguments are supplied.

    Args:
        axis: axis handed to ``np.amax`` (default 0).
    """
    return np.amax(self.array, axis = axis)
|
||||
|
||||
def elementwise_stats(self, axis = 0):
    """Return ``(mean, median, stdev, variance, min, max)`` along ``axis``.

    Each entry comes from the matching ``elementwise_*`` method on this
    object, called with the same ``axis``. The stale ``*args`` forwarding is
    removed so the calls match the ``axis``-only signatures.

    Args:
        axis: axis forwarded to every ``elementwise_*`` call (default 0).
    """
    _mean = self.elementwise_mean(axis = axis)
    _median = self.elementwise_median(axis = axis)
    _stdev = self.elementwise_stdev(axis = axis)
    _variance = self.elementwise_variance(axis = axis)
    _min = self.elementwise_npmin(axis = axis)
    _max = self.elementwise_npmax(axis = axis)

    return _mean, _median, _stdev, _variance, _min, _max
|
||||
|
||||
|
@ -22,37 +22,37 @@ __all__ = [
|
||||
import scipy
|
||||
from scipy import stats
|
||||
|
||||
def anova_oneway(*args): #expects arrays of samples
    """Run ``scipy.stats.f_oneway`` on the given sample groups.

    Args:
        *args: one array of samples per group, forwarded unchanged.

    Returns:
        dict with ``"f-value"`` and ``"p-value"``, the first two elements of
        the SciPy result.
    """
    results = scipy.stats.f_oneway(*args)
    return {"f-value": results[0], "p-value": results[1]}
|
||||
|
||||
def pearson(x, y):
    """Run ``scipy.stats.pearsonr`` on ``x`` and ``y``.

    Returns:
        dict with ``"r-value"`` and ``"p-value"``, the first two elements of
        the SciPy result.
    """
    results = scipy.stats.pearsonr(x, y)
    return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
def spearman(a, b = None, axis = 0, nan_policy = 'propagate'):
    """Run ``scipy.stats.spearmanr`` with the given arguments.

    Args:
        a: first input, forwarded positionally.
        b: optional second input, forwarded as ``b``.
        axis: forwarded as ``axis``.
        nan_policy: forwarded as ``nan_policy``.

    Returns:
        dict with ``"r-value"`` and ``"p-value"``, the first two elements of
        the SciPy result.
    """
    results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
    return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
def point_biserial(x, y):
    """Run ``scipy.stats.pointbiserialr`` on ``x`` and ``y``.

    Returns:
        dict with ``"r-value"`` and ``"p-value"``, the first two elements of
        the SciPy result.
    """
    results = scipy.stats.pointbiserialr(x, y)
    return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
def kendall(x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
    """Run ``scipy.stats.kendalltau`` on ``x`` and ``y``.

    Args:
        x, y: inputs forwarded positionally.
        initial_lexsort: accepted so existing callers keep working, but no
            longer forwarded. SciPy documented it as unused, and newer SciPy
            releases no longer accept the keyword at all.
        nan_policy: forwarded as ``nan_policy``.
        method: forwarded as ``method``.

    Returns:
        dict with ``"tau"`` and ``"p-value"``, the first two elements of the
        SciPy result.
    """
    results = scipy.stats.kendalltau(x, y, nan_policy = nan_policy, method = method)
    return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
def kendall_weighted(x, y, rank = True, weigher = None, additive = True):
    """Run ``scipy.stats.weightedtau`` on ``x`` and ``y``.

    Args:
        x, y: inputs forwarded positionally.
        rank, weigher, additive: forwarded under the same keyword names.

    Returns:
        dict with ``"tau"`` and ``"p-value"``, the first two elements of the
        SciPy result. The project's own unit test expects ``"p-value"`` to be
        NaN here.
    """
    results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
    return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
def mgc(x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
    """Run ``scipy.stats.multiscale_graphcorr`` with the given arguments.

    Args:
        x, y: inputs forwarded positionally.
        compute_distance, reps, workers, is_twosamp, random_state: forwarded
            under the same keyword names.

    Returns:
        dict with ``"k-value"``, ``"p-value"`` and ``"data"``, the first three
        elements of the SciPy result.
    """
    # NOTE(review): compute_distance defaults to None here rather than to
    # SciPy's own default; presumably x and y are then expected to already be
    # distance matrices. Confirm against the installed SciPy version.
    results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
    return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value
|
@ -14,29 +14,32 @@ __changelog__ = """changelog:
|
||||
|
||||
__author__ = (
|
||||
"Arthur Lu <learthurgo@gmail.com>",
|
||||
"James Pan <zpan@imsa.edu>"
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'knn_classifier',
|
||||
'knn_regressor'
|
||||
]
|
||||
|
||||
import sklearn
|
||||
from sklearn import model_selection, neighbors
|
||||
from . import ClassificationMetric, RegressionMetric
|
||||
|
||||
def knn_classifier(data, labels, n_neighbors = 5, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
    """Fit a ``KNeighborsClassifier`` on a train split and score it on the test split.

    The data is split with ``train_test_split(..., random_state=1)``, so the
    split is the same on every call. All hyper-parameters are passed to the
    estimator; the stale form built it with no arguments and so ignored them.

    Args:
        data: samples to split and fit on.
        labels: labels aligned with ``data``.
        n_neighbors, weights, algorithm, leaf_size, p, metric, metric_params,
            n_jobs: forwarded to ``KNeighborsClassifier`` under the same names.
        test_size: forwarded to ``train_test_split``.

    Returns:
        ``(model, ClassificationMetric(predictions, labels_test))``.
    """
    data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
    model = sklearn.neighbors.KNeighborsClassifier(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
    model.fit(data_train, labels_train)
    predictions = model.predict(data_test)

    return model, ClassificationMetric(predictions, labels_test)
|
||||
|
||||
def knn_regressor(data, outputs, n_neighbors = 5, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
    """Fit a ``KNeighborsRegressor`` on a train split and score it on the test split.

    The data is split with ``train_test_split(..., random_state=1)``, so the
    split is the same on every call.

    Args:
        data: samples to split and fit on.
        outputs: target values aligned with ``data``.
        n_neighbors, weights, algorithm, leaf_size, p, metric, metric_params,
            n_jobs: forwarded to ``KNeighborsRegressor`` under the same names.
        test_size: forwarded to ``train_test_split``.

    Returns:
        ``(model, RegressionMetric.RegressionMetric(predictions, outputs_test))``.
    """
    data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
    model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
    model.fit(data_train, outputs_train)
    predictions = model.predict(data_test)

    # NOTE(review): RegressionMetric is used as a module here, while
    # knn_classifier calls ClassificationMetric directly; confirm which name
    # `from . import ...` actually binds.
    return model, RegressionMetric.RegressionMetric(predictions, outputs_test)
|
@ -16,13 +16,17 @@ __author__ = (
|
||||
)
|
||||
|
||||
# Names exported by `from <module> import *`. Every entry needs its own comma:
# adjacent string literals are concatenated, so a missing comma after
# 'multinomial' used to export the single bogus name 'multinomialbernoulli'.
__all__ = [
    'gaussian',
    'multinomial',
    'bernoulli',
    'complement',
]
|
||||
|
||||
import sklearn
|
||||
from sklearn import model_selection, naive_bayes
|
||||
from . import ClassificationMetric, RegressionMetric
|
||||
|
||||
def guassian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09):
|
||||
def gaussian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09):
|
||||
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.naive_bayes.GaussianNB(priors = priors, var_smoothing = var_smoothing)
|
||||
|
@ -39,4 +39,4 @@ def random_forest_regressor(data, outputs, test_size, n_estimators, criterion="m
|
||||
kernel.fit(data_train, outputs_train)
|
||||
predictions = kernel.predict(data_test)
|
||||
|
||||
return kernel, RegressionMetric(predictions, outputs_test)
|
||||
return kernel, RegressionMetric.RegressionMetric(predictions, outputs_test)
|
@ -16,8 +16,10 @@ __author__ = (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'RegressionMetric'
|
||||
]
|
||||
|
||||
import numpy as np
|
||||
import sklearn
|
||||
from sklearn import metrics
|
||||
|
||||
@ -37,4 +39,4 @@ class RegressionMetric():
|
||||
|
||||
def rms(self, predictions, targets):
    """Return the root-mean-square error between ``predictions`` and ``targets``.

    Computed as ``np.sqrt`` of ``sklearn.metrics.mean_squared_error(targets,
    predictions)``. NumPy is used because the module's visible imports include
    ``numpy`` but not ``math``.
    """
    return np.sqrt(sklearn.metrics.mean_squared_error(targets, predictions))
|
@ -4,9 +4,12 @@
|
||||
# this should be imported as a python module using 'from tra_analysis import SVM'
|
||||
# setup:
|
||||
|
||||
__version__ = "1.0.0"
|
||||
__version__ = "1.0.1"
|
||||
|
||||
__changelog__ = """changelog:
|
||||
1.0.1:
|
||||
- removed unessasary self calls
|
||||
- removed classness
|
||||
1.0.0:
|
||||
- ported analysis.SVM() here
|
||||
"""
|
||||
@ -22,8 +25,6 @@ import sklearn
|
||||
from sklearn import svm
|
||||
from . import ClassificationMetric, RegressionMetric
|
||||
|
||||
class SVM:
|
||||
|
||||
class CustomKernel:
|
||||
|
||||
def __new__(cls, C, kernel, degre, gamma, coef0, shrinking, probability, tol, cache_size, class_weight, verbose, max_iter, decision_function_shape, random_state):
|
||||
@ -62,17 +63,17 @@ class SVM:
|
||||
|
||||
return sklearn.svm.SVC(kernel = 'sigmoid', coef0 = r_bias)
|
||||
|
||||
def fit(kernel, train_data, train_outputs): # expects *2d data, 1d labels or outputs
    """Fit ``kernel`` on the training data and return what ``kernel.fit`` returns.

    Args:
        kernel: object exposing ``fit(train_data, train_outputs)``.
        train_data: training samples, forwarded unchanged.
        train_outputs: training labels or outputs, forwarded unchanged.
    """
    return kernel.fit(train_data, train_outputs)
|
||||
|
||||
def eval_classification(kernel, test_data, test_outputs):
    """Predict ``test_data`` with ``kernel`` and score the result.

    Args:
        kernel: fitted object exposing ``predict(test_data)``.
        test_data: samples to predict.
        test_outputs: expected labels for ``test_data``.

    Returns:
        ``ClassificationMetric(predictions, test_outputs)``.
    """
    predictions = kernel.predict(test_data)

    return ClassificationMetric(predictions, test_outputs)
|
||||
|
||||
def eval_regression(self, kernel, test_data, test_outputs):
|
||||
def eval_regression(kernel, test_data, test_outputs):
|
||||
|
||||
predictions = kernel.predict(test_data)
|
||||
|
||||
|
@ -4,9 +4,11 @@
|
||||
# this should be imported as a python module using 'from tra_analysis import StatisticalTest'
|
||||
# setup:
|
||||
|
||||
__version__ = "1.0.0"
|
||||
__version__ = "1.0.1"
|
||||
|
||||
__changelog__ = """changelog:
|
||||
1.0.1:
|
||||
- fixed typo in __all__
|
||||
1.0.0:
|
||||
- ported analysis.StatisticalTest() here
|
||||
- removed classness
|
||||
@ -17,6 +19,39 @@ __author__ = (
|
||||
)
|
||||
|
||||
# Names exported by `from <module> import *`. Every entry needs its own comma:
# adjacent string literals are concatenated, so a missing comma after
# 'powerdivergence' used to export the bogus name 'powerdivergenceks_twosample'.
__all__ = [
    'ttest_onesample',
    'ttest_independent',
    'ttest_statistic',
    'ttest_related',
    'ks_fitness',
    'chisquare',
    'powerdivergence',
    'ks_twosample',
    'es_twosample',
    'mw_rank',
    'mw_tiecorrection',
    'rankdata',
    'wilcoxon_ranksum',
    'wilcoxon_signedrank',
    'kw_htest',
    'friedman_chisquare',
    'bm_wtest',
    'combine_pvalues',
    'jb_fitness',
    'ab_equality',
    'bartlett_variance',
    'levene_variance',
    'sw_normality',
    'shapiro',
    'ad_onesample',
    'ad_ksample',
    'binomial',
    'fk_variance',
    'mood_mediantest',
    'mood_equalscale',
    'skewtest',
    'kurtosistest',
    'normaltest',
]
|
||||
|
||||
import scipy
|
||||
|
@ -7,10 +7,14 @@
|
||||
# current benchmark of optimization: 1.33 times faster
|
||||
# setup:
|
||||
|
||||
__version__ = "2.1.0-alpha.1"
|
||||
__version__ = "2.1.0-alpha.3"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
2.1.0-alpha.3:
|
||||
- fixed indentation in meta data
|
||||
2.1.0-alpha.2:
|
||||
- updated SVM import
|
||||
2.1.0-alpha.1:
|
||||
- moved multiple submodules under analysis to their own modules/files
|
||||
- added header, __version__, __changelog__, __author__, __all__ (unpopulated)
|
||||
@ -37,4 +41,4 @@ from . import RandomForest
|
||||
from .RegressionMetric import RegressionMetric
|
||||
from . import Sort
|
||||
from . import StatisticalTest
|
||||
from .SVM import SVM
|
||||
from . import SVM
|
46
data-analysis/design.kv
Normal file
46
data-analysis/design.kv
Normal file
@ -0,0 +1,46 @@
|
||||
# Kivy layout rules for the login screen.
# NOTE(review): the source view had lost all indentation, and kv nesting is
# defined by indentation. The widget hierarchy below is reconstructed from the
# order of widgets and properties; confirm it against the intended layout.
<HomeScreen>:
    GridLayout:
        cols: 1
        # Upper section: title, credential inputs, login button, error label.
        GridLayout:
            cols: 1
            padding: 15, 15
            spacing: 20, 20
            Label:
                text: "User Login"
                font_size: "20sp"
            TextInput:
                id: username
                hint_text: "Username"
            TextInput:
                id: password
                password: True
                hint_text: "Password"
            RelativeLayout:
                Button:
                    text: "Login"
                    # Calls HomeScreen.login(username, password) on the root widget.
                    on_press: root.login(root.ids.username.text, root.ids.password.text)
                    size_hint: 0.3, 0.5
                    pos_hint: {"center_x": 0.5, "center_y": 0.6}
            Label:
                id: login_wrong
                text: ""
        # Lower section: link-styled buttons (transparent background).
        GridLayout:
            cols: 2
            size_hint: 0.2, 0.2
            padding: 10, 10
            spacing: 10, 0
            Button:
                text: "Forgot Password?"
                background_color: 1, 1, 1, 0
                opacity: 1 if self.state == "normal" else 0.5
                color: 0.1, 0.7, 1, 1
            Button:
                text: "Sign Up"
                # Calls HomeScreen.sign_up() on the root widget.
                on_press: root.sign_up()
                background_color: 1, 1, 1 , 0
                opacity: 1 if self.state == "normal" else 0.5
                color: 0.1, 0.7, 1, 1

<RootWidget>:
    HomeScreen:
        name: "home_screen"
|
41
data-analysis/main.py
Normal file
41
data-analysis/main.py
Normal file
@ -0,0 +1,41 @@
|
||||
from kivy.app import App
|
||||
from kivy.lang import Builder
|
||||
from kivy.uix.screenmanager import ScreenManager , Screen
|
||||
from kivy.animation import Animation
|
||||
from hoverable import HoverBehavior
|
||||
from kivy.uix.image import Image
|
||||
from kivy.uix.behaviors import ButtonBehavior
|
||||
import json
|
||||
from datetime import datetime
|
||||
import glob
|
||||
from pathlib import Path
|
||||
import random
|
||||
|
||||
import superscript as ss
|
||||
|
||||
# Load the layout rules from the design.kv that sits next to this file, so the
# app also starts when launched from a different working directory.
Builder.load_file(str(Path(__file__).with_name('design.kv')))
|
||||
|
||||
class HomeScreen(Screen):
    """Login screen; its widgets come from the ``<HomeScreen>`` rule in design.kv.

    design.kv binds the "Login" button to ``root.login(username, password)``
    and the "Sign Up" button to ``root.sign_up()``. The class body used to
    hold only comments, which is a syntax error and left both handlers
    undefined. They are now explicit no-op placeholders, with the disabled
    reference implementation kept as comments.
    """

    def sign_up(self):
        """Handle the "Sign Up" button; currently does nothing."""
        # TODO: enable once a "sign_up_screen" is registered on the manager:
        # self.manager.transition.direction = "left"
        # self.manager.current = "sign_up_screen"

    def login(self, uname, pword):
        """Handle the "Login" button; currently does nothing.

        Args:
            uname: text of the username input.
            pword: text of the password input.
        """
        # TODO: enable once users.json and a "login_screen_success" screen exist:
        # with open ("users.json") as file:
        #     users = json.load(file)
        # if uname in users and users[uname]["password"] == pword:
        #     self.manager.transition.direction = "left"
        #     self.manager.current = "login_screen_success"
        # else:
        #     self.ids.login_wrong.text = "Incorrect Username or Password"
|
||||
|
||||
|
||||
class RootWidget(ScreenManager):
    """Screen manager returned by ``MainApp.build``; it adds no behavior of its own."""
|
||||
|
||||
class MainApp(App):
    """Kivy application whose root widget is a ``RootWidget``."""

    def build(self):
        """Return a new ``RootWidget`` to use as the application's root."""
        return RootWidget()
|
||||
|
||||
if __name__ == "__main__":
    # Start the app only when this file is run as a script, not on import.
    MainApp().run()
|
Loading…
Reference in New Issue
Block a user