From e6674e9c05ba11d92f296ba3fed81f7900319b45 Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Wed, 26 May 2021 07:27:09 +0000 Subject: [PATCH] started branch for analysis v4 dev deprecated old modules --- analysis-master/tra_analysis/Analysis.py | 20 +- analysis-master/tra_analysis/Array.py | 8 +- .../tra_analysis/CorrelationTest_obj.py | 41 -- analysis-master/tra_analysis/KNN_obj.py | 25 -- .../tra_analysis/NaiveBayes_obj.py | 43 -- .../tra_analysis/RandomForest_obj.py | 25 -- analysis-master/tra_analysis/Sort_obj.py | 391 ------------------ .../tra_analysis/StatisticalTest_obj.py | 170 -------- analysis-master/tra_analysis/__init__.py | 7 +- .../tra_analysis/regression_old.py | 222 ---------- analysis-master/tra_analysis/titanlearn.py | 122 ------ analysis-master/tra_analysis/visualization.py | 58 --- 12 files changed, 21 insertions(+), 1111 deletions(-) delete mode 100644 analysis-master/tra_analysis/CorrelationTest_obj.py delete mode 100644 analysis-master/tra_analysis/KNN_obj.py delete mode 100644 analysis-master/tra_analysis/NaiveBayes_obj.py delete mode 100644 analysis-master/tra_analysis/RandomForest_obj.py delete mode 100644 analysis-master/tra_analysis/Sort_obj.py delete mode 100644 analysis-master/tra_analysis/StatisticalTest_obj.py delete mode 100644 analysis-master/tra_analysis/regression_old.py delete mode 100644 analysis-master/tra_analysis/titanlearn.py delete mode 100644 analysis-master/tra_analysis/visualization.py diff --git a/analysis-master/tra_analysis/Analysis.py b/analysis-master/tra_analysis/Analysis.py index 7bf68c8d..f891393e 100644 --- a/analysis-master/tra_analysis/Analysis.py +++ b/analysis-master/tra_analysis/Analysis.py @@ -7,10 +7,12 @@ # current benchmark of optimization: 1.33 times faster # setup: -__version__ = "3.0.2" +__version__ = "3.0.3" # changelog should be viewed using print(analysis.__changelog__) __changelog__ = """changelog: + 3.0.3: + - fixed spelling of deprecate 3.0.2: - fixed __all__ 3.0.1: @@ -58,7 +60,7 @@ __changelog__ = """changelog: - cycle sort - cocktail sort - tested all sorting algorithms with both lists and numpy arrays - - depreciated sort function from Array class + - deprecated sort function from Array class - added warnings as an import 2.1.4: - added sort and search functions to Array class @@ -136,7 +138,7 @@ __changelog__ = """changelog: 1.12.4: - renamed gliko to glicko 1.12.3: - - removed depreciated code + - removed deprecated code 1.12.2: - removed team first time trueskill instantiation in favor of integration in superscript.py 1.12.1: @@ -248,10 +250,10 @@ __changelog__ = """changelog: 1.0.0: - removed c_entities,nc_entities,obstacles,objectives from __all__ - applied numba.jit to all functions - - depreciated and removed stdev_z_split + - deprecated and removed stdev_z_split - cleaned up histo_analysis to include numpy and numba.jit optimizations - - depreciated and removed all regression functions in favor of future pytorch optimizer - - depreciated and removed all nonessential functions (basic_analysis, benchmark, strip_data) + - deprecated and removed all regression functions in favor of future pytorch optimizer + - deprecated and removed all nonessential functions (basic_analysis, benchmark, strip_data) - optimized z_normalize using sklearn.preprocessing.normalize - TODO: implement kernel/function based pytorch regression optimizer 0.9.0: @@ -270,8 +272,8 @@ __changelog__ = """changelog: - refactors - bugfixes 0.8.0: - - depreciated histo_analysis_old - - depreciated debug + - deprecated histo_analysis_old + - 
deprecated debug - altered basic_analysis to take array data instead of filepath - refactor - optimization @@ -319,7 +321,7 @@ __changelog__ = """changelog: 0.3.5: - major bug fixes - updated historical analysis - - depreciated old historical analysis + - deprecated old historical analysis 0.3.4: - added __version__, __author__, __all__ - added polynomial regression diff --git a/analysis-master/tra_analysis/Array.py b/analysis-master/tra_analysis/Array.py index 1699c3c4..bfd39ce8 100644 --- a/analysis-master/tra_analysis/Array.py +++ b/analysis-master/tra_analysis/Array.py @@ -4,9 +4,11 @@ # this should be imported as a python module using 'from tra_analysis import Array' # setup: -__version__ = "1.0.3" +__version__ = "1.0.4" __changelog__ = """changelog: + 1.0.4: + - fixed spelling of deprecate 1.0.3: - fixed __all__ 1.0.2: @@ -135,8 +137,8 @@ class Array(): # tests on nd arrays independent of basic_stats return Array(np.transpose(self.array)) - def sort(self, array): # depreciated - warnings.warn("Array.sort has been depreciated in favor of Sort") + def sort(self, array): # deprecated + warnings.warn("Array.sort has been deprecated in favor of Sort") array_length = len(array) if array_length <= 1: return array diff --git a/analysis-master/tra_analysis/CorrelationTest_obj.py b/analysis-master/tra_analysis/CorrelationTest_obj.py deleted file mode 100644 index 16168094..00000000 --- a/analysis-master/tra_analysis/CorrelationTest_obj.py +++ /dev/null @@ -1,41 +0,0 @@ -# Only included for backwards compatibility! Do not update, CorrelationTest is preferred and supported. - -import scipy -from scipy import stats - -class CorrelationTest: - - def anova_oneway(self, *args): #expects arrays of samples - - results = scipy.stats.f_oneway(*args) - return {"f-value": results[0], "p-value": results[1]} - - def pearson(self, x, y): - - results = scipy.stats.pearsonr(x, y) - return {"r-value": results[0], "p-value": results[1]} - - def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'): - - results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy) - return {"r-value": results[0], "p-value": results[1]} - - def point_biserial(self, x,y): - - results = scipy.stats.pointbiserialr(x, y) - return {"r-value": results[0], "p-value": results[1]} - - def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'): - - results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method) - return {"tau": results[0], "p-value": results[1]} - - def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True): - - results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive) - return {"tau": results[0], "p-value": results[1]} - - def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None): - - results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state) - return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value \ No newline at end of file diff --git a/analysis-master/tra_analysis/KNN_obj.py b/analysis-master/tra_analysis/KNN_obj.py deleted file mode 100644 index b506bf6c..00000000 --- a/analysis-master/tra_analysis/KNN_obj.py +++ /dev/null @@ -1,25 +0,0 @@ -# Only included for backwards compatibility! 
Do not update, NaiveBayes is preferred and supported. - -import sklearn -from sklearn import model_selection, neighbors -from . import ClassificationMetric, RegressionMetric - -class KNN: - - def knn_classifier(self, data, labels, n_neighbors, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling - - data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) - model = sklearn.neighbors.KNeighborsClassifier() - model.fit(data_train, labels_train) - predictions = model.predict(data_test) - - return model, ClassificationMetric(predictions, labels_test) - - def knn_regressor(self, data, outputs, n_neighbors, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None): - - data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1) - model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs) - model.fit(data_train, outputs_train) - predictions = model.predict(data_test) - - return model, RegressionMetric(predictions, outputs_test) \ No newline at end of file diff --git a/analysis-master/tra_analysis/NaiveBayes_obj.py b/analysis-master/tra_analysis/NaiveBayes_obj.py deleted file mode 100644 index 70e4d437..00000000 --- a/analysis-master/tra_analysis/NaiveBayes_obj.py +++ /dev/null @@ -1,43 +0,0 @@ -# Only included for backwards compatibility! Do not update, NaiveBayes is preferred and supported. - -import sklearn -from sklearn import model_selection, naive_bayes -from . 
import ClassificationMetric, RegressionMetric - -class NaiveBayes: - - def guassian(self, data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09): - - data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) - model = sklearn.naive_bayes.GaussianNB(priors = priors, var_smoothing = var_smoothing) - model.fit(data_train, labels_train) - predictions = model.predict(data_test) - - return model, ClassificationMetric(predictions, labels_test) - - def multinomial(self, data, labels, test_size = 0.3, alpha=1.0, fit_prior=True, class_prior=None): - - data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) - model = sklearn.naive_bayes.MultinomialNB(alpha = alpha, fit_prior = fit_prior, class_prior = class_prior) - model.fit(data_train, labels_train) - predictions = model.predict(data_test) - - return model, ClassificationMetric(predictions, labels_test) - - def bernoulli(self, data, labels, test_size = 0.3, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None): - - data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) - model = sklearn.naive_bayes.BernoulliNB(alpha = alpha, binarize = binarize, fit_prior = fit_prior, class_prior = class_prior) - model.fit(data_train, labels_train) - predictions = model.predict(data_test) - - return model, ClassificationMetric(predictions, labels_test) - - def complement(self, data, labels, test_size = 0.3, alpha=1.0, fit_prior=True, class_prior=None, norm=False): - - data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) - model = sklearn.naive_bayes.ComplementNB(alpha = alpha, fit_prior = fit_prior, class_prior = class_prior, norm = norm) - model.fit(data_train, labels_train) - predictions = model.predict(data_test) - - return model, ClassificationMetric(predictions, labels_test) \ No newline at end of file diff --git a/analysis-master/tra_analysis/RandomForest_obj.py b/analysis-master/tra_analysis/RandomForest_obj.py deleted file mode 100644 index c33f8dce..00000000 --- a/analysis-master/tra_analysis/RandomForest_obj.py +++ /dev/null @@ -1,25 +0,0 @@ -# Only included for backwards compatibility! Do not update, RandomForest is preferred and supported. - -import sklearn -from sklearn import ensemble, model_selection -from . 
import ClassificationMetric, RegressionMetric - -class RandomForest: - - def random_forest_classifier(self, data, labels, test_size, n_estimators, criterion="gini", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight=None): - - data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) - kernel = sklearn.ensemble.RandomForestClassifier(n_estimators = n_estimators, criterion = criterion, max_depth = max_depth, min_samples_split = min_samples_split, min_samples_leaf = min_samples_leaf, min_weight_fraction_leaf = min_weight_fraction_leaf, max_leaf_nodes = max_leaf_nodes, min_impurity_decrease = min_impurity_decrease, bootstrap = bootstrap, oob_score = oob_score, n_jobs = n_jobs, random_state = random_state, verbose = verbose, warm_start = warm_start, class_weight = class_weight) - kernel.fit(data_train, labels_train) - predictions = kernel.predict(data_test) - - return kernel, ClassificationMetric(predictions, labels_test) - - def random_forest_regressor(self, data, outputs, test_size, n_estimators, criterion="mse", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False): - - data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1) - kernel = sklearn.ensemble.RandomForestRegressor(n_estimators = n_estimators, criterion = criterion, max_depth = max_depth, min_samples_split = min_samples_split, min_weight_fraction_leaf = min_weight_fraction_leaf, max_features = max_features, max_leaf_nodes = max_leaf_nodes, min_impurity_decrease = min_impurity_decrease, min_impurity_split = min_impurity_split, bootstrap = bootstrap, oob_score = oob_score, n_jobs = n_jobs, random_state = random_state, verbose = verbose, warm_start = warm_start) - kernel.fit(data_train, outputs_train) - predictions = kernel.predict(data_test) - - return kernel, RegressionMetric(predictions, outputs_test) \ No newline at end of file diff --git a/analysis-master/tra_analysis/Sort_obj.py b/analysis-master/tra_analysis/Sort_obj.py deleted file mode 100644 index b80b4760..00000000 --- a/analysis-master/tra_analysis/Sort_obj.py +++ /dev/null @@ -1,391 +0,0 @@ -# Only included for backwards compatibility! Do not update, Sort is preferred and supported. 
- -class Sort: # if you haven't used a sort, then you've never lived - - def quicksort(self, a): - - def sort(array): - - less = [] - equal = [] - greater = [] - - if len(array) > 1: - pivot = array[0] - for x in array: - if x < pivot: - less.append(x) - elif x == pivot: - equal.append(x) - elif x > pivot: - greater.append(x) - return sort(less)+equal+sort(greater) - else: - return array - - return np.array(sort(a)) - - def mergesort(self, a): - - def sort(array): - - array = array - - if len(array) >1: - middle = len(array) // 2 - L = array[:middle] - R = array[middle:] - - sort(L) - sort(R) - - i = j = k = 0 - - while i < len(L) and j < len(R): - if L[i] < R[j]: - array[k] = L[i] - i+= 1 - else: - array[k] = R[j] - j+= 1 - k+= 1 - - while i < len(L): - array[k] = L[i] - i+= 1 - k+= 1 - - while j < len(R): - array[k] = R[j] - j+= 1 - k+= 1 - - return array - - return sort(a) - - def introsort(self, a): - - def sort(array, start, end, maxdepth): - - array = array - - if end - start <= 1: - return - elif maxdepth == 0: - heapsort(array, start, end) - else: - p = partition(array, start, end) - sort(array, start, p + 1, maxdepth - 1) - sort(array, p + 1, end, maxdepth - 1) - - return array - - def partition(array, start, end): - pivot = array[start] - i = start - 1 - j = end - - while True: - i = i + 1 - while array[i] < pivot: - i = i + 1 - j = j - 1 - while array[j] > pivot: - j = j - 1 - - if i >= j: - return j - - swap(array, i, j) - - def swap(array, i, j): - array[i], array[j] = array[j], array[i] - - def heapsort(array, start, end): - build_max_heap(array, start, end) - for i in range(end - 1, start, -1): - swap(array, start, i) - max_heapify(array, index=0, start=start, end=i) - - def build_max_heap(array, start, end): - def parent(i): - return (i - 1)//2 - length = end - start - index = parent(length - 1) - while index >= 0: - max_heapify(array, index, start, end) - index = index - 1 - - def max_heapify(array, index, start, end): - def left(i): - return 2*i + 1 - def right(i): - return 2*i + 2 - - size = end - start - l = left(index) - r = right(index) - if (l < size and array[start + l] > array[start + index]): - largest = l - else: - largest = index - if (r < size and array[start + r] > array[start + largest]): - largest = r - if largest != index: - swap(array, start + largest, start + index) - max_heapify(array, largest, start, end) - - maxdepth = (len(a).bit_length() - 1)*2 - - return sort(a, 0, len(a), maxdepth) - - def heapsort(self, a): - - def sort(array): - - array = array - - n = len(array) - - for i in range(n//2 - 1, -1, -1): - heapify(array, n, i) - - for i in range(n-1, 0, -1): - array[i], array[0] = array[0], array[i] - heapify(array, i, 0) - - return array - - def heapify(array, n, i): - - array = array - - largest = i - l = 2 * i + 1 - r = 2 * i + 2 - - if l < n and array[i] < array[l]: - largest = l - - if r < n and array[largest] < array[r]: - largest = r - - if largest != i: - array[i],array[largest] = array[largest],array[i] - heapify(array, n, largest) - - return array - - return sort(a) - - def insertionsort(self, a): - - def sort(array): - - array = array - - for i in range(1, len(array)): - - key = array[i] - - j = i-1 - while j >=0 and key < array[j] : - array[j+1] = array[j] - j -= 1 - array[j+1] = key - - return array - - return sort(a) - - def timsort(self, a, block = 32): - - BLOCK = block - - def sort(array, n): - - array = array - - for i in range(0, n, BLOCK): - insertionsort(array, i, min((i+31), (n-1))) - - size = BLOCK - while size < n: - - for 
left in range(0, n, 2*size): - - mid = left + size - 1 - right = min((left + 2*size - 1), (n-1)) - merge(array, left, mid, right) - - size = 2*size - - return array - - def insertionsort(array, left, right): - - array = array - - for i in range(left + 1, right+1): - - temp = array[i] - j = i - 1 - while j >= left and array[j] > temp : - - array[j+1] = array[j] - j -= 1 - - array[j+1] = temp - - return array - - - def merge(array, l, m, r): - - len1, len2 = m - l + 1, r - m - left, right = [], [] - for i in range(0, len1): - left.append(array[l + i]) - for i in range(0, len2): - right.append(array[m + 1 + i]) - - i, j, k = 0, 0, l - - while i < len1 and j < len2: - - if left[i] <= right[j]: - array[k] = left[i] - i += 1 - - else: - array[k] = right[j] - j += 1 - - k += 1 - - while i < len1: - - array[k] = left[i] - k += 1 - i += 1 - - while j < len2: - array[k] = right[j] - k += 1 - j += 1 - - return sort(a, len(a)) - - def selectionsort(self, a): - array = a - for i in range(len(array)): - min_idx = i - for j in range(i+1, len(array)): - if array[min_idx] > array[j]: - min_idx = j - array[i], array[min_idx] = array[min_idx], array[i] - return array - - def shellsort(self, a): - array = a - n = len(array) - gap = n//2 - - while gap > 0: - - for i in range(gap,n): - - temp = array[i] - j = i - while j >= gap and array[j-gap] >temp: - array[j] = array[j-gap] - j -= gap - array[j] = temp - gap //= 2 - - return array - - def bubblesort(self, a): - - def sort(array): - for i, num in enumerate(array): - try: - if array[i+1] < num: - array[i] = array[i+1] - array[i+1] = num - sort(array) - except IndexError: - pass - return array - - return sort(a) - - def cyclesort(self, a): - - def sort(array): - - array = array - writes = 0 - - for cycleStart in range(0, len(array) - 1): - item = array[cycleStart] - - pos = cycleStart - for i in range(cycleStart + 1, len(array)): - if array[i] < item: - pos += 1 - - if pos == cycleStart: - continue - - while item == array[pos]: - pos += 1 - array[pos], item = item, array[pos] - writes += 1 - - while pos != cycleStart: - - pos = cycleStart - for i in range(cycleStart + 1, len(array)): - if array[i] < item: - pos += 1 - - while item == array[pos]: - pos += 1 - array[pos], item = item, array[pos] - writes += 1 - - return array - - return sort(a) - - def cocktailsort(self, a): - - def sort(array): - - array = array - - n = len(array) - swapped = True - start = 0 - end = n-1 - while (swapped == True): - swapped = False - for i in range (start, end): - if (array[i] > array[i + 1]) : - array[i], array[i + 1]= array[i + 1], array[i] - swapped = True - if (swapped == False): - break - swapped = False - end = end-1 - for i in range(end-1, start-1, -1): - if (array[i] > array[i + 1]): - array[i], array[i + 1] = array[i + 1], array[i] - swapped = True - start = start + 1 - - return array - - return sort(a) \ No newline at end of file diff --git a/analysis-master/tra_analysis/StatisticalTest_obj.py b/analysis-master/tra_analysis/StatisticalTest_obj.py deleted file mode 100644 index b45c2f9f..00000000 --- a/analysis-master/tra_analysis/StatisticalTest_obj.py +++ /dev/null @@ -1,170 +0,0 @@ -# Only included for backwards compatibility! Do not update, StatisticalTest is preferred and supported. 
- -import scipy -from scipy import stats - -class StatisticalTest: - - def ttest_onesample(self, a, popmean, axis = 0, nan_policy = 'propagate'): - - results = scipy.stats.ttest_1samp(a, popmean, axis = axis, nan_policy = nan_policy) - return {"t-value": results[0], "p-value": results[1]} - - def ttest_independent(self, a, b, equal = True, nan_policy = 'propagate'): - - results = scipy.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy) - return {"t-value": results[0], "p-value": results[1]} - - def ttest_statistic(self, o1, o2, equal = True): - - results = scipy.stats.ttest_ind_from_stats(o1["mean"], o1["std"], o1["nobs"], o2["mean"], o2["std"], o2["nobs"], equal_var = equal) - return {"t-value": results[0], "p-value": results[1]} - - def ttest_related(self, a, b, axis = 0, nan_policy='propagate'): - - results = scipy.stats.ttest_rel(a, b, axis = axis, nan_policy = nan_policy) - return {"t-value": results[0], "p-value": results[1]} - - def ks_fitness(self, rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'): - - results = scipy.stats.kstest(rvs, cdf, args = args, N = N, alternative = alternative, mode = mode) - return {"ks-value": results[0], "p-value": results[1]} - - def chisquare(self, f_obs, f_exp = None, ddof = None, axis = 0): - - results = scipy.stats.chisquare(f_obs, f_exp = f_exp, ddof = ddof, axis = axis) - return {"chisquared-value": results[0], "p-value": results[1]} - - def powerdivergence(self, f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None): - - results = scipy.stats.power_divergence(f_obs, f_exp = f_exp, ddof = ddof, axis = axis, lambda_ = lambda_) - return {"powerdivergence-value": results[0], "p-value": results[1]} - - def ks_twosample(self, x, y, alternative = 'two_sided', mode = 'auto'): - - results = scipy.stats.ks_2samp(x, y, alternative = alternative, mode = mode) - return {"ks-value": results[0], "p-value": results[1]} - - def es_twosample(self, x, y, t = (0.4, 0.8)): - - results = scipy.stats.epps_singleton_2samp(x, y, t = t) - return {"es-value": results[0], "p-value": results[1]} - - def mw_rank(self, x, y, use_continuity = True, alternative = None): - - results = scipy.stats.mannwhitneyu(x, y, use_continuity = use_continuity, alternative = alternative) - return {"u-value": results[0], "p-value": results[1]} - - def mw_tiecorrection(self, rank_values): - - results = scipy.stats.tiecorrect(rank_values) - return {"correction-factor": results} - - def rankdata(self, a, method = 'average'): - - results = scipy.stats.rankdata(a, method = method) - return results - - def wilcoxon_ranksum(self, a, b): # this seems to be superceded by Mann Whitney Wilcoxon U Test - - results = scipy.stats.ranksums(a, b) - return {"u-value": results[0], "p-value": results[1]} - - def wilcoxon_signedrank(self, x, y = None, zero_method = 'wilcox', correction = False, alternative = 'two-sided'): - - results = scipy.stats.wilcoxon(x, y = y, zero_method = zero_method, correction = correction, alternative = alternative) - return {"t-value": results[0], "p-value": results[1]} - - def kw_htest(self, *args, nan_policy = 'propagate'): - - results = scipy.stats.kruskal(*args, nan_policy = nan_policy) - return {"h-value": results[0], "p-value": results[1]} - - def friedman_chisquare(self, *args): - - results = scipy.stats.friedmanchisquare(*args) - return {"chisquared-value": results[0], "p-value": results[1]} - - def bm_wtest(self, x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'): - - results = scipy.stats.brunnermunzel(x, 
y, alternative = alternative, distribution = distribution, nan_policy = nan_policy) - return {"w-value": results[0], "p-value": results[1]} - - def combine_pvalues(self, pvalues, method = 'fisher', weights = None): - - results = scipy.stats.combine_pvalues(pvalues, method = method, weights = weights) - return {"combined-statistic": results[0], "p-value": results[1]} - - def jb_fitness(self, x): - - results = scipy.stats.jarque_bera(x) - return {"jb-value": results[0], "p-value": results[1]} - - def ab_equality(self, x, y): - - results = scipy.stats.ansari(x, y) - return {"ab-value": results[0], "p-value": results[1]} - - def bartlett_variance(self, *args): - - results = scipy.stats.bartlett(*args) - return {"t-value": results[0], "p-value": results[1]} - - def levene_variance(self, *args, center = 'median', proportiontocut = 0.05): - - results = scipy.stats.levene(*args, center = center, proportiontocut = proportiontocut) - return {"w-value": results[0], "p-value": results[1]} - - def sw_normality(self, x): - - results = scipy.stats.shapiro(x) - return {"w-value": results[0], "p-value": results[1]} - - def shapiro(self, x): - - return "destroyed by facts and logic" - - def ad_onesample(self, x, dist = 'norm'): - - results = scipy.stats.anderson(x, dist = dist) - return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]} - - def ad_ksample(self, samples, midrank = True): - - results = scipy.stats.anderson_ksamp(samples, midrank = midrank) - return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]} - - def binomial(self, x, n = None, p = 0.5, alternative = 'two-sided'): - - results = scipy.stats.binom_test(x, n = n, p = p, alternative = alternative) - return {"p-value": results} - - def fk_variance(self, *args, center = 'median', proportiontocut = 0.05): - - results = scipy.stats.fligner(*args, center = center, proportiontocut = proportiontocut) - return {"h-value": results[0], "p-value": results[1]} # unknown if the statistic is an h value - - def mood_mediantest(self, *args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'): - - results = scipy.stats.median_test(*args, ties = ties, correction = correction, lambda_ = lambda_, nan_policy = nan_policy) - return {"chisquared-value": results[0], "p-value": results[1], "m-value": results[2], "table": results[3]} - - def mood_equalscale(self, x, y, axis = 0): - - results = scipy.stats.mood(x, y, axis = axis) - return {"z-score": results[0], "p-value": results[1]} - - def skewtest(self, a, axis = 0, nan_policy = 'propogate'): - - results = scipy.stats.skewtest(a, axis = axis, nan_policy = nan_policy) - return {"z-score": results[0], "p-value": results[1]} - - def kurtosistest(self, a, axis = 0, nan_policy = 'propogate'): - - results = scipy.stats.kurtosistest(a, axis = axis, nan_policy = nan_policy) - return {"z-score": results[0], "p-value": results[1]} - - def normaltest(self, a, axis = 0, nan_policy = 'propogate'): - - results = scipy.stats.normaltest(a, axis = axis, nan_policy = nan_policy) - return {"z-score": results[0], "p-value": results[1]} \ No newline at end of file diff --git a/analysis-master/tra_analysis/__init__.py b/analysis-master/tra_analysis/__init__.py index 2aef5c1b..a13507b8 100644 --- a/analysis-master/tra_analysis/__init__.py +++ b/analysis-master/tra_analysis/__init__.py @@ -7,10 +7,14 @@ # current benchmark of optimization: 1.33 times faster # setup: -__version__ = "3.0.0" +__version__ = "4.0.0-dev" # changelog should be 
viewed using print(analysis.__changelog__) __changelog__ = """changelog: + 4.0.0: + - deprecated all *_obj.py compatibility modules + - deprecated titanlearn.py + - deprecated visualization.py 3.0.0: - incremented version to release 3.0.0 3.0.0-rc2: @@ -53,7 +57,6 @@ __all__ = [ ] from . import Analysis as Analysis -from . import Analysis as analysis from .Array import Array from .ClassificationMetric import ClassificationMetric from . import CorrelationTest diff --git a/analysis-master/tra_analysis/regression_old.py b/analysis-master/tra_analysis/regression_old.py deleted file mode 100644 index 82e8fbb1..00000000 --- a/analysis-master/tra_analysis/regression_old.py +++ /dev/null @@ -1,222 +0,0 @@ -# Titan Robotics Team 2022: CUDA-based Regressions Module -# Not actively maintained, may be removed in future release -# Written by Arthur Lu & Jacob Levine -# Notes: -# this module has been automatically inegrated into analysis.py, and should be callable as a class from the package -# this module is cuda-optimized (as appropriate) and vectorized (except for one small part) -# setup: - -__version__ = "0.0.4" - -# changelog should be viewed using print(analysis.regression.__changelog__) -__changelog__ = """ - 0.0.4: - - bug fixes - - fixed changelog - 0.0.3: - - bug fixes - 0.0.2: - -Added more parameters to log, exponential, polynomial - -Added SigmoidalRegKernelArthur, because Arthur apparently needs - to train the scaling and shifting of sigmoids - 0.0.1: - -initial release, with linear, log, exponential, polynomial, and sigmoid kernels - -already vectorized (except for polynomial generation) and CUDA-optimized -""" - -__author__ = ( - "Jacob Levine ", - "Arthur Lu ", -) - -__all__ = [ - 'factorial', - 'take_all_pwrs', - 'num_poly_terms', - 'set_device', - 'LinearRegKernel', - 'SigmoidalRegKernel', - 'LogRegKernel', - 'PolyRegKernel', - 'ExpRegKernel', - 'SigmoidalRegKernelArthur', - 'SGDTrain', - 'CustomTrain', - 'CircleFit' -] - -import torch - -global device - -device = "cuda:0" if torch.cuda.is_available() else "cpu" - -#todo: document completely - -def set_device(self, new_device): - device=new_device - -class LinearRegKernel(): - parameters= [] - weights=None - bias=None - def __init__(self, num_vars): - self.weights=torch.rand(num_vars, requires_grad=True, device=device) - self.bias=torch.rand(1, requires_grad=True, device=device) - self.parameters=[self.weights,self.bias] - def forward(self,mtx): - long_bias=self.bias.repeat([1,mtx.size()[1]]) - return torch.matmul(self.weights,mtx)+long_bias - -class SigmoidalRegKernel(): - parameters= [] - weights=None - bias=None - sigmoid=torch.nn.Sigmoid() - def __init__(self, num_vars): - self.weights=torch.rand(num_vars, requires_grad=True, device=device) - self.bias=torch.rand(1, requires_grad=True, device=device) - self.parameters=[self.weights,self.bias] - def forward(self,mtx): - long_bias=self.bias.repeat([1,mtx.size()[1]]) - return self.sigmoid(torch.matmul(self.weights,mtx)+long_bias) - -class SigmoidalRegKernelArthur(): - parameters= [] - weights=None - in_bias=None - scal_mult=None - out_bias=None - sigmoid=torch.nn.Sigmoid() - def __init__(self, num_vars): - self.weights=torch.rand(num_vars, requires_grad=True, device=device) - self.in_bias=torch.rand(1, requires_grad=True, device=device) - self.scal_mult=torch.rand(1, requires_grad=True, device=device) - self.out_bias=torch.rand(1, requires_grad=True, device=device) - self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias] - def forward(self,mtx): - 
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]]) - long_out_bias=self.out_bias.repeat([1,mtx.size()[1]]) - return (self.scal_mult*self.sigmoid(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias - -class LogRegKernel(): - parameters= [] - weights=None - in_bias=None - scal_mult=None - out_bias=None - def __init__(self, num_vars): - self.weights=torch.rand(num_vars, requires_grad=True, device=device) - self.in_bias=torch.rand(1, requires_grad=True, device=device) - self.scal_mult=torch.rand(1, requires_grad=True, device=device) - self.out_bias=torch.rand(1, requires_grad=True, device=device) - self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias] - def forward(self,mtx): - long_in_bias=self.in_bias.repeat([1,mtx.size()[1]]) - long_out_bias=self.out_bias.repeat([1,mtx.size()[1]]) - return (self.scal_mult*torch.log(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias - -class ExpRegKernel(): - parameters= [] - weights=None - in_bias=None - scal_mult=None - out_bias=None - def __init__(self, num_vars): - self.weights=torch.rand(num_vars, requires_grad=True, device=device) - self.in_bias=torch.rand(1, requires_grad=True, device=device) - self.scal_mult=torch.rand(1, requires_grad=True, device=device) - self.out_bias=torch.rand(1, requires_grad=True, device=device) - self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias] - def forward(self,mtx): - long_in_bias=self.in_bias.repeat([1,mtx.size()[1]]) - long_out_bias=self.out_bias.repeat([1,mtx.size()[1]]) - return (self.scal_mult*torch.exp(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias - -class PolyRegKernel(): - parameters= [] - weights=None - bias=None - power=None - def __init__(self, num_vars, power): - self.power=power - num_terms=self.num_poly_terms(num_vars, power) - self.weights=torch.rand(num_terms, requires_grad=True, device=device) - self.bias=torch.rand(1, requires_grad=True, device=device) - self.parameters=[self.weights,self.bias] - def num_poly_terms(self,num_vars, power): - if power == 0: - return 0 - return int(self.factorial(num_vars+power-1) / self.factorial(power) / self.factorial(num_vars-1)) + self.num_poly_terms(num_vars, power-1) - def factorial(self,n): - if n==0: - return 1 - else: - return n*self.factorial(n-1) - def take_all_pwrs(self, vec, pwr): - #todo: vectorize (kinda) - combins=torch.combinations(vec, r=pwr, with_replacement=True) - out=torch.ones(combins.size()[0]).to(device).to(torch.float) - for i in torch.t(combins).to(device).to(torch.float): - out *= i - if pwr == 1: - return out - else: - return torch.cat((out,self.take_all_pwrs(vec, pwr-1))) - def forward(self,mtx): - #TODO: Vectorize the last part - cols=[] - for i in torch.t(mtx): - cols.append(self.take_all_pwrs(i,self.power)) - new_mtx=torch.t(torch.stack(cols)) - long_bias=self.bias.repeat([1,mtx.size()[1]]) - return torch.matmul(self.weights,new_mtx)+long_bias - -def SGDTrain(self, kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False): - optim=torch.optim.SGD(kernel.parameters, lr=learning_rate) - data_cuda=data.to(device) - ground_cuda=ground.to(device) - if (return_losses): - losses=[] - for i in range(iterations): - with torch.set_grad_enabled(True): - optim.zero_grad() - pred=kernel.forward(data_cuda) - ls=loss(pred,ground_cuda) - losses.append(ls.item()) - ls.backward() - optim.step() - return [kernel,losses] - else: - for i in range(iterations): - with torch.set_grad_enabled(True): - optim.zero_grad() - 
pred=kernel.forward(data_cuda) - ls=loss(pred,ground_cuda) - ls.backward() - optim.step() - return kernel - -def CustomTrain(self, kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False): - data_cuda=data.to(device) - ground_cuda=ground.to(device) - if (return_losses): - losses=[] - for i in range(iterations): - with torch.set_grad_enabled(True): - optim.zero_grad() - pred=kernel.forward(data) - ls=loss(pred,ground) - losses.append(ls.item()) - ls.backward() - optim.step() - return [kernel,losses] - else: - for i in range(iterations): - with torch.set_grad_enabled(True): - optim.zero_grad() - pred=kernel.forward(data_cuda) - ls=loss(pred,ground_cuda) - ls.backward() - optim.step() - return kernel diff --git a/analysis-master/tra_analysis/titanlearn.py b/analysis-master/tra_analysis/titanlearn.py deleted file mode 100644 index 2d853922..00000000 --- a/analysis-master/tra_analysis/titanlearn.py +++ /dev/null @@ -1,122 +0,0 @@ -# Titan Robotics Team 2022: ML Module -# Written by Arthur Lu & Jacob Levine -# Notes: -# this should be imported as a python module using 'import titanlearn' -# this should be included in the local directory or environment variable -# this module is optimized for multhreaded computing -# this module learns from its mistakes far faster than 2022's captains -# setup: - -__version__ = "1.1.1" - -#changelog should be viewed using print(analysis.__changelog__) -__changelog__ = """changelog: - 1.1.1: - - removed matplotlib import - - removed graphloss() - 1.1.0: - - added net, dataset, dataloader, and stdtrain template definitions - - added graphloss function - 1.0.1: - - added clear functions - 1.0.0: - - complete rewrite planned - - depreciated 1.0.0.xxx versions - - added simple training loop - 0.0.x: - -added generation of ANNS, basic SGD training -""" - -__author__ = ( - "Arthur Lu ," - "Jacob Levine ," - ) - -__all__ = [ - 'clear', - 'net', - 'dataset', - 'dataloader', - 'train', - 'stdtrainer', - ] - -import torch -from os import system, name -import numpy as np - -def clear(): - if name == 'nt': - _ = system('cls') - else: - _ = system('clear') - -class net(torch.nn.Module): #template for standard neural net - def __init__(self): - super(Net, self).__init__() - - def forward(self, input): - pass - -class dataset(torch.utils.data.Dataset): #template for standard dataset - - def __init__(self): - super(torch.utils.data.Dataset).__init__() - - def __getitem__(self, index): - pass - - def __len__(self): - pass - -def dataloader(dataset, batch_size, num_workers, shuffle = True): - - return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers) - -def train(device, net, epochs, trainloader, optimizer, criterion): #expects standard dataloader, whch returns (inputs, labels) - - dataset_len = trainloader.dataset.__len__() - iter_count = 0 - running_loss = 0 - running_loss_list = [] - - for epoch in range(epochs): # loop over the dataset multiple times - - for i, data in enumerate(trainloader, 0): - - inputs = data[0].to(device) - labels = data[1].to(device) - - optimizer.zero_grad() - - outputs = net(inputs) - loss = criterion(outputs, labels.to(torch.float)) - - loss.backward() - optimizer.step() - - # monitoring steps below - - iter_count += 1 - running_loss += loss.item() - running_loss_list.append(running_loss) - clear() - - print("training on: " + device) - print("iteration: " + str(i) + "/" + str(int(dataset_len / trainloader.batch_size)) + " | " + "epoch: " + str(epoch) + "/" + 
str(epochs)) - print("current batch loss: " + str(loss.item)) - print("running loss: " + str(running_loss / iter_count)) - - return net, running_loss_list - print("finished training") - -def stdtrainer(net, criterion, optimizer, dataloader, epochs, batch_size): - - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - - net = net.to(device) - criterion = criterion.to(device) - optimizer = optimizer.to(device) - trainloader = dataloader - - return train(device, net, epochs, trainloader, optimizer, criterion) \ No newline at end of file diff --git a/analysis-master/tra_analysis/visualization.py b/analysis-master/tra_analysis/visualization.py deleted file mode 100644 index 0c3d4f5a..00000000 --- a/analysis-master/tra_analysis/visualization.py +++ /dev/null @@ -1,58 +0,0 @@ -# Titan Robotics Team 2022: Visualization Module -# Written by Arthur Lu & Jacob Levine -# Notes: -# this should be imported as a python module using 'import visualization' -# this should be included in the local directory or environment variable -# fancy -# setup: - -__version__ = "0.0.1" - -#changelog should be viewed using print(analysis.__changelog__) -__changelog__ = """changelog: - 0.0.1: - - added graphhistogram function as a fragment of visualize_pit.py - 0.0.0: - - created visualization.py - - added graphloss() - - added imports -""" - -__author__ = ( - "Arthur Lu ," - "Jacob Levine ," - ) - -__all__ = [ - 'graphloss', - ] - -import matplotlib.pyplot as plt -import numpy as np - -def graphloss(losses): - - x = range(0, len(losses)) - plt.plot(x, losses) - plt.show() - -def graphhistogram(data, figsize, sharey = True): # expects library with key as variable and contents as occurances - - fig, ax = plt.subplots(1, len(data), sharey=sharey, figsize=figsize) - - i = 0 - - for variable in data: - - ax[i].hist(data[variable]) - ax[i].invert_xaxis() - - ax[i].set_xlabel('Variable') - ax[i].set_ylabel('Frequency') - ax[i].set_title(variable) - - plt.yticks(np.arange(len(data[variable]))) - - i+=1 - - plt.show() \ No newline at end of file
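
Migration note for downstream users of tra_analysis: this patch deletes the *_obj backwards-compatibility wrappers and removes the lowercase "analysis" alias from tra_analysis/__init__.py, so imports that relied on either will stop resolving. The sketch below is illustrative only and is not applied by the patch; it assumes the maintained modules keep the names cited in the removed wrappers' headers (KNN, NaiveBayes, RandomForest, Sort, CorrelationTest, StatisticalTest) and that StatisticalTest exposes module-level functions matching the old wrapper's method names.

# Illustrative migration sketch (assumed API; names taken from the removed *_obj wrappers).

# Imports that break with this patch:
#   from tra_analysis import analysis                            # lowercase alias removed from __init__.py
#   from tra_analysis.StatisticalTest_obj import StatisticalTest # wrapper module deleted here

# Replacements, using the modules exported by tra_analysis/__init__.py:
from tra_analysis import Analysis                             # replaces the removed "analysis" alias
from tra_analysis import CorrelationTest, StatisticalTest     # preferred modules named in the wrapper headers
from tra_analysis import KNN, NaiveBayes, RandomForest        # assumed to be exported the same way

sample_a = [1.2, 2.4, 3.1, 4.8, 5.0]
sample_b = [1.0, 2.9, 3.3, 4.1, 5.6]

# Assumed call shape, mirroring the removed StatisticalTest_obj method of the same name:
result = StatisticalTest.ttest_independent(sample_a, sample_b)
print(result)

# The comment kept in __init__.py still suggests print(analysis.__changelog__); with the
# lowercase alias gone, the package-level equivalent is:
import tra_analysis
print(tra_analysis.__changelog__)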
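
Relatedly, Array.sort now warns that it is deprecated in favor of Sort (see the Array.py hunk above). A minimal sketch of the preferred path, assuming the maintained Sort module exposes the same algorithm names as the removed Sort_obj wrapper (quicksort, mergesort, and so on) and, per the Analysis.py changelog, accepts both lists and numpy arrays:

import numpy as np
from tra_analysis import Sort

data = np.array([5, 3, 1, 4, 2])

# Preferred path after this patch; the function name is assumed to mirror the old Sort_obj wrapper.
ordered = Sort.quicksort(data)
print(ordered)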