Merge pull request #85 from titanscouting/analysis-v4

Analysis v4
This commit is contained in:
Arthur Lu 2022-03-13 21:15:40 -07:00 committed by GitHub
commit 12e0a263bd
30 changed files with 348 additions and 1234 deletions

View File

@@ -1,7 +1,6 @@
FROM ubuntu:20.04 FROM python:slim
WORKDIR / WORKDIR /
RUN apt-get -y update RUN apt-get -y update; apt-get -y upgrade
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata RUN apt-get -y install git
RUN apt-get install -y python3 python3-dev git python3-pip python3-kivy python-is-python3 libgl1-mesa-dev build-essential COPY requirements.txt .
RUN ln -s $(which pip3) /usr/bin/pip RUN pip install -r requirements.txt
RUN pip install pymongo pandas numpy scipy scikit-learn matplotlib pylint kivy
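For reference, the updated base image assembled from the right-hand side of this diff:

FROM python:slim
WORKDIR /
RUN apt-get -y update; apt-get -y upgrade
RUN apt-get -y install git
COPY requirements.txt .
RUN pip install -r requirements.txt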

View File

@@ -1,2 +0,0 @@
FROM titanscout2022/tra-analysis-base:latest
WORKDIR /

View File

@@ -1,28 +1,22 @@
{ {
"name": "TRA Analysis Development Environment", "name": "TRA Analysis Development Environment",
"build": { "build": {
"dockerfile": "dev-dockerfile", "dockerfile": "Dockerfile",
}, },
"settings": { "settings": {
"terminal.integrated.shell.linux": "/bin/bash", "terminal.integrated.shell.linux": "/bin/bash",
"python.pythonPath": "/usr/local/bin/python", "python.pythonPath": "",
"python.linting.enabled": true, "python.linting.enabled": true,
"python.linting.pylintEnabled": true, "python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", "python.linting.pylintPath": "",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black", "python.testing.pytestPath": "",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", "editor.tabSize": 4,
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit", "editor.insertSpaces": false
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
"python.testing.pytestPath": "/usr/local/py-utils/bin/pytest"
}, },
"extensions": [ "extensions": [
"mhutchie.git-graph", "mhutchie.git-graph",
"ms-python.python", "ms-python.python",
"waderyan.gitblame" "waderyan.gitblame"
], ],
"postCreateCommand": "/usr/bin/pip3 install -r ${containerWorkspaceFolder}/analysis-master/requirements.txt && /usr/bin/pip3 install --no-cache-dir pylint && /usr/bin/pip3 install pytest" "postCreateCommand": ""
} }

View File

@@ -0,0 +1,8 @@
numpy
scipy
scikit-learn
six
pyparsing
pylint
pytest

View File

@@ -10,12 +10,12 @@ on:
branches: [ master ] branches: [ master ]
jobs: jobs:
build: unittest:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
python-version: [3.7, 3.8] python-version: ["3.7", "3.8", "3.9", "3.10"]
env: env:
working-directory: ./analysis-master/ working-directory: ./analysis-master/

View File

@@ -2,5 +2,7 @@ numpy
scipy scipy
scikit-learn scikit-learn
six six
matplotlib
pyparsing pyparsing
pylint
pytest

View File

@@ -5,9 +5,11 @@ from sklearn import metrics
from tra_analysis import Analysis as an from tra_analysis import Analysis as an
from tra_analysis import Array from tra_analysis import Array
from tra_analysis import ClassificationMetric from tra_analysis import ClassificationMetric
from tra_analysis import Clustering
from tra_analysis import CorrelationTest from tra_analysis import CorrelationTest
from tra_analysis import Fit from tra_analysis import Fit
from tra_analysis import KNN from tra_analysis import KNN
from tra_analysis import metrics as m
from tra_analysis import NaiveBayes from tra_analysis import NaiveBayes
from tra_analysis import RandomForest from tra_analysis import RandomForest
from tra_analysis import RegressionMetric from tra_analysis import RegressionMetric
@@ -26,7 +28,7 @@ x_data_circular = []
y_data_circular = [] y_data_circular = []
y_data_ccu = [1, 3, 7, 14, 21] y_data_ccu = [1, 3, 7, 14, 21]
y_data_ccd = [1, 5, 7, 8.5, 8.66] y_data_ccd = [8.66, 8.5, 7, 5, 1]
test_data_scrambled = [-32, 34, 19, 72, -65, -11, -43, 6, 85, -17, -98, -26, 12, 20, 9, -92, -40, 98, -78, 17, -20, 49, 93, -27, -24, -66, 40, 84, 1, -64, -68, -25, -42, -46, -76, 43, -3, 30, -14, -34, -55, -13, 41, -30, 0, -61, 48, 23, 60, 87, 80, 77, 53, 73, 79, 24, -52, 82, 8, -44, 65, 47, -77, 94, 7, 37, -79, 36, -94, 91, 59, 10, 97, -38, -67, 83, 54, 31, -95, -63, 16, -45, 21, -12, 66, -48, -18, -96, -90, -21, -83, -74, 39, 64, 69, -97, 13, 55, 27, -39] test_data_scrambled = [-32, 34, 19, 72, -65, -11, -43, 6, 85, -17, -98, -26, 12, 20, 9, -92, -40, 98, -78, 17, -20, 49, 93, -27, -24, -66, 40, 84, 1, -64, -68, -25, -42, -46, -76, 43, -3, 30, -14, -34, -55, -13, 41, -30, 0, -61, 48, 23, 60, 87, 80, 77, 53, 73, 79, 24, -52, 82, 8, -44, 65, 47, -77, 94, 7, 37, -79, 36, -94, 91, 59, 10, 97, -38, -67, 83, 54, 31, -95, -63, 16, -45, 21, -12, 66, -48, -18, -96, -90, -21, -83, -74, 39, 64, 69, -97, 13, 55, 27, -39]
test_data_sorted = [-98, -97, -96, -95, -94, -92, -90, -83, -79, -78, -77, -76, -74, -68, -67, -66, -65, -64, -63, -61, -55, -52, -48, -46, -45, -44, -43, -42, -40, -39, -38, -34, -32, -30, -27, -26, -25, -24, -21, -20, -18, -17, -14, -13, -12, -11, -3, 0, 1, 6, 7, 8, 9, 10, 12, 13, 16, 17, 19, 20, 21, 23, 24, 27, 30, 31, 34, 36, 37, 39, 40, 41, 43, 47, 48, 49, 53, 54, 55, 59, 60, 64, 65, 66, 69, 72, 73, 77, 79, 80, 82, 83, 84, 85, 87, 91, 93, 94, 97, 98] test_data_sorted = [-98, -97, -96, -95, -94, -92, -90, -83, -79, -78, -77, -76, -74, -68, -67, -66, -65, -64, -63, -61, -55, -52, -48, -46, -45, -44, -43, -42, -40, -39, -38, -34, -32, -30, -27, -26, -25, -24, -21, -20, -18, -17, -14, -13, -12, -11, -3, 0, 1, 6, 7, 8, 9, 10, 12, 13, 16, 17, 19, 20, 21, 23, 24, 27, 30, 31, 34, 36, 37, 39, 40, 41, 43, 47, 48, 49, 53, 54, 55, 59, 60, 64, 65, 66, 69, 72, 73, 77, 79, 80, 82, 83, 84, 85, 87, 91, 93, 94, 97, 98]
@@ -47,16 +49,25 @@ def test_basicstats():
def test_regression(): def test_regression():
assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccu, ["lin"])) == True assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccu, ["lin"])) == True
#assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccd, ["log"])) == True assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccd, ["log"])) == True
#assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccu, ["exp"])) == True assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccu, ["exp"])) == True
#assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccu, ["ply"])) == True assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccu, ["ply"])) == True
#assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccd, ["sig"])) == True assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccd, ["sig"])) == True
def test_metrics(): def test_metrics():
assert an.Metric().elo(1500, 1500, [1, 0], 400, 24) == 1512.0 assert an.Metric().elo(1500, 1500, [1, 0], 400, 24) == 1512.0
assert an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0]) == (1478.864307445517, 195.99122679202452, 0.05999602937563585) assert an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0]) == (1478.864307445517, 195.99122679202452, 0.05999602937563585)
#assert an.Metric().trueskill([[(25, 8.33), (24, 8.25), (32, 7.5)], [(25, 8.33), (25, 8.33), (21, 6.5)]], [1, 0]) == [(metrics.trueskill.Rating(mu=21.346, sigma=7.875), metrics.trueskill.Rating(mu=20.415, sigma=7.808), metrics.trueskill.Rating(mu=29.037, sigma=7.170)), (metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=23.225, sigma=6.287))] e = [[(21.346, 7.875), (20.415, 7.808), (29.037, 7.170)], [(28.654, 7.875), (28.654, 7.875), (23.225, 6.287)]]
r = an.Metric().trueskill([[(25, 8.33), (24, 8.25), (32, 7.5)], [(25, 8.33), (25, 8.33), (21, 6.5)]], [1, 0])
i = 0
for group in r:
j = 0
for team in group:
assert abs(team.mu - e[i][j][0]) < 0.001
assert abs(team.sigma - e[i][j][1]) < 0.001
j+=1
i+=1
def test_array(): def test_array():
@@ -143,13 +154,8 @@ def test_sort():
def test_statisticaltest(): def test_statisticaltest():
#print(StatisticalTest.tukey_multicomparison([test_data_linear, test_data_linear2, test_data_linear3]))
assert StatisticalTest.tukey_multicomparison([test_data_linear, test_data_linear2, test_data_linear3]) == \ assert StatisticalTest.tukey_multicomparison([test_data_linear, test_data_linear2, test_data_linear3]) == \
{'group 1 and group 2': [0.32571517201527916, False], 'group 1 and group 3': [0.977145516045838, False], 'group 2 and group 3': [0.6514303440305589, False]} {'group 1 and group 2': [0.32571517201527916, False], 'group 1 and group 3': [0.977145516045838, False], 'group 2 and group 3': [0.6514303440305589, False]}
#assert all(np.isclose([i[0] for i in list(StatisticalTest.tukey_multicomparison([test_data_linear, test_data_linear2, test_data_linear3]).values],
# [0.32571517201527916, 0.977145516045838, 0.6514303440305589]))
#assert [i[1] for i in StatisticalTest.tukey_multicomparison([test_data_linear, test_data_linear2, test_data_linear3]).values] == \
# [False, False, False]
def test_svm(): def test_svm():
@@ -231,3 +237,17 @@ def test_equation():
} }
for key in list(correctParse.keys()): for key in list(correctParse.keys()):
assert parser.eval(key) == correctParse[key] assert parser.eval(key) == correctParse[key]
def test_clustering():
normalizer = sklearn.preprocessing.Normalizer()
data = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
assert Clustering.dbscan(data, eps=3, min_samples=2).tolist() == [0, 0, 0, 1, 1, -1]
assert Clustering.dbscan(data, normalizer=normalizer, eps=3, min_samples=2).tolist() == [0, 0, 0, 0, 0, 0]
data = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])
assert Clustering.spectral(data, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [1, 1, 1, 0, 0, 0]
assert Clustering.spectral(data, normalizer=normalizer, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [0, 1, 1, 0, 0, 0]

View File

@@ -7,10 +7,19 @@
# current benchmark of optimization: 1.33 times faster # current benchmark of optimization: 1.33 times faster
# setup: # setup:
__version__ = "3.0.2" __version__ = "3.0.6"
# changelog should be viewed using print(analysis.__changelog__) # changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog: __changelog__ = """changelog:
3.0.6:
- added docstrings
3.0.5:
- removed extra submodule imports
- fixed/optimized header
3.0.4:
- removed -_obj imports
3.0.3:
- fixed spelling of deprecate
3.0.2: 3.0.2:
- fixed __all__ - fixed __all__
3.0.1: 3.0.1:
@@ -58,7 +67,7 @@ __changelog__ = """changelog:
- cycle sort - cycle sort
- cocktail sort - cocktail sort
- tested all sorting algorithms with both lists and numpy arrays - tested all sorting algorithms with both lists and numpy arrays
- depreciated sort function from Array class - deprecated sort function from Array class
- added warnings as an import - added warnings as an import
2.1.4: 2.1.4:
- added sort and search functions to Array class - added sort and search functions to Array class
@@ -136,7 +145,7 @@ __changelog__ = """changelog:
1.12.4: 1.12.4:
- renamed gliko to glicko - renamed gliko to glicko
1.12.3: 1.12.3:
- removed depreciated code - removed deprecated code
1.12.2: 1.12.2:
- removed team first time trueskill instantiation in favor of integration in superscript.py - removed team first time trueskill instantiation in favor of integration in superscript.py
1.12.1: 1.12.1:
@@ -248,10 +257,10 @@ __changelog__ = """changelog:
1.0.0: 1.0.0:
- removed c_entities,nc_entities,obstacles,objectives from __all__ - removed c_entities,nc_entities,obstacles,objectives from __all__
- applied numba.jit to all functions - applied numba.jit to all functions
- depreciated and removed stdev_z_split - deprecated and removed stdev_z_split
- cleaned up histo_analysis to include numpy and numba.jit optimizations - cleaned up histo_analysis to include numpy and numba.jit optimizations
- depreciated and removed all regression functions in favor of future pytorch optimizer - deprecated and removed all regression functions in favor of future pytorch optimizer
- depreciated and removed all nonessential functions (basic_analysis, benchmark, strip_data) - deprecated and removed all nonessential functions (basic_analysis, benchmark, strip_data)
- optimized z_normalize using sklearn.preprocessing.normalize - optimized z_normalize using sklearn.preprocessing.normalize
- TODO: implement kernel/function based pytorch regression optimizer - TODO: implement kernel/function based pytorch regression optimizer
0.9.0: 0.9.0:
@@ -270,8 +279,8 @@ __changelog__ = """changelog:
- refactors - refactors
- bugfixes - bugfixes
0.8.0: 0.8.0:
- depreciated histo_analysis_old - deprecated histo_analysis_old
- depreciated debug - deprecated debug
- altered basic_analysis to take array data instead of filepath - altered basic_analysis to take array data instead of filepath
- refactor - refactor
- optimization - optimization
@@ -319,7 +328,7 @@ __changelog__ = """changelog:
0.3.5: 0.3.5:
- major bug fixes - major bug fixes
- updated historical analysis - updated historical analysis
- depreciated old historical analysis - deprecated old historical analysis
0.3.4: 0.3.4:
- added __version__, __author__, __all__ - added __version__, __author__, __all__
- added polynomial regression - added polynomial regression
@@ -357,7 +366,6 @@ __all__ = [
'histo_analysis', 'histo_analysis',
'regression', 'regression',
'Metric', 'Metric',
'kmeans',
'pca', 'pca',
'decisiontree', 'decisiontree',
# all statistics functions left out due to integration in other functions # all statistics functions left out due to integration in other functions
@@ -370,40 +378,39 @@ __all__ = [
import csv import csv
from tra_analysis.metrics import elo as Elo from tra_analysis.metrics import elo as Elo
from tra_analysis.metrics import glicko2 as Glicko2 from tra_analysis.metrics import glicko2 as Glicko2
import math
import numpy as np import numpy as np
import scipy import scipy
from scipy import optimize, stats import sklearn, sklearn.cluster, sklearn.pipeline
import sklearn
from sklearn import preprocessing, pipeline, linear_model, metrics, cluster, decomposition, tree, neighbors, naive_bayes, svm, model_selection, ensemble
from tra_analysis.metrics import trueskill as Trueskill from tra_analysis.metrics import trueskill as Trueskill
import warnings
# import submodules # import submodules
from .Array import Array
from .ClassificationMetric import ClassificationMetric from .ClassificationMetric import ClassificationMetric
from .CorrelationTest_obj import CorrelationTest
from .KNN_obj import KNN
from .NaiveBayes_obj import NaiveBayes
from .RandomForest_obj import RandomForest
from .RegressionMetric import RegressionMetric
from .Sort_obj import Sort
from .StatisticalTest_obj import StatisticalTest
from . import SVM
class error(ValueError): class error(ValueError):
pass pass
def load_csv(filepath): def load_csv(filepath):
"""
Loads csv file into 2D numpy array. Does not check csv file validity.
parameters:
filepath: String path to the csv file
return:
2D numpy array of values stored in csv file
"""
with open(filepath, newline='') as csvfile: with open(filepath, newline='') as csvfile:
file_array = np.array(list(csv.reader(csvfile))) file_array = np.array(list(csv.reader(csvfile)))
csvfile.close() csvfile.close()
return file_array return file_array
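A minimal usage sketch (the filename is hypothetical; csv.reader does not cast, so values come back as strings):

rows = load_csv("scouting_data.csv")
rows.shape  # (n_rows, n_columns)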
# expects 1d array
def basic_stats(data): def basic_stats(data):
"""
Calculates mean, median, standard deviation, variance, minimum, maximum of a simple set of elements.
parameters:
data: List representing set of unordered elements
return:
Dictionary with (mean, median, standard-deviation, variance, minimum, maximum) as keys and corresponding values
"""
data_t = np.array(data).astype(float) data_t = np.array(data).astype(float)
_mean = mean(data_t) _mean = mean(data_t)
@@ -415,24 +422,43 @@ def basic_stats(data):
return {"mean": _mean, "median": _median, "standard-deviation": _stdev, "variance": _variance, "minimum": _min, "maximum": _max} return {"mean": _mean, "median": _median, "standard-deviation": _stdev, "variance": _variance, "minimum": _min, "maximum": _max}
# returns z score with inputs of point, mean and standard deviation of spread
def z_score(point, mean, stdev): def z_score(point, mean, stdev):
"""
Calculates z score of a specific point given mean and standard deviation of data.
parameters:
point: Real value corresponding to a single point of data
mean: Real value corresponding to the mean of the dataset
stdev: Real value corresponding to the standard deviation of the dataset
return:
Real value that is the point's z score
"""
score = (point - mean) / stdev score = (point - mean) / stdev
return score return score
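For example, a point one standard deviation above the mean has a z score of 1:

z_score(12, 10, 2)  # (12 - 10) / 2 == 1.0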
# expects 2d array, normalizes across all axes
def z_normalize(array, *args): def z_normalize(array, *args):
"""
Applies sklearn.preprocessing.normalize(array, axis = arg) for each arg, on any array-like parseable by numpy.
parameters:
array: array-like structure of Reals (nested indexables)
*args: one or more axis values to normalize against
return:
numpy array of normalized values from the array-like input
"""
array = np.array(array) array = np.array(array)
for arg in args: for arg in args:
array = sklearn.preprocessing.normalize(array, axis = arg) array = sklearn.preprocessing.normalize(array, axis = arg)
return array return array
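A usage sketch: sklearn.preprocessing.normalize defaults to the L2 norm, so each row is scaled to unit length when normalizing along axis 1:

z_normalize([[3, 4], [6, 8]], 1)
# array([[0.6, 0.8], [0.6, 0.8]])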
# expects 2d array of [x,y]
def histo_analysis(hist_data): def histo_analysis(hist_data):
"""
Calculates the mean and standard deviation of derivatives of (x,y) points. Requires at least 2 points to compute.
parameters:
hist_data: list of real coordinate point data (x, y)
return:
Dictionary with (mean, deviation) as keys to corresponding values
"""
if len(hist_data[0]) > 2: if len(hist_data[0]) > 2:
hist_data = np.array(hist_data) hist_data = np.array(hist_data)
@@ -448,7 +474,15 @@ def histo_analysis(hist_data):
return None return None
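A hedged usage sketch (assuming hist_data is passed as [x_values, y_values], which the len(hist_data[0]) > 2 guard suggests; the output shown is hypothetical but follows from a constant slope):

histo_analysis([[0, 1, 2, 3], [0, 2, 4, 6]])
# expected form: {'mean': 2.0, 'deviation': 0.0} for a uniform dy/dx of 2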
def regression(inputs, outputs, args): # inputs, outputs expects N-D array def regression(inputs, outputs, args): # inputs, outputs expects N-D array
"""
Applies specified regression kernels onto input, output data pairs.
parameters:
inputs: List of Reals representing independent variable values of each point
outputs: List of Reals representing dependent variable values of each point
args: List of Strings from values (lin, log, exp, ply, sig)
return:
Dictionary with (lin, log, exp, ply, sig) as keys to corresponding regression models
"""
X = np.array(inputs) X = np.array(inputs)
y = np.array(outputs) y = np.array(outputs)
@@ -552,13 +586,39 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
return regressions return regressions
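A hedged usage sketch based on the docstring and the unit tests above (the tests only verify that the returned items are strings, so treat the exact return structure as an assumption):

models = regression([1, 2, 3, 4, 5], [1, 3, 7, 14, 21], ["lin"])
# per the docstring, a dictionary with 'lin' mapping to the fitted linear model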
class Metric: class Metric:
"""
The Metric class wraps the metric models. Call as Metric().<method>(...)
"""
def elo(self, starting_score, opposing_score, observed, N, K): def elo(self, starting_score, opposing_score, observed, N, K):
"""
Calculates an adjusted ELO score given a player's current score, the opponent's score, and the outcome of the match.
reference: https://en.wikipedia.org/wiki/Elo_rating_system
parameters:
starting_score: Real value representing player's ELO score before a match
opposing_score: Real value representing opponent's score before the match
observed: Array of Real values representing multiple sequential match outcomes against the same opponent. 1 for match win, 0.5 for tie, 0 for loss.
N: Real value representing the normal or mean score expected (usually 1200)
K: Real value representing a system constant that determines how quickly players' scores change (usually 24)
return:
Real value representing the player's new ELO score
"""
return Elo.calculate(starting_score, opposing_score, observed, N, K) return Elo.calculate(starting_score, opposing_score, observed, N, K)
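A concrete call, with the expected result taken from the unit tests above (an is the test suite's alias for this module):

an.Metric().elo(1500, 1500, [1, 0], 400, 24)  # == 1512.0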
def glicko2(self, starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations): def glicko2(self, starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
"""
Calculates an adjusted Glicko-2 score given a player's current score, multiple opponents' scores, and the outcomes of several matches.
reference: http://www.glicko.net/glicko/glicko2.pdf
parameters:
starting_score: Real value representing the player's Glicko-2 score
starting_rd: Real value representing the player's RD
starting_vol: Real value representing the player's volatility
opposing_score: List of Real values representing multiple opponents' Glicko-2 scores
opposing_rd: List of Real values representing multiple opponents' RD
observations: List of Real values representing the outcomes of several matches, where each match's opponent corresponds with the opposing_score and opposing_rd values at the same index. Outcomes can be a score, presuming a greater score is better.
return:
Tuple of 3 Real values representing the player's new score, rd, and vol
"""
player = Glicko2.Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol) player = Glicko2.Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations) player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
@@ -566,7 +626,15 @@ class Metric:
return (player.rating, player.rd, player.vol) return (player.rating, player.rd, player.vol)
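Likewise grounded in the unit tests above: one win and one loss against two rated opponents adjusts this player's rating, RD, and volatility downward:

an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0])
# (1478.864307445517, 195.99122679202452, 0.05999602937563585)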
def trueskill(self, teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]] def trueskill(self, teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
"""
Calculates the score changes for multiple teams playing in a single match according to the TrueSkill algorithm.
reference: https://trueskill.org/
parameters:
teams_data: List of List of Tuples of 2 Real values representing multiple player ratings. List of teams, which is a List of players. Each player rating is a Tuple of 2 Real values (mu, sigma).
observations: List of Real values representing the match outcome. Each value in the List is the score corresponding to the team at the same index in teams_data.
return:
List of List of Tuples of 2 Real values representing new player ratings. Same structure as teams_data.
"""
team_ratings = [] team_ratings = []
for team in teams_data: for team in teams_data:
@@ -602,23 +670,31 @@ def npmax(data):
return np.amax(data) return np.amax(data)
def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
kernel.fit(data)
predictions = kernel.predict(data)
centers = kernel.cluster_centers_
return centers, predictions
def pca(data, n_components = None, copy = True, whiten = False, svd_solver = "auto", tol = 0.0, iterated_power = "auto", random_state = None): def pca(data, n_components = None, copy = True, whiten = False, svd_solver = "auto", tol = 0.0, iterated_power = "auto", random_state = None):
"""
Performs a principal component analysis on the input data.
reference: https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
parameters:
data: Arraylike of Reals representing the set of data to perform PCA on
* : refer to reference for usage, parameters follow same usage
return:
Arraylike of Reals representing the set of data that has had PCA performed. The dimensionality of the Arraylike may be smaller or equal.
"""
kernel = sklearn.decomposition.PCA(n_components = n_components, copy = copy, whiten = whiten, svd_solver = svd_solver, tol = tol, iterated_power = iterated_power, random_state = random_state) kernel = sklearn.decomposition.PCA(n_components = n_components, copy = copy, whiten = whiten, svd_solver = svd_solver, tol = tol, iterated_power = iterated_power, random_state = random_state)
return kernel.fit_transform(data) return kernel.fit_transform(data)
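A usage sketch (the input matrix is hypothetical; with n_components = 1 the projection keeps a single column):

reduced = pca([[1, 2], [3, 4], [5, 6]], n_components = 1)
# shape (3, 1): each row projected onto the first principal component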
def decisiontree(data, labels, test_size = 0.3, criterion = "gini", splitter = "default", max_depth = None): #expects *2d data and 1d labels def decisiontree(data, labels, test_size = 0.3, criterion = "gini", splitter = "default", max_depth = None): #expects *2d data and 1d labels
"""
Generates a decision tree classifier fitted to the given data.
reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
parameters:
data: List of values representing each data point of multiple axes
labels: List of values representing the labels corresponding to the same index in data
* : refer to reference for usage, parameters follow same usage
return:
DecisionTreeClassifier model and corresponding classification accuracy metrics
"""
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.tree.DecisionTreeClassifier(criterion = criterion, splitter = splitter, max_depth = max_depth) model = sklearn.tree.DecisionTreeClassifier(criterion = criterion, splitter = splitter, max_depth = max_depth)
model = model.fit(data_train,labels_train) model = model.fit(data_train,labels_train)
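A usage sketch (toy data is hypothetical; note sklearn's DecisionTreeClassifier only accepts splitter values "best" or "random", so it is passed explicitly here rather than relying on the "default" default):

data, labels = [[0, 0], [1, 1], [0, 1], [1, 0]] * 10, [0, 1, 1, 0] * 10
model, metrics = decisiontree(data, labels, test_size = 0.3, splitter = "best")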

View File

@@ -4,9 +4,11 @@
# this should be imported as a python module using 'from tra_analysis import Array' # this should be imported as a python module using 'from tra_analysis import Array'
# setup: # setup:
__version__ = "1.0.3" __version__ = "1.0.4"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.4:
- fixed spelling of deprecate
1.0.3: 1.0.3:
- fixed __all__ - fixed __all__
1.0.2: 1.0.2:
@@ -135,8 +137,8 @@ class Array(): # tests on nd arrays independent of basic_stats
return Array(np.transpose(self.array)) return Array(np.transpose(self.array))
def sort(self, array): # depreciated def sort(self, array): # deprecated
warnings.warn("Array.sort has been depreciated in favor of Sort") warnings.warn("Array.sort has been deprecated in favor of Sort")
array_length = len(array) array_length = len(array)
if array_length <= 1: if array_length <= 1:
return array return array

View File

@@ -4,9 +4,11 @@
# this should be imported as a python module using 'from tra_analysis import ClassificationMetric' # this should be imported as a python module using 'from tra_analysis import ClassificationMetric'
# setup: # setup:
__version__ = "1.0.1" __version__ = "1.0.2"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.2:
- optimized imports
1.0.1: 1.0.1:
- fixed __all__ - fixed __all__
1.0.0: 1.0.0:
@@ -22,7 +24,6 @@ __all__ = [
] ]
import sklearn import sklearn
from sklearn import metrics
class ClassificationMetric(): class ClassificationMetric():

View File

@@ -0,0 +1,63 @@
# Titan Robotics Team 2022: Clustering submodule
# Written by Arthur Lu
# Notes:
# this should be imported as a python module using 'from tra_analysis import Clustering'
# setup:
__version__ = "2.0.2"
# changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog:
2.0.2:
- generalized optional args to **kwargs
2.0.1:
- added normalization preprocessing to clustering, expects instance of sklearn.preprocessing.Normalizer()
2.0.0:
- added dbscan clustering algo
- added spectral clustering algo
1.0.0:
- created this submodule
- copied kmeans clustering from Analysis
"""
__author__ = (
"Arthur Lu <learthurgo@gmail.com>",
)
__all__ = [
"kmeans",
"dbscan",
"spectral",
]
import sklearn, sklearn.cluster
def kmeans(data, normalizer = None, **kwargs):
if normalizer is not None:
data = normalizer.transform(data)
kernel = sklearn.cluster.KMeans(**kwargs)
kernel.fit(data)
predictions = kernel.predict(data)
centers = kernel.cluster_centers_
return centers, predictions
def dbscan(data, normalizer=None, **kwargs):
if normalizer is not None:
data = normalizer.transform(data)
model = sklearn.cluster.DBSCAN(**kwargs).fit(data)
return model.labels_
def spectral(data, normalizer=None, **kwargs):
if normalizer is not None:
data = normalizer.transform(data)
model = sklearn.cluster.SpectralClustering(**kwargs).fit(data)
return model.labels_
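A usage sketch for the kmeans wrapper (dbscan and spectral are already exercised in the unit tests above; the toy data and resulting centers are illustrative):

import numpy as np
data = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8]])
centers, predictions = Clustering.kmeans(data, n_clusters = 2, random_state = 0)
# centers approximately [[1.67, 2.33], [8.0, 7.5]]; predictions group the first three points together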

View File

@@ -4,9 +4,13 @@
# this should be imported as a python module using 'from tra_analysis import CorrelationTest' # this should be imported as a python module using 'from tra_analysis import CorrelationTest'
# setup: # setup:
__version__ = "1.0.1" __version__ = "1.0.3"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.3:
- generalized optional args to **kwargs
1.0.2:
- optimized imports
1.0.1: 1.0.1:
- fixed __all__ - fixed __all__
1.0.0: 1.0.0:
@@ -29,7 +33,6 @@ __all__ = [
] ]
import scipy import scipy
from scipy import stats
def anova_oneway(*args): #expects arrays of samples def anova_oneway(*args): #expects arrays of samples
@@ -41,9 +44,9 @@ def pearson(x, y):
results = scipy.stats.pearsonr(x, y) results = scipy.stats.pearsonr(x, y)
return {"r-value": results[0], "p-value": results[1]} return {"r-value": results[0], "p-value": results[1]}
def spearman(a, b = None, axis = 0, nan_policy = 'propagate'): def spearman(a, b = None, **kwargs):
results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy) results = scipy.stats.spearmanr(a, b = b, **kwargs)
return {"r-value": results[0], "p-value": results[1]} return {"r-value": results[0], "p-value": results[1]}
def point_biserial(x, y): def point_biserial(x, y):
@@ -51,17 +54,17 @@ def point_biserial(x, y):
results = scipy.stats.pointbiserialr(x, y) results = scipy.stats.pointbiserialr(x, y)
return {"r-value": results[0], "p-value": results[1]} return {"r-value": results[0], "p-value": results[1]}
def kendall(x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'): def kendall(x, y, **kwargs):
results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method) results = scipy.stats.kendalltau(x, y, **kwargs)
return {"tau": results[0], "p-value": results[1]} return {"tau": results[0], "p-value": results[1]}
def kendall_weighted(x, y, rank = True, weigher = None, additive = True): def kendall_weighted(x, y, **kwargs):
results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive) results = scipy.stats.weightedtau(x, y, **kwargs)
return {"tau": results[0], "p-value": results[1]} return {"tau": results[0], "p-value": results[1]}
def mgc(x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None): def mgc(x, y, **kwargs):
results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state) results = scipy.stats.multiscale_graphcorr(x, y, **kwargs)
return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value

View File

@@ -1,41 +0,0 @@
# Only included for backwards compatibility! Do not update, CorrelationTest is preferred and supported.
import scipy
from scipy import stats
class CorrelationTest:
def anova_oneway(self, *args): #expects arrays of samples
results = scipy.stats.f_oneway(*args)
return {"f-value": results[0], "p-value": results[1]}
def pearson(self, x, y):
results = scipy.stats.pearsonr(x, y)
return {"r-value": results[0], "p-value": results[1]}
def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'):
results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
return {"r-value": results[0], "p-value": results[1]}
def point_biserial(self, x,y):
results = scipy.stats.pointbiserialr(x, y)
return {"r-value": results[0], "p-value": results[1]}
def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method)
return {"tau": results[0], "p-value": results[1]}
def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True):
results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
return {"tau": results[0], "p-value": results[1]}
def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value

View File

@@ -4,9 +4,13 @@
# this should be imported as a python module using 'from tra_analysis import KNN' # this should be imported as a python module using 'from tra_analysis import KNN'
# setup: # setup:
__version__ = "1.0.0" __version__ = "1.0.2"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.2:
- generalized optional args to **kwargs
1.0.1:
- optimized imports
1.0.0: 1.0.0:
- ported analysis.KNN() here - ported analysis.KNN() here
- removed classness - removed classness
@@ -23,22 +27,21 @@ __all__ = [
] ]
import sklearn import sklearn
from sklearn import model_selection, neighbors
from . import ClassificationMetric, RegressionMetric from . import ClassificationMetric, RegressionMetric
def knn_classifier(data, labels, n_neighbors = 5, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling def knn_classifier(data, labels, n_neighbors = 5, test_size = 0.3, **kwargs): #expects *2d data and 1d labels post-scaling
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.neighbors.KNeighborsClassifier(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs) model = sklearn.neighbors.KNeighborsClassifier(n_neighbors = n_neighbors, **kwargs)
model.fit(data_train, labels_train) model.fit(data_train, labels_train)
predictions = model.predict(data_test) predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test) return model, ClassificationMetric(predictions, labels_test)
def knn_regressor(data, outputs, n_neighbors = 5, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None): def knn_regressor(data, outputs, n_neighbors = 5, test_size = 0.3, **kwargs):
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1) data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs) model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, **kwargs)
model.fit(data_train, outputs_train) model.fit(data_train, outputs_train)
predictions = model.predict(data_test) predictions = model.predict(data_test)
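A usage sketch of the slimmed signature; anything beyond n_neighbors and test_size now rides through **kwargs to sklearn (toy data is hypothetical):

data, labels = [[0, 0], [1, 1]] * 20, [0, 1] * 20
model, metric = KNN.knn_classifier(data, labels, n_neighbors = 3, weights = "distance")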

View File

@@ -1,25 +0,0 @@
# Only included for backwards compatibility! Do not update, NaiveBayes is preferred and supported.
import sklearn
from sklearn import model_selection, neighbors
from . import ClassificationMetric, RegressionMetric
class KNN:
def knn_classifier(self, data, labels, n_neighbors, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.neighbors.KNeighborsClassifier()
model.fit(data_train, labels_train)
predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test)
def knn_regressor(self, data, outputs, n_neighbors, test_size = 0.3, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
model.fit(data_train, outputs_train)
predictions = model.predict(data_test)
return model, RegressionMetric(predictions, outputs_test)

View File

@@ -4,9 +4,13 @@
# this should be imported as a python module using 'from tra_analysis import NaiveBayes' # this should be imported as a python module using 'from tra_analysis import NaiveBayes'
# setup: # setup:
__version__ = "1.0.0" __version__ = "1.0.2"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.2:
- generalized optional args to **kwargs
1.0.1:
- optimized imports
1.0.0: 1.0.0:
- ported analysis.NaiveBayes() here - ported analysis.NaiveBayes() here
- removed classness - removed classness
@@ -18,46 +22,45 @@ __author__ = (
__all__ = [ __all__ = [
'gaussian', 'gaussian',
'multinomial' 'multinomial',
'bernoulli', 'bernoulli',
'complement' 'complement',
] ]
import sklearn import sklearn
from sklearn import model_selection, naive_bayes from . import ClassificationMetric
from . import ClassificationMetric, RegressionMetric
def gaussian(data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09): def gaussian(data, labels, test_size = 0.3, **kwargs):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.GaussianNB(priors = priors, var_smoothing = var_smoothing) model = sklearn.naive_bayes.GaussianNB(**kwargs)
model.fit(data_train, labels_train) model.fit(data_train, labels_train)
predictions = model.predict(data_test) predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test) return model, ClassificationMetric(predictions, labels_test)
def multinomial(data, labels, test_size = 0.3, alpha=1.0, fit_prior=True, class_prior=None): def multinomial(data, labels, test_size = 0.3, **kwargs):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.MultinomialNB(alpha = alpha, fit_prior = fit_prior, class_prior = class_prior) model = sklearn.naive_bayes.MultinomialNB(**kwargs)
model.fit(data_train, labels_train) model.fit(data_train, labels_train)
predictions = model.predict(data_test) predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test) return model, ClassificationMetric(predictions, labels_test)
def bernoulli(data, labels, test_size = 0.3, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None): def bernoulli(data, labels, test_size = 0.3, **kwargs):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.BernoulliNB(alpha = alpha, binarize = binarize, fit_prior = fit_prior, class_prior = class_prior) model = sklearn.naive_bayes.BernoulliNB(**kwargs)
model.fit(data_train, labels_train) model.fit(data_train, labels_train)
predictions = model.predict(data_test) predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test) return model, ClassificationMetric(predictions, labels_test)
def complement(data, labels, test_size = 0.3, alpha=1.0, fit_prior=True, class_prior=None, norm=False): def complement(data, labels, test_size = 0.3, **kwargs):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.ComplementNB(alpha = alpha, fit_prior = fit_prior, class_prior = class_prior, norm = norm) model = sklearn.naive_bayes.ComplementNB(**kwargs)
model.fit(data_train, labels_train) model.fit(data_train, labels_train)
predictions = model.predict(data_test) predictions = model.predict(data_test)
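The Naive Bayes wrappers follow the same pattern; sklearn-specific options such as var_smoothing now pass via **kwargs (toy data is hypothetical):

data, labels = [[0, 0], [1, 1]] * 20, [0, 1] * 20
model, metric = NaiveBayes.gaussian(data, labels, test_size = 0.3, var_smoothing = 1e-8)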

View File

@@ -1,43 +0,0 @@
# Only included for backwards compatibility! Do not update, NaiveBayes is preferred and supported.
import sklearn
from sklearn import model_selection, naive_bayes
from . import ClassificationMetric, RegressionMetric
class NaiveBayes:
def guassian(self, data, labels, test_size = 0.3, priors = None, var_smoothing = 1e-09):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.GaussianNB(priors = priors, var_smoothing = var_smoothing)
model.fit(data_train, labels_train)
predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test)
def multinomial(self, data, labels, test_size = 0.3, alpha=1.0, fit_prior=True, class_prior=None):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.MultinomialNB(alpha = alpha, fit_prior = fit_prior, class_prior = class_prior)
model.fit(data_train, labels_train)
predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test)
def bernoulli(self, data, labels, test_size = 0.3, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.BernoulliNB(alpha = alpha, binarize = binarize, fit_prior = fit_prior, class_prior = class_prior)
model.fit(data_train, labels_train)
predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test)
def complement(self, data, labels, test_size = 0.3, alpha=1.0, fit_prior=True, class_prior=None, norm=False):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
model = sklearn.naive_bayes.ComplementNB(alpha = alpha, fit_prior = fit_prior, class_prior = class_prior, norm = norm)
model.fit(data_train, labels_train)
predictions = model.predict(data_test)
return model, ClassificationMetric(predictions, labels_test)

View File

@@ -4,9 +4,14 @@
# this should be imported as a python module using 'from tra_analysis import RandomForest' # this should be imported as a python module using 'from tra_analysis import RandomForest'
# setup: # setup:
__version__ = "1.0.1" __version__ = "1.0.3"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.3:
- updated RandomForestClassifier and RandomForestRegressor parameters to match sklearn v 1.0.2
- changed default values for kwargs to rely on sklearn
1.0.2:
- optimized imports
1.0.1: 1.0.1:
- fixed __all__ - fixed __all__
1.0.0: 1.0.0:
@@ -23,23 +28,22 @@ __all__ = [
"random_forest_regressor", "random_forest_regressor",
] ]
import sklearn import sklearn, sklearn.ensemble, sklearn.naive_bayes
from sklearn import ensemble, model_selection
from . import ClassificationMetric, RegressionMetric from . import ClassificationMetric, RegressionMetric
def random_forest_classifier(data, labels, test_size, n_estimators, criterion="gini", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight=None): def random_forest_classifier(data, labels, test_size, n_estimators, **kwargs):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1) data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
kernel = sklearn.ensemble.RandomForestClassifier(n_estimators = n_estimators, criterion = criterion, max_depth = max_depth, min_samples_split = min_samples_split, min_samples_leaf = min_samples_leaf, min_weight_fraction_leaf = min_weight_fraction_leaf, max_leaf_nodes = max_leaf_nodes, min_impurity_decrease = min_impurity_decrease, bootstrap = bootstrap, oob_score = oob_score, n_jobs = n_jobs, random_state = random_state, verbose = verbose, warm_start = warm_start, class_weight = class_weight) kernel = sklearn.ensemble.RandomForestClassifier(n_estimators = n_estimators, **kwargs)
kernel.fit(data_train, labels_train) kernel.fit(data_train, labels_train)
predictions = kernel.predict(data_test) predictions = kernel.predict(data_test)
return kernel, ClassificationMetric(predictions, labels_test) return kernel, ClassificationMetric(predictions, labels_test)
def random_forest_regressor(data, outputs, test_size, n_estimators, criterion="mse", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False): def random_forest_regressor(data, outputs, test_size, n_estimators, **kwargs):
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1) data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
kernel = sklearn.ensemble.RandomForestRegressor(n_estimators = n_estimators, criterion = criterion, max_depth = max_depth, min_samples_split = min_samples_split, min_weight_fraction_leaf = min_weight_fraction_leaf, max_features = max_features, max_leaf_nodes = max_leaf_nodes, min_impurity_decrease = min_impurity_decrease, min_impurity_split = min_impurity_split, bootstrap = bootstrap, oob_score = oob_score, n_jobs = n_jobs, random_state = random_state, verbose = verbose, warm_start = warm_start) kernel = sklearn.ensemble.RandomForestRegressor(n_estimators = n_estimators, **kwargs)
kernel.fit(data_train, outputs_train) kernel.fit(data_train, outputs_train)
predictions = kernel.predict(data_test) predictions = kernel.predict(data_test)
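A sketch of the updated call; per the sklearn 1.0.2 alignment noted in the changelog, the regressor's criterion should be a current value such as "squared_error" rather than the removed "mse" (toy data is hypothetical):

data, outputs = [[i] for i in range(20)], [2 * i for i in range(20)]
kernel, metric = RandomForest.random_forest_regressor(data, outputs, test_size = 0.3, n_estimators = 100, criterion = "squared_error")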

View File

@@ -1,25 +0,0 @@
# Only included for backwards compatibility! Do not update, RandomForest is preferred and supported.
import sklearn
from sklearn import ensemble, model_selection
from . import ClassificationMetric, RegressionMetric
class RandomForest:
def random_forest_classifier(self, data, labels, test_size, n_estimators, criterion="gini", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight=None):
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
kernel = sklearn.ensemble.RandomForestClassifier(n_estimators = n_estimators, criterion = criterion, max_depth = max_depth, min_samples_split = min_samples_split, min_samples_leaf = min_samples_leaf, min_weight_fraction_leaf = min_weight_fraction_leaf, max_leaf_nodes = max_leaf_nodes, min_impurity_decrease = min_impurity_decrease, bootstrap = bootstrap, oob_score = oob_score, n_jobs = n_jobs, random_state = random_state, verbose = verbose, warm_start = warm_start, class_weight = class_weight)
kernel.fit(data_train, labels_train)
predictions = kernel.predict(data_test)
return kernel, ClassificationMetric(predictions, labels_test)
def random_forest_regressor(self, data, outputs, test_size, n_estimators, criterion="mse", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False):
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
kernel = sklearn.ensemble.RandomForestRegressor(n_estimators = n_estimators, criterion = criterion, max_depth = max_depth, min_samples_split = min_samples_split, min_weight_fraction_leaf = min_weight_fraction_leaf, max_features = max_features, max_leaf_nodes = max_leaf_nodes, min_impurity_decrease = min_impurity_decrease, min_impurity_split = min_impurity_split, bootstrap = bootstrap, oob_score = oob_score, n_jobs = n_jobs, random_state = random_state, verbose = verbose, warm_start = warm_start)
kernel.fit(data_train, outputs_train)
predictions = kernel.predict(data_test)
return kernel, RegressionMetric(predictions, outputs_test)

View File

@@ -4,9 +4,11 @@
# this should be imported as a python module using 'from tra_analysis import RegressionMetric' # this should be imported as a python module using 'from tra_analysis import RegressionMetric'
# setup: # setup:
__version__ = "1.0.0" __version__ = "1.0.1"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.1:
- optimized imports
1.0.0: 1.0.0:
- ported analysis.RegressionMetric() here - ported analysis.RegressionMetric() here
""" """
@@ -21,7 +23,6 @@ __all__ = [
import numpy as np import numpy as np
import sklearn import sklearn
from sklearn import metrics
class RegressionMetric(): class RegressionMetric():

View File

@@ -4,9 +4,11 @@
# this should be imported as a python module using 'from tra_analysis import SVM' # this should be imported as a python module using 'from tra_analysis import SVM'
# setup: # setup:
__version__ = "1.0.2" __version__ = "1.0.3"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.3:
- optimized imports
1.0.2: 1.0.2:
- fixed __all__ - fixed __all__
1.0.1: 1.0.1:
@@ -30,7 +32,6 @@ __all__ = [
] ]
import sklearn import sklearn
from sklearn import svm
from . import ClassificationMetric, RegressionMetric from . import ClassificationMetric, RegressionMetric
class CustomKernel: class CustomKernel:

View File

@@ -16,7 +16,7 @@ __changelog__ = """changelog:
__author__ = ( __author__ = (
"Arthur Lu <learthurgo@gmail.com>", "Arthur Lu <learthurgo@gmail.com>",
"James Pan <zpan@imsa.edu>" "James Pan <zpan@imsa.edu>",
) )
__all__ = [ __all__ = [

View File

@@ -1,391 +0,0 @@
# Only included for backwards compatibility! Do not update, Sort is preferred and supported.
class Sort: # if you haven't used a sort, then you've never lived
def quicksort(self, a):
def sort(array):
less = []
equal = []
greater = []
if len(array) > 1:
pivot = array[0]
for x in array:
if x < pivot:
less.append(x)
elif x == pivot:
equal.append(x)
elif x > pivot:
greater.append(x)
return sort(less)+equal+sort(greater)
else:
return array
return np.array(sort(a))
def mergesort(self, a):
def sort(array):
array = array
if len(array) >1:
middle = len(array) // 2
L = array[:middle]
R = array[middle:]
sort(L)
sort(R)
i = j = k = 0
while i < len(L) and j < len(R):
if L[i] < R[j]:
array[k] = L[i]
i+= 1
else:
array[k] = R[j]
j+= 1
k+= 1
while i < len(L):
array[k] = L[i]
i+= 1
k+= 1
while j < len(R):
array[k] = R[j]
j+= 1
k+= 1
return array
return sort(a)
def introsort(self, a):
def sort(array, start, end, maxdepth):
array = array
if end - start <= 1:
return
elif maxdepth == 0:
heapsort(array, start, end)
else:
p = partition(array, start, end)
sort(array, start, p + 1, maxdepth - 1)
sort(array, p + 1, end, maxdepth - 1)
return array
def partition(array, start, end):
pivot = array[start]
i = start - 1
j = end
while True:
i = i + 1
while array[i] < pivot:
i = i + 1
j = j - 1
while array[j] > pivot:
j = j - 1
if i >= j:
return j
swap(array, i, j)
def swap(array, i, j):
array[i], array[j] = array[j], array[i]
def heapsort(array, start, end):
build_max_heap(array, start, end)
for i in range(end - 1, start, -1):
swap(array, start, i)
max_heapify(array, index=0, start=start, end=i)
def build_max_heap(array, start, end):
def parent(i):
return (i - 1)//2
length = end - start
index = parent(length - 1)
while index >= 0:
max_heapify(array, index, start, end)
index = index - 1
def max_heapify(array, index, start, end):
def left(i):
return 2*i + 1
def right(i):
return 2*i + 2
size = end - start
l = left(index)
r = right(index)
if (l < size and array[start + l] > array[start + index]):
largest = l
else:
largest = index
if (r < size and array[start + r] > array[start + largest]):
largest = r
if largest != index:
swap(array, start + largest, start + index)
max_heapify(array, largest, start, end)
maxdepth = (len(a).bit_length() - 1)*2
return sort(a, 0, len(a), maxdepth)
def heapsort(self, a):
def sort(array):
array = array
n = len(array)
for i in range(n//2 - 1, -1, -1):
heapify(array, n, i)
for i in range(n-1, 0, -1):
array[i], array[0] = array[0], array[i]
heapify(array, i, 0)
return array
def heapify(array, n, i):
array = array
largest = i
l = 2 * i + 1
r = 2 * i + 2
if l < n and array[i] < array[l]:
largest = l
if r < n and array[largest] < array[r]:
largest = r
if largest != i:
array[i],array[largest] = array[largest],array[i]
heapify(array, n, largest)
return array
return sort(a)
def insertionsort(self, a):
def sort(array):
array = array
for i in range(1, len(array)):
key = array[i]
j = i-1
while j >=0 and key < array[j] :
array[j+1] = array[j]
j -= 1
array[j+1] = key
return array
return sort(a)
def timsort(self, a, block = 32):
BLOCK = block
def sort(array, n):
array = array
for i in range(0, n, BLOCK):
insertionsort(array, i, min((i+31), (n-1)))
size = BLOCK
while size < n:
for left in range(0, n, 2*size):
mid = left + size - 1
right = min((left + 2*size - 1), (n-1))
merge(array, left, mid, right)
size = 2*size
return array
def insertionsort(array, left, right):
array = array
for i in range(left + 1, right+1):
temp = array[i]
j = i - 1
while j >= left and array[j] > temp :
array[j+1] = array[j]
j -= 1
array[j+1] = temp
return array
def merge(array, l, m, r):
len1, len2 = m - l + 1, r - m
left, right = [], []
for i in range(0, len1):
left.append(array[l + i])
for i in range(0, len2):
right.append(array[m + 1 + i])
i, j, k = 0, 0, l
while i < len1 and j < len2:
if left[i] <= right[j]:
array[k] = left[i]
i += 1
else:
array[k] = right[j]
j += 1
k += 1
while i < len1:
array[k] = left[i]
k += 1
i += 1
while j < len2:
array[k] = right[j]
k += 1
j += 1
return sort(a, len(a))
def selectionsort(self, a):
array = a
for i in range(len(array)):
min_idx = i
for j in range(i+1, len(array)):
if array[min_idx] > array[j]:
min_idx = j
array[i], array[min_idx] = array[min_idx], array[i]
return array
	def shellsort(self, a):
		array = a
		n = len(array)
		gap = n // 2
		while gap > 0:
			for i in range(gap, n):
				temp = array[i]
				j = i
				while j >= gap and array[j - gap] > temp:
					array[j] = array[j - gap]
					j -= gap
				array[j] = temp
			gap //= 2
		return array
	def bubblesort(self, a):
		# iterative bubble sort with early exit; replaces a recursion-per-swap
		# variant that could exhaust the recursion limit on larger inputs
		def sort(array):
			n = len(array)
			for i in range(n):
				swapped = False
				for j in range(0, n - i - 1):
					if array[j] > array[j + 1]:
						array[j], array[j + 1] = array[j + 1], array[j]
						swapped = True
				if not swapped:
					break
			return array
		return sort(a)
def cyclesort(self, a):
		def sort(array):
writes = 0
for cycleStart in range(0, len(array) - 1):
item = array[cycleStart]
pos = cycleStart
for i in range(cycleStart + 1, len(array)):
if array[i] < item:
pos += 1
if pos == cycleStart:
continue
while item == array[pos]:
pos += 1
array[pos], item = item, array[pos]
writes += 1
while pos != cycleStart:
pos = cycleStart
for i in range(cycleStart + 1, len(array)):
if array[i] < item:
pos += 1
while item == array[pos]:
pos += 1
array[pos], item = item, array[pos]
writes += 1
return array
return sort(a)
	def cocktailsort(self, a):
		def sort(array):
			n = len(array)
			swapped = True
			start = 0
			end = n - 1
			while swapped:
				swapped = False
				for i in range(start, end):
					if array[i] > array[i + 1]:
						array[i], array[i + 1] = array[i + 1], array[i]
						swapped = True
				if not swapped:
					break
				swapped = False
				end = end - 1
				for i in range(end - 1, start - 1, -1):
					if array[i] > array[i + 1]:
						array[i], array[i + 1] = array[i + 1], array[i]
						swapped = True
				start = start + 1
			return array
		return sort(a)
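A minimal usage sketch for the sorters above, assuming the enclosing class is exported as Sort (the import path and class name are assumptions here); each method takes a list and returns it sorted in place:

# a minimal sketch; Sort as the class name is an assumption
from tra_analysis import Sort

sorter = Sort()
data = [5, 2, 9, 1, 5, 6]
print(sorter.introsort(list(data)))      # [1, 2, 5, 5, 6, 9]
print(sorter.timsort(list(data)))        # [1, 2, 5, 5, 6, 9]
print(sorter.cocktailsort(list(data)))   # [1, 2, 5, 5, 6, 9]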

View File

@ -4,9 +4,11 @@
# this should be imported as a python module using 'from tra_analysis import StatisticalTest' # this should be imported as a python module using 'from tra_analysis import StatisticalTest'
# setup: # setup:
__version__ = "1.0.2" __version__ = "1.0.3"
__changelog__ = """changelog: __changelog__ = """changelog:
1.0.3:
- optimized imports
1.0.2: 1.0.2:
- added tukey_multicomparison - added tukey_multicomparison
- fixed styling - fixed styling
@ -61,7 +63,6 @@ __all__ = [
import numpy as np import numpy as np
import scipy import scipy
from scipy import stats, interpolate
def ttest_onesample(a, popmean, axis = 0, nan_policy = 'propagate'): def ttest_onesample(a, popmean, axis = 0, nan_policy = 'propagate'):
@ -279,9 +280,9 @@ def get_tukeyQcrit(k, df, alpha=0.05):
cv001 = c[:, 2::2] cv001 = c[:, 2::2]
if alpha == 0.05: if alpha == 0.05:
intp = interpolate.interp1d(crows, cv005[:,k-2]) intp = scipy.interpolate.interp1d(crows, cv005[:,k-2])
elif alpha == 0.01: elif alpha == 0.01:
intp = interpolate.interp1d(crows, cv001[:,k-2]) intp = scipy.interpolate.interp1d(crows, cv001[:,k-2])
else: else:
raise ValueError('only implemented for alpha equal to 0.01 and 0.05') raise ValueError('only implemented for alpha equal to 0.01 and 0.05')
return intp(df) return intp(df)
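One caveat on the import change above: a bare "import scipy" is not guaranteed to expose submodules such as scipy.interpolate on every SciPy version, so an explicit submodule import is the safer form. A minimal sketch of the interpolation call (the table values here are illustrative, not the real Tukey critical values):

# a minimal sketch: importing the submodule explicitly guarantees
# scipy.interpolate is bound, regardless of SciPy version
import scipy.interpolate
import numpy as np

crows = np.array([5.0, 10.0, 20.0])   # hypothetical df rows
cv005 = np.array([3.0, 2.8, 2.6])     # hypothetical alpha = 0.05 critical values

intp = scipy.interpolate.interp1d(crows, cv005)
print(float(intp(12.0)))              # linear interpolation at df = 12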

View File

@ -1,170 +0,0 @@
# Only included for backwards compatibility! Do not update, StatisticalTest is preferred and supported.
import scipy
from scipy import stats
class StatisticalTest:
def ttest_onesample(self, a, popmean, axis = 0, nan_policy = 'propagate'):
results = scipy.stats.ttest_1samp(a, popmean, axis = axis, nan_policy = nan_policy)
return {"t-value": results[0], "p-value": results[1]}
def ttest_independent(self, a, b, equal = True, nan_policy = 'propagate'):
results = scipy.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy)
return {"t-value": results[0], "p-value": results[1]}
def ttest_statistic(self, o1, o2, equal = True):
results = scipy.stats.ttest_ind_from_stats(o1["mean"], o1["std"], o1["nobs"], o2["mean"], o2["std"], o2["nobs"], equal_var = equal)
return {"t-value": results[0], "p-value": results[1]}
def ttest_related(self, a, b, axis = 0, nan_policy='propagate'):
results = scipy.stats.ttest_rel(a, b, axis = axis, nan_policy = nan_policy)
return {"t-value": results[0], "p-value": results[1]}
def ks_fitness(self, rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'):
results = scipy.stats.kstest(rvs, cdf, args = args, N = N, alternative = alternative, mode = mode)
return {"ks-value": results[0], "p-value": results[1]}
def chisquare(self, f_obs, f_exp = None, ddof = None, axis = 0):
results = scipy.stats.chisquare(f_obs, f_exp = f_exp, ddof = ddof, axis = axis)
return {"chisquared-value": results[0], "p-value": results[1]}
def powerdivergence(self, f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None):
results = scipy.stats.power_divergence(f_obs, f_exp = f_exp, ddof = ddof, axis = axis, lambda_ = lambda_)
return {"powerdivergence-value": results[0], "p-value": results[1]}
	def ks_twosample(self, x, y, alternative = 'two-sided', mode = 'auto'):
results = scipy.stats.ks_2samp(x, y, alternative = alternative, mode = mode)
return {"ks-value": results[0], "p-value": results[1]}
def es_twosample(self, x, y, t = (0.4, 0.8)):
results = scipy.stats.epps_singleton_2samp(x, y, t = t)
return {"es-value": results[0], "p-value": results[1]}
def mw_rank(self, x, y, use_continuity = True, alternative = None):
results = scipy.stats.mannwhitneyu(x, y, use_continuity = use_continuity, alternative = alternative)
return {"u-value": results[0], "p-value": results[1]}
def mw_tiecorrection(self, rank_values):
results = scipy.stats.tiecorrect(rank_values)
return {"correction-factor": results}
def rankdata(self, a, method = 'average'):
results = scipy.stats.rankdata(a, method = method)
return results
	def wilcoxon_ranksum(self, a, b): # this seems to be superseded by the Mann-Whitney U test
results = scipy.stats.ranksums(a, b)
return {"u-value": results[0], "p-value": results[1]}
def wilcoxon_signedrank(self, x, y = None, zero_method = 'wilcox', correction = False, alternative = 'two-sided'):
results = scipy.stats.wilcoxon(x, y = y, zero_method = zero_method, correction = correction, alternative = alternative)
return {"t-value": results[0], "p-value": results[1]}
def kw_htest(self, *args, nan_policy = 'propagate'):
results = scipy.stats.kruskal(*args, nan_policy = nan_policy)
return {"h-value": results[0], "p-value": results[1]}
def friedman_chisquare(self, *args):
results = scipy.stats.friedmanchisquare(*args)
return {"chisquared-value": results[0], "p-value": results[1]}
def bm_wtest(self, x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'):
results = scipy.stats.brunnermunzel(x, y, alternative = alternative, distribution = distribution, nan_policy = nan_policy)
return {"w-value": results[0], "p-value": results[1]}
def combine_pvalues(self, pvalues, method = 'fisher', weights = None):
results = scipy.stats.combine_pvalues(pvalues, method = method, weights = weights)
return {"combined-statistic": results[0], "p-value": results[1]}
def jb_fitness(self, x):
results = scipy.stats.jarque_bera(x)
return {"jb-value": results[0], "p-value": results[1]}
def ab_equality(self, x, y):
results = scipy.stats.ansari(x, y)
return {"ab-value": results[0], "p-value": results[1]}
def bartlett_variance(self, *args):
results = scipy.stats.bartlett(*args)
return {"t-value": results[0], "p-value": results[1]}
def levene_variance(self, *args, center = 'median', proportiontocut = 0.05):
results = scipy.stats.levene(*args, center = center, proportiontocut = proportiontocut)
return {"w-value": results[0], "p-value": results[1]}
def sw_normality(self, x):
results = scipy.stats.shapiro(x)
return {"w-value": results[0], "p-value": results[1]}
def shapiro(self, x):
return "destroyed by facts and logic"
def ad_onesample(self, x, dist = 'norm'):
results = scipy.stats.anderson(x, dist = dist)
return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}
def ad_ksample(self, samples, midrank = True):
results = scipy.stats.anderson_ksamp(samples, midrank = midrank)
return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}
def binomial(self, x, n = None, p = 0.5, alternative = 'two-sided'):
results = scipy.stats.binom_test(x, n = n, p = p, alternative = alternative)
return {"p-value": results}
def fk_variance(self, *args, center = 'median', proportiontocut = 0.05):
results = scipy.stats.fligner(*args, center = center, proportiontocut = proportiontocut)
return {"h-value": results[0], "p-value": results[1]} # unknown if the statistic is an h value
def mood_mediantest(self, *args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'):
results = scipy.stats.median_test(*args, ties = ties, correction = correction, lambda_ = lambda_, nan_policy = nan_policy)
return {"chisquared-value": results[0], "p-value": results[1], "m-value": results[2], "table": results[3]}
def mood_equalscale(self, x, y, axis = 0):
results = scipy.stats.mood(x, y, axis = axis)
return {"z-score": results[0], "p-value": results[1]}
	def skewtest(self, a, axis = 0, nan_policy = 'propagate'):
		results = scipy.stats.skewtest(a, axis = axis, nan_policy = nan_policy)
		return {"z-score": results[0], "p-value": results[1]}
	def kurtosistest(self, a, axis = 0, nan_policy = 'propagate'):
		results = scipy.stats.kurtosistest(a, axis = axis, nan_policy = nan_policy)
		return {"z-score": results[0], "p-value": results[1]}
	def normaltest(self, a, axis = 0, nan_policy = 'propagate'):
		results = scipy.stats.normaltest(a, axis = axis, nan_policy = nan_policy)
		return {"z-score": results[0], "p-value": results[1]}

View File

@ -7,10 +7,17 @@
# current benchmark of optimization: 1.33 times faster # current benchmark of optimization: 1.33 times faster
# setup: # setup:
__version__ = "3.0.0" __version__ = "4.0.0-dev"
# changelog should be viewed using print(analysis.__changelog__) # changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog: __changelog__ = """changelog:
4.0.0:
- deprecated all *_obj.py compatibility modules
- deprecated titanlearn.py
- deprecated visualization.py
- removed matplotlib from requirements
- removed extra submodule imports in Analysis
- added typehinting, docstrings for each function
3.0.0: 3.0.0:
- incremented version to release 3.0.0 - incremented version to release 3.0.0
3.0.0-rc2: 3.0.0-rc2:
@ -40,6 +47,7 @@ __all__ = [
"Analysis", "Analysis",
"Array", "Array",
"ClassificationMetric", "ClassificationMetric",
"Clustering",
"CorrelationTest", "CorrelationTest",
"Expression", "Expression",
"Fit", "Fit",
@ -53,9 +61,9 @@ __all__ = [
] ]
from . import Analysis as Analysis from . import Analysis as Analysis
from . import Analysis as analysis
from .Array import Array from .Array import Array
from .ClassificationMetric import ClassificationMetric from .ClassificationMetric import ClassificationMetric
from . import Clustering
from . import CorrelationTest from . import CorrelationTest
from .equation import Expression from .equation import Expression
from . import Fit from . import Fit
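A minimal sketch of the import surface after this change; Clustering is new in v4, and Analysis remains the primary submodule (the function call below is an assumption carried over from v3):

from tra_analysis import Analysis as an
from tra_analysis import Clustering

print(an.basic_stats([1, 2, 3, 4, 5]))   # assumes basic_stats is unchanged from v3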

View File

@ -0,0 +1,24 @@
# Titan Robotics Team 2022: Metrics submodule
# Written by Arthur Lu
# Notes:
# this should be imported as a python module using 'from tra_analysis import metrics'
# setup:
__version__ = "1.0.0"
__changelog__ = """changelog:
1.0.0:
- implemented elo, glicko2, trueskill
"""
__author__ = (
"Arthur Lu <learthurgo@gmail.com>",
)
__all__ = [
	"elo",
	"glicko2",
	"trueskill"
]
from . import elo
from . import glicko2
from . import trueskill
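For context, a self-contained sketch of the rating update the elo submodule implements; the function name and signature here are illustrative assumptions, not the package's API:

# a minimal, self-contained Elo sketch; elo_update is an illustrative
# assumption, not the tra_analysis.metrics.elo interface
def elo_update(rating, opponent_rating, score, k = 32):
	# score: 1.0 for a win, 0.5 for a draw, 0.0 for a loss
	expected = 1 / (1 + 10 ** ((opponent_rating - rating) / 400))
	return rating + k * (score - expected)

print(elo_update(1500, 1600, 1.0))   # an underdog win yields a larger gain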

View File

@ -1,222 +0,0 @@
# Titan Robotics Team 2022: CUDA-based Regressions Module
# Not actively maintained, may be removed in future release
# Written by Arthur Lu & Jacob Levine
# Notes:
# this module has been automatically integrated into analysis.py, and should be callable as a class from the package
# this module is cuda-optimized (as appropriate) and vectorized (except for one small part)
# setup:
__version__ = "0.0.4"
# changelog should be viewed using print(analysis.regression.__changelog__)
__changelog__ = """
0.0.4:
- bug fixes
- fixed changelog
0.0.3:
- bug fixes
0.0.2:
-Added more parameters to log, exponential, polynomial
-Added SigmoidalRegKernelArthur, because Arthur apparently needs
to train the scaling and shifting of sigmoids
0.0.1:
-initial release, with linear, log, exponential, polynomial, and sigmoid kernels
-already vectorized (except for polynomial generation) and CUDA-optimized
"""
__author__ = (
"Jacob Levine <jlevine@imsa.edu>",
"Arthur Lu <learthurgo@gmail.com>",
)
__all__ = [
'factorial',
'take_all_pwrs',
'num_poly_terms',
'set_device',
'LinearRegKernel',
'SigmoidalRegKernel',
'LogRegKernel',
'PolyRegKernel',
'ExpRegKernel',
'SigmoidalRegKernelArthur',
'SGDTrain',
'CustomTrain',
'CircleFit'
]
import torch
global device
device = "cuda:0" if torch.cuda.is_available() else "cpu"
#todo: document completely
def set_device(new_device):
	global device
	device = new_device
class LinearRegKernel():
parameters= []
weights=None
bias=None
def __init__(self, num_vars):
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
self.bias=torch.rand(1, requires_grad=True, device=device)
self.parameters=[self.weights,self.bias]
def forward(self,mtx):
long_bias=self.bias.repeat([1,mtx.size()[1]])
return torch.matmul(self.weights,mtx)+long_bias
class SigmoidalRegKernel():
parameters= []
weights=None
bias=None
sigmoid=torch.nn.Sigmoid()
def __init__(self, num_vars):
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
self.bias=torch.rand(1, requires_grad=True, device=device)
self.parameters=[self.weights,self.bias]
def forward(self,mtx):
long_bias=self.bias.repeat([1,mtx.size()[1]])
return self.sigmoid(torch.matmul(self.weights,mtx)+long_bias)
class SigmoidalRegKernelArthur():
parameters= []
weights=None
in_bias=None
scal_mult=None
out_bias=None
sigmoid=torch.nn.Sigmoid()
def __init__(self, num_vars):
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
self.in_bias=torch.rand(1, requires_grad=True, device=device)
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
self.out_bias=torch.rand(1, requires_grad=True, device=device)
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
def forward(self,mtx):
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
return (self.scal_mult*self.sigmoid(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
class LogRegKernel():
parameters= []
weights=None
in_bias=None
scal_mult=None
out_bias=None
def __init__(self, num_vars):
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
self.in_bias=torch.rand(1, requires_grad=True, device=device)
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
self.out_bias=torch.rand(1, requires_grad=True, device=device)
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
def forward(self,mtx):
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
return (self.scal_mult*torch.log(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
class ExpRegKernel():
parameters= []
weights=None
in_bias=None
scal_mult=None
out_bias=None
def __init__(self, num_vars):
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
self.in_bias=torch.rand(1, requires_grad=True, device=device)
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
self.out_bias=torch.rand(1, requires_grad=True, device=device)
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
def forward(self,mtx):
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
return (self.scal_mult*torch.exp(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
class PolyRegKernel():
parameters= []
weights=None
bias=None
power=None
def __init__(self, num_vars, power):
self.power=power
num_terms=self.num_poly_terms(num_vars, power)
self.weights=torch.rand(num_terms, requires_grad=True, device=device)
self.bias=torch.rand(1, requires_grad=True, device=device)
self.parameters=[self.weights,self.bias]
def num_poly_terms(self,num_vars, power):
if power == 0:
return 0
return int(self.factorial(num_vars+power-1) / self.factorial(power) / self.factorial(num_vars-1)) + self.num_poly_terms(num_vars, power-1)
def factorial(self,n):
if n==0:
return 1
else:
return n*self.factorial(n-1)
def take_all_pwrs(self, vec, pwr):
#todo: vectorize (kinda)
combins=torch.combinations(vec, r=pwr, with_replacement=True)
out=torch.ones(combins.size()[0]).to(device).to(torch.float)
for i in torch.t(combins).to(device).to(torch.float):
out *= i
if pwr == 1:
return out
else:
return torch.cat((out,self.take_all_pwrs(vec, pwr-1)))
def forward(self,mtx):
#TODO: Vectorize the last part
cols=[]
for i in torch.t(mtx):
cols.append(self.take_all_pwrs(i,self.power))
new_mtx=torch.t(torch.stack(cols))
long_bias=self.bias.repeat([1,mtx.size()[1]])
return torch.matmul(self.weights,new_mtx)+long_bias
def SGDTrain(kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False):
optim=torch.optim.SGD(kernel.parameters, lr=learning_rate)
data_cuda=data.to(device)
ground_cuda=ground.to(device)
if (return_losses):
losses=[]
for i in range(iterations):
with torch.set_grad_enabled(True):
optim.zero_grad()
pred=kernel.forward(data_cuda)
ls=loss(pred,ground_cuda)
losses.append(ls.item())
ls.backward()
optim.step()
return [kernel,losses]
else:
for i in range(iterations):
with torch.set_grad_enabled(True):
optim.zero_grad()
pred=kernel.forward(data_cuda)
ls=loss(pred,ground_cuda)
ls.backward()
optim.step()
return kernel
def CustomTrain(kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False):
data_cuda=data.to(device)
ground_cuda=ground.to(device)
if (return_losses):
losses=[]
for i in range(iterations):
with torch.set_grad_enabled(True):
optim.zero_grad()
				pred=kernel.forward(data_cuda)
				ls=loss(pred,ground_cuda)
losses.append(ls.item())
ls.backward()
optim.step()
return [kernel,losses]
else:
for i in range(iterations):
with torch.set_grad_enabled(True):
optim.zero_grad()
pred=kernel.forward(data_cuda)
ls=loss(pred,ground_cuda)
ls.backward()
optim.step()
return kernel
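For reference, the loop that the deleted SGDTrain wrapped reduces to a standard PyTorch pattern; a minimal, self-contained sketch with synthetic data (plain torch, no tra_analysis dependency):

# a minimal sketch of the SGD loop the deleted module wrapped;
# fits w, b so that pred = w @ x + b approximates the targets
import torch

x = torch.rand(2, 100)                # 2 features, 100 samples
ground = 3 * x[0] - 2 * x[1] + 1      # synthetic targets
w = torch.rand(2, requires_grad=True)
b = torch.rand(1, requires_grad=True)
optim = torch.optim.SGD([w, b], lr=0.1)
loss_fn = torch.nn.MSELoss()

for _ in range(1000):
	optim.zero_grad()
	pred = torch.matmul(w, x) + b     # the LinearRegKernel forward pass
	loss = loss_fn(pred, ground)
	loss.backward()
	optim.step()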

View File

@ -1,122 +0,0 @@
# Titan Robotics Team 2022: ML Module
# Written by Arthur Lu & Jacob Levine
# Notes:
# this should be imported as a python module using 'import titanlearn'
# this should be included in the local directory or environment variable
# this module is optimized for multhreaded computing
# this module learns from its mistakes far faster than 2022's captains
# setup:
__version__ = "1.1.1"
#changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog:
1.1.1:
- removed matplotlib import
- removed graphloss()
1.1.0:
- added net, dataset, dataloader, and stdtrain template definitions
- added graphloss function
1.0.1:
- added clear functions
1.0.0:
- complete rewrite planned
	- deprecated 1.0.0.xxx versions
- added simple training loop
0.0.x:
-added generation of ANNS, basic SGD training
"""
__author__ = (
"Arthur Lu <arthurlu@ttic.edu>,"
"Jacob Levine <jlevine@ttic.edu>,"
)
__all__ = [
'clear',
'net',
'dataset',
'dataloader',
'train',
'stdtrainer',
]
import torch
from os import system, name
import numpy as np
def clear():
if name == 'nt':
_ = system('cls')
else:
_ = system('clear')
class net(torch.nn.Module): #template for standard neural net
def __init__(self):
		super(net, self).__init__()
def forward(self, input):
pass
class dataset(torch.utils.data.Dataset): #template for standard dataset
def __init__(self):
super(torch.utils.data.Dataset).__init__()
def __getitem__(self, index):
pass
def __len__(self):
pass
def dataloader(dataset, batch_size, num_workers, shuffle = True):
return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
def train(device, net, epochs, trainloader, optimizer, criterion): #expects standard dataloader, whch returns (inputs, labels)
dataset_len = trainloader.dataset.__len__()
iter_count = 0
running_loss = 0
running_loss_list = []
for epoch in range(epochs): # loop over the dataset multiple times
for i, data in enumerate(trainloader, 0):
inputs = data[0].to(device)
labels = data[1].to(device)
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels.to(torch.float))
loss.backward()
optimizer.step()
# monitoring steps below
iter_count += 1
running_loss += loss.item()
running_loss_list.append(running_loss)
clear()
print("training on: " + device)
print("iteration: " + str(i) + "/" + str(int(dataset_len / trainloader.batch_size)) + " | " + "epoch: " + str(epoch) + "/" + str(epochs))
print("current batch loss: " + str(loss.item))
print("running loss: " + str(running_loss / iter_count))
return net, running_loss_list
print("finished training")
def stdtrainer(net, criterion, optimizer, dataloader, epochs, batch_size):
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
	net = net.to(device)
	criterion = criterion.to(device)
	# note: torch optimizers have no .to(); optimizer state follows the parameters it tracks
	trainloader = dataloader
	return train(device, net, epochs, trainloader, optimizer, criterion)
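The dataloader helper above is a thin pass-through; a minimal sketch pairing it with a stock torch dataset (TensorDataset stands in for the dataset template):

# a minimal sketch pairing the dataloader wrapper with a stock torch dataset
import torch
from torch.utils.data import TensorDataset, DataLoader

features = torch.rand(64, 3)
labels = torch.rand(64, 1)
loader = DataLoader(TensorDataset(features, labels), batch_size=16, shuffle=True, num_workers=0)
for inputs, targets in loader:
	print(inputs.shape, targets.shape)   # torch.Size([16, 3]) torch.Size([16, 1])
	break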

View File

@ -1,58 +0,0 @@
# Titan Robotics Team 2022: Visualization Module
# Written by Arthur Lu & Jacob Levine
# Notes:
# this should be imported as a python module using 'import visualization'
# this should be included in the local directory or environment variable
# fancy
# setup:
__version__ = "0.0.1"
#changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog:
0.0.1:
- added graphhistogram function as a fragment of visualize_pit.py
0.0.0:
- created visualization.py
- added graphloss()
- added imports
"""
__author__ = (
"Arthur Lu <arthurlu@ttic.edu>,"
"Jacob Levine <jlevine@ttic.edu>,"
)
__all__ = [
'graphloss',
]
import matplotlib.pyplot as plt
import numpy as np
def graphloss(losses):
x = range(0, len(losses))
plt.plot(x, losses)
plt.show()
def graphhistogram(data, figsize, sharey = True): # expects a dict mapping each variable name to a list of its occurrences
fig, ax = plt.subplots(1, len(data), sharey=sharey, figsize=figsize)
i = 0
for variable in data:
ax[i].hist(data[variable])
ax[i].invert_xaxis()
ax[i].set_xlabel('Variable')
ax[i].set_ylabel('Frequency')
ax[i].set_title(variable)
plt.yticks(np.arange(len(data[variable])))
i+=1
plt.show()
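For the record, a sketch of the input shape graphhistogram expected before its removal (the variable names are illustrative assumptions):

# a minimal sketch of graphhistogram's expected input; key names are hypothetical
data = {
	"auto_balls": [1, 2, 2, 3, 5, 5, 6],
	"teleop_balls": [4, 4, 6, 7, 9, 9, 9],
}
# graphhistogram(data, figsize = (10, 4))   # one histogram panel per variable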