mirror of
https://github.com/titanscouting/tra-analysis.git
synced 2025-09-06 23:17:22 +00:00
Compare commits
58 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
aeb4990c81 | ||
|
4545f5721a | ||
|
8d703b10b3 | ||
|
df305f30f0 | ||
|
a123b71ac9 | ||
|
a02668e59c | ||
|
4d6372f620 | ||
|
9d0b6e68d8 | ||
|
b8d51811e0 | ||
|
7a58cd08e2 | ||
|
337fae68ee | ||
|
5e71d05626 | ||
|
01df42aa49 | ||
|
33eea153c1 | ||
|
114eee5d57 | ||
|
06f008746a | ||
|
4f9c4e0dbb | ||
|
5697e8b79e | ||
|
e054e66743 | ||
|
c914bd3754 | ||
|
6c08885a53 | ||
|
375befd0c4 | ||
|
893d1fb1d0 | ||
|
6a426ae4cd | ||
|
50c064ffa4 | ||
|
1b0a9967c8 | ||
|
2605f7c29f | ||
|
6f5a3edd88 | ||
|
457146b0e4 | ||
|
f7fd8ffcf9 | ||
|
77bc792426 | ||
|
39146cc555 | ||
|
04141bbec8 | ||
|
40e5899972 | ||
|
025c7f9b3c | ||
|
2daa09c040 | ||
|
9776136649 | ||
|
68d27a6302 | ||
|
7fc18b7c35 | ||
|
9b412b51a8 | ||
|
b6ac05a66e | ||
|
435c8a7bc6 | ||
|
a69b18354b | ||
|
7b9e6921d0 | ||
|
fb2800cf9e | ||
|
12cbb21077 | ||
|
46d1a48999 | ||
|
ad0a761d53 | ||
|
43f503a38d | ||
|
d38744438b | ||
|
eb8914aa26 | ||
|
283140094f | ||
|
66ac1c304e | ||
|
0eb9e07711 | ||
|
f56c85b298 | ||
|
6a9a17c5b4 | ||
|
e24c49bedb | ||
|
2daed73aaa |
2
.devcontainer/Dockerfile
Normal file
2
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,2 @@
|
||||
FROM python
|
||||
WORKDIR ~/
|
26
.devcontainer/devcontainer.json
Normal file
26
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "TRA Analysis Development Environment",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile",
|
||||
},
|
||||
"settings": {
|
||||
"terminal.integrated.shell.linux": "/bin/bash",
|
||||
"python.pythonPath": "/usr/local/bin/python",
|
||||
"python.linting.enabled": true,
|
||||
"python.linting.pylintEnabled": true,
|
||||
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
|
||||
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
|
||||
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
|
||||
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
|
||||
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
|
||||
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
|
||||
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
|
||||
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
|
||||
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
|
||||
"python.testing.pytestPath": "/usr/local/py-utils/bin/pytest"
|
||||
},
|
||||
"extensions": [
|
||||
"mhutchie.git-graph",
|
||||
],
|
||||
"postCreateCommand": "pip install -r analysis-master/analysis-amd64/requirements.txt"
|
||||
}
|
5
.gitignore
vendored
5
.gitignore
vendored
@@ -18,4 +18,7 @@ data analysis/arthur_pull.ipynb
|
||||
data analysis/keys.txt
|
||||
data analysis/check_for_new_matches.ipynb
|
||||
data analysis/test.ipynb
|
||||
data analysis/visualize_pit.ipynb
|
||||
data analysis/visualize_pit.ipynb
|
||||
data analysis/config/keys.config
|
||||
analysis-master/analysis/__pycache__/
|
||||
data analysis/__pycache__/
|
@@ -1,3 +1,2 @@
|
||||
# tr2022-strategy
|
||||
Titan Robotics 2022 Strategy Team Repository
|
||||
Use at your own risk
|
||||
# red-alliance-analysis
|
||||
Titan Robotics 2022 Strategy Team Repository for Data Analysis Tools. Included with these tools are the backend data analysis engine formatted as a python package, associated binaries for the analysis package, and premade scripts that can be pulled directly from this repository and will integrate with other Red Alliance applications to quickly deploy FRC scouting tools.
|
||||
|
@@ -1,6 +1,6 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: analysis
|
||||
Version: 1.0.0.6
|
||||
Version: 1.0.0.12
|
||||
Summary: analysis package developed by Titan Scouting for The Red Alliance
|
||||
Home-page: https://github.com/titanscout2022/tr2022-strategy
|
||||
Author: The Titan Scouting Team
|
@@ -3,9 +3,13 @@ analysis/__init__.py
|
||||
analysis/analysis.py
|
||||
analysis/regression.py
|
||||
analysis/titanlearn.py
|
||||
analysis/trueskill.py
|
||||
analysis/visualization.py
|
||||
analysis.egg-info/PKG-INFO
|
||||
analysis.egg-info/SOURCES.txt
|
||||
analysis.egg-info/dependency_links.txt
|
||||
analysis.egg-info/top_level.txt
|
||||
analysis.egg-info/requires.txt
|
||||
analysis.egg-info/top_level.txt
|
||||
analysis/metrics/__init__.py
|
||||
analysis/metrics/elo.py
|
||||
analysis/metrics/glicko2.py
|
||||
analysis/metrics/trueskill.py
|
@@ -0,0 +1,6 @@
|
||||
numba
|
||||
numpy
|
||||
scipy
|
||||
scikit-learn
|
||||
six
|
||||
matplotlib
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,16 +1,53 @@
|
||||
# Titan Robotics Team 2022: Data Analysis Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this should be imported as a python module using 'import analysis'
|
||||
# this should be imported as a python module using 'from analysis import analysis'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has been optimized for multhreaded computing
|
||||
# current benchmark of optimization: 1.33 times faster
|
||||
# setup:
|
||||
|
||||
__version__ = "1.1.13.001"
|
||||
__version__ = "1.2.0.004"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
1.2.0.004:
|
||||
- fixed __all__ to reflected the correct functions and classes
|
||||
- fixed CorrelationTests and StatisticalTests class functions to require self invocation
|
||||
- added missing math import
|
||||
- fixed KNN class functions to require self invocation
|
||||
- fixed Metrics class functions to require self invocation
|
||||
- various spelling fixes in CorrelationTests and StatisticalTests
|
||||
1.2.0.003:
|
||||
- bug fixes with CorrelationTests and StatisticalTests
|
||||
- moved glicko2 and trueskill to the metrics subpackage
|
||||
- moved elo to a new metrics subpackage
|
||||
1.2.0.002:
|
||||
- fixed docs
|
||||
1.2.0.001:
|
||||
- fixed docs
|
||||
1.2.0.000:
|
||||
- cleaned up wild card imports with scipy and sklearn
|
||||
- added CorrelationTests class
|
||||
- added StatisticalTests class
|
||||
- added several correlation tests to CorrelationTests
|
||||
- added several statistical tests to StatisticalTests
|
||||
1.1.13.009:
|
||||
- moved elo, glicko2, trueskill functions under class Metrics
|
||||
1.1.13.008:
|
||||
- moved Glicko2 to a seperate package
|
||||
1.1.13.007:
|
||||
- fixed bug with trueskill
|
||||
1.1.13.006:
|
||||
- cleaned up imports
|
||||
1.1.13.005:
|
||||
- cleaned up package
|
||||
1.1.13.004:
|
||||
- small fixes to regression to improve performance
|
||||
1.1.13.003:
|
||||
- filtered nans from regression
|
||||
1.1.13.002:
|
||||
- removed torch requirement, and moved Regression back to regression.py
|
||||
1.1.13.001:
|
||||
- bug fix with linear regression not returning a proper value
|
||||
- cleaned up regression
|
||||
@@ -239,29 +276,25 @@ __author__ = (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'_init_device',
|
||||
'load_csv',
|
||||
'basic_stats',
|
||||
'z_score',
|
||||
'z_normalize',
|
||||
'histo_analysis',
|
||||
'regression',
|
||||
'elo',
|
||||
'glicko2',
|
||||
'trueskill',
|
||||
'Metrics',
|
||||
'RegressionMetrics',
|
||||
'ClassificationMetrics',
|
||||
'kmeans',
|
||||
'pca',
|
||||
'decisiontree',
|
||||
'knn_classifier',
|
||||
'knn_regressor',
|
||||
'KNN',
|
||||
'NaiveBayes',
|
||||
'SVM',
|
||||
'random_forest_classifier',
|
||||
'random_forest_regressor',
|
||||
'Regression',
|
||||
'Glicko2',
|
||||
'CorrelationTests',
|
||||
'StatisticalTests',
|
||||
# all statistics functions left out due to integration in other functions
|
||||
]
|
||||
|
||||
@@ -270,27 +303,21 @@ __all__ = [
|
||||
# imports (now in alphabetical order! v 1.0.3.006):
|
||||
|
||||
import csv
|
||||
from analysis.metrics import elo as Elo
|
||||
from analysis.metrics import glicko2 as Glicko2
|
||||
import math
|
||||
import numba
|
||||
from numba import jit
|
||||
import numpy as np
|
||||
import math
|
||||
import scipy
|
||||
from scipy import *
|
||||
from scipy import optimize, stats
|
||||
import sklearn
|
||||
from sklearn import *
|
||||
import torch
|
||||
try:
|
||||
from analysis import trueskill as Trueskill
|
||||
except:
|
||||
import trueskill as Trueskill
|
||||
from sklearn import preprocessing, pipeline, linear_model, metrics, cluster, decomposition, tree, neighbors, naive_bayes, svm, model_selection, ensemble
|
||||
from analysis.metrics import trueskill as Trueskill
|
||||
|
||||
class error(ValueError):
|
||||
pass
|
||||
|
||||
def _init_device(): # initiates computation device for ANNs
|
||||
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
|
||||
return device
|
||||
|
||||
def load_csv(filepath):
|
||||
with open(filepath, newline='') as csvfile:
|
||||
file_array = np.array(list(csv.reader(csvfile)))
|
||||
@@ -349,15 +376,15 @@ def histo_analysis(hist_data):
|
||||
|
||||
def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
|
||||
regressions = []
|
||||
|
||||
if 'lin' in args: # formula: ax + b
|
||||
|
||||
try:
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
|
||||
def func(x, a, b):
|
||||
|
||||
return a * x + b
|
||||
@@ -374,9 +401,6 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
try:
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
|
||||
def func(x, a, b, c, d):
|
||||
|
||||
return a * np.log(b*(x + c)) + d
|
||||
@@ -391,10 +415,7 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
if 'exp' in args: # formula: a e ^ (b(x + c)) + d
|
||||
|
||||
try:
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
try:
|
||||
|
||||
def func(x, a, b, c, d):
|
||||
|
||||
@@ -410,8 +431,8 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
if 'ply' in args: # formula: a + bx^1 + cx^2 + dx^3 + ...
|
||||
|
||||
inputs = [inputs]
|
||||
outputs = [outputs]
|
||||
inputs = np.array([inputs])
|
||||
outputs = np.array([outputs])
|
||||
|
||||
plys = []
|
||||
limit = len(outputs[0])
|
||||
@@ -433,10 +454,7 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
if 'sig' in args: # formula: a tanh (b(x + c)) + d
|
||||
|
||||
try:
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
try:
|
||||
|
||||
def func(x, a, b, c, d):
|
||||
|
||||
@@ -452,32 +470,32 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
return regressions
|
||||
|
||||
def elo(starting_score, opposing_score, observed, N, K):
|
||||
class Metrics:
|
||||
|
||||
expected = 1/(1+10**((np.array(opposing_score) - starting_score)/N))
|
||||
def elo(self, starting_score, opposing_score, observed, N, K):
|
||||
|
||||
return starting_score + K*(np.sum(observed) - np.sum(expected))
|
||||
return Elo.calculate(starting_score, opposing_score, observed, N, K)
|
||||
|
||||
def glicko2(starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
|
||||
def glicko2(self, starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
|
||||
|
||||
player = Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
|
||||
player = Glicko2.Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
|
||||
|
||||
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
|
||||
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
|
||||
|
||||
return (player.rating, player.rd, player.vol)
|
||||
return (player.rating, player.rd, player.vol)
|
||||
|
||||
def trueskill(teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
|
||||
def trueskill(self, teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
|
||||
|
||||
team_ratings = []
|
||||
team_ratings = []
|
||||
|
||||
for team in teams_data:
|
||||
team_temp = []
|
||||
for player in team:
|
||||
player = Trueskill.Rating(player[0], player[1])
|
||||
team_temp.append(player)
|
||||
team_ratings.append(team_temp)
|
||||
for team in teams_data:
|
||||
team_temp = ()
|
||||
for player in team:
|
||||
player = Trueskill.Rating(player[0], player[1])
|
||||
team_temp = team_temp + (player,)
|
||||
team_ratings.append(team_temp)
|
||||
|
||||
return Trueskill.rate(teams_data, observations)
|
||||
return Trueskill.rate(team_ratings, ranks=observations)
|
||||
|
||||
class RegressionMetrics():
|
||||
|
||||
@@ -569,24 +587,25 @@ def decisiontree(data, labels, test_size = 0.3, criterion = "gini", splitter = "
|
||||
|
||||
return model, metrics
|
||||
|
||||
@jit(forceobj=True)
|
||||
def knn_classifier(data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
|
||||
class KNN:
|
||||
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsClassifier()
|
||||
model.fit(data_train, labels_train)
|
||||
predictions = model.predict(data_test)
|
||||
def knn_classifier(self, data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
|
||||
|
||||
return model, ClassificationMetrics(predictions, labels_test)
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsClassifier()
|
||||
model.fit(data_train, labels_train)
|
||||
predictions = model.predict(data_test)
|
||||
|
||||
def knn_regressor(data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
|
||||
return model, ClassificationMetrics(predictions, labels_test)
|
||||
|
||||
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
|
||||
model.fit(data_train, outputs_train)
|
||||
predictions = model.predict(data_test)
|
||||
def knn_regressor(self, data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
|
||||
|
||||
return model, RegressionMetrics(predictions, outputs_test)
|
||||
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
|
||||
model.fit(data_train, outputs_train)
|
||||
predictions = model.predict(data_test)
|
||||
|
||||
return model, RegressionMetrics(predictions, outputs_test)
|
||||
|
||||
class NaiveBayes:
|
||||
|
||||
@@ -700,320 +719,205 @@ def random_forest_regressor(data, outputs, test_size, n_estimators="warn", crite
|
||||
|
||||
return kernel, RegressionMetrics(predictions, outputs_test)
|
||||
|
||||
class Regression:
|
||||
class CorrelationTests:
|
||||
|
||||
# Titan Robotics Team 2022: CUDA-based Regressions Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this module has been automatically inegrated into analysis.py, and should be callable as a class from the package
|
||||
# this module is cuda-optimized and vectorized (except for one small part)
|
||||
# setup:
|
||||
def anova_oneway(self, *args): #expects arrays of samples
|
||||
|
||||
__version__ = "1.0.0.003"
|
||||
results = scipy.stats.f_oneway(*args)
|
||||
return {"F-value": results[0], "p-value": results[1]}
|
||||
|
||||
# changelog should be viewed using print(analysis.regression.__changelog__)
|
||||
__changelog__ = """
|
||||
1.0.0.003:
|
||||
- bug fixes
|
||||
1.0.0.002:
|
||||
-Added more parameters to log, exponential, polynomial
|
||||
-Added SigmoidalRegKernelArthur, because Arthur apparently needs
|
||||
to train the scaling and shifting of sigmoids
|
||||
def pearson(self, x, y):
|
||||
|
||||
1.0.0.001:
|
||||
-initial release, with linear, log, exponential, polynomial, and sigmoid kernels
|
||||
-already vectorized (except for polynomial generation) and CUDA-optimized
|
||||
"""
|
||||
results = scipy.stats.pearsonr(x, y)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
__author__ = (
|
||||
"Jacob Levine <jlevine@imsa.edu>",
|
||||
"Arthur Lu <learthurgo@gmail.com>"
|
||||
)
|
||||
def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'):
|
||||
|
||||
__all__ = [
|
||||
'factorial',
|
||||
'take_all_pwrs',
|
||||
'num_poly_terms',
|
||||
'set_device',
|
||||
'LinearRegKernel',
|
||||
'SigmoidalRegKernel',
|
||||
'LogRegKernel',
|
||||
'PolyRegKernel',
|
||||
'ExpRegKernel',
|
||||
'SigmoidalRegKernelArthur',
|
||||
'SGDTrain',
|
||||
'CustomTrain'
|
||||
]
|
||||
results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
global device
|
||||
def point_biserial(self, x,y):
|
||||
|
||||
device = "cuda:0" if torch.torch.cuda.is_available() else "cpu"
|
||||
results = scipy.stats.pointbiserialr(x, y)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
#todo: document completely
|
||||
def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
|
||||
|
||||
def set_device(self, new_device):
|
||||
device=new_device
|
||||
results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method)
|
||||
return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
class LinearRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
bias=None
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.bias]
|
||||
def forward(self,mtx):
|
||||
long_bias=self.bias.repeat([1,mtx.size()[1]])
|
||||
return torch.matmul(self.weights,mtx)+long_bias
|
||||
def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True):
|
||||
|
||||
class SigmoidalRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
bias=None
|
||||
sigmoid=torch.nn.Sigmoid()
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.bias]
|
||||
def forward(self,mtx):
|
||||
long_bias=self.bias.repeat([1,mtx.size()[1]])
|
||||
return self.sigmoid(torch.matmul(self.weights,mtx)+long_bias)
|
||||
results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
|
||||
return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
class SigmoidalRegKernelArthur():
|
||||
parameters= []
|
||||
weights=None
|
||||
in_bias=None
|
||||
scal_mult=None
|
||||
out_bias=None
|
||||
sigmoid=torch.nn.Sigmoid()
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.in_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
|
||||
self.out_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
|
||||
def forward(self,mtx):
|
||||
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
|
||||
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
|
||||
return (self.scal_mult*self.sigmoid(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
|
||||
def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
|
||||
|
||||
class LogRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
in_bias=None
|
||||
scal_mult=None
|
||||
out_bias=None
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.in_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
|
||||
self.out_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
|
||||
def forward(self,mtx):
|
||||
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
|
||||
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
|
||||
return (self.scal_mult*torch.log(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
|
||||
results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
|
||||
return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value
|
||||
|
||||
class ExpRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
in_bias=None
|
||||
scal_mult=None
|
||||
out_bias=None
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.in_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
|
||||
self.out_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
|
||||
def forward(self,mtx):
|
||||
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
|
||||
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
|
||||
return (self.scal_mult*torch.exp(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
|
||||
class StatisticalTests:
|
||||
|
||||
class PolyRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
bias=None
|
||||
power=None
|
||||
def __init__(self, num_vars, power):
|
||||
self.power=power
|
||||
num_terms=self.num_poly_terms(num_vars, power)
|
||||
self.weights=torch.rand(num_terms, requires_grad=True, device=device)
|
||||
self.bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.bias]
|
||||
def num_poly_terms(self,num_vars, power):
|
||||
if power == 0:
|
||||
return 0
|
||||
return int(self.factorial(num_vars+power-1) / self.factorial(power) / self.factorial(num_vars-1)) + self.num_poly_terms(num_vars, power-1)
|
||||
def factorial(self,n):
|
||||
if n==0:
|
||||
return 1
|
||||
else:
|
||||
return n*self.factorial(n-1)
|
||||
def take_all_pwrs(self, vec, pwr):
|
||||
#todo: vectorize (kinda)
|
||||
combins=torch.combinations(vec, r=pwr, with_replacement=True)
|
||||
out=torch.ones(combins.size()[0]).to(device).to(torch.float)
|
||||
for i in torch.t(combins).to(device).to(torch.float):
|
||||
out *= i
|
||||
if pwr == 1:
|
||||
return out
|
||||
else:
|
||||
return torch.cat((out,self.take_all_pwrs(vec, pwr-1)))
|
||||
def forward(self,mtx):
|
||||
#TODO: Vectorize the last part
|
||||
cols=[]
|
||||
for i in torch.t(mtx):
|
||||
cols.append(self.take_all_pwrs(i,self.power))
|
||||
new_mtx=torch.t(torch.stack(cols))
|
||||
long_bias=self.bias.repeat([1,mtx.size()[1]])
|
||||
return torch.matmul(self.weights,new_mtx)+long_bias
|
||||
def ttest_onesample(self, a, popmean, axis = 0, nan_policy = 'propagate'):
|
||||
|
||||
def SGDTrain(self, kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False):
|
||||
optim=torch.optim.SGD(kernel.parameters, lr=learning_rate)
|
||||
data_cuda=data.to(device)
|
||||
ground_cuda=ground.to(device)
|
||||
if (return_losses):
|
||||
losses=[]
|
||||
for i in range(iterations):
|
||||
with torch.set_grad_enabled(True):
|
||||
optim.zero_grad()
|
||||
pred=kernel.forward(data_cuda)
|
||||
ls=loss(pred,ground_cuda)
|
||||
losses.append(ls.item())
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return [kernel,losses]
|
||||
else:
|
||||
for i in range(iterations):
|
||||
with torch.set_grad_enabled(True):
|
||||
optim.zero_grad()
|
||||
pred=kernel.forward(data_cuda)
|
||||
ls=loss(pred,ground_cuda)
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return kernel
|
||||
results = scipy.stats.ttest_1samp(a, popmean, axis = axis, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def CustomTrain(self, kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False):
|
||||
data_cuda=data.to(device)
|
||||
ground_cuda=ground.to(device)
|
||||
if (return_losses):
|
||||
losses=[]
|
||||
for i in range(iterations):
|
||||
with torch.set_grad_enabled(True):
|
||||
optim.zero_grad()
|
||||
pred=kernel.forward(data)
|
||||
ls=loss(pred,ground)
|
||||
losses.append(ls.item())
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return [kernel,losses]
|
||||
else:
|
||||
for i in range(iterations):
|
||||
with torch.set_grad_enabled(True):
|
||||
optim.zero_grad()
|
||||
pred=kernel.forward(data_cuda)
|
||||
ls=loss(pred,ground_cuda)
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return kernel
|
||||
def ttest_independent(self, a, b, equal = True, nan_policy = 'propagate'):
|
||||
|
||||
class Glicko2:
|
||||
results = scipy.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
_tau = 0.5
|
||||
def ttest_statistic(self, o1, o2, equal = True):
|
||||
|
||||
def getRating(self):
|
||||
return (self.__rating * 173.7178) + 1500
|
||||
results = scipy.stats.ttest_ind_from_stats(o1["mean"], o1["std"], o1["nobs"], o2["mean"], o2["std"], o2["nobs"], equal_var = equal)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def setRating(self, rating):
|
||||
self.__rating = (rating - 1500) / 173.7178
|
||||
def ttest_related(self, a, b, axis = 0, nan_policy='propagate'):
|
||||
|
||||
rating = property(getRating, setRating)
|
||||
results = scipy.stats.ttest_rel(a, b, axis = axis, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def getRd(self):
|
||||
return self.__rd * 173.7178
|
||||
def ks_fitness(self, rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'):
|
||||
|
||||
def setRd(self, rd):
|
||||
self.__rd = rd / 173.7178
|
||||
results = scipy.stats.kstest(rvs, cdf, args = args, N = N, alternative = alternative, mode = mode)
|
||||
return {"ks-value": results[0], "p-value": results[1]}
|
||||
|
||||
rd = property(getRd, setRd)
|
||||
|
||||
def __init__(self, rating = 1500, rd = 350, vol = 0.06):
|
||||
def chisquare(self, f_obs, f_exp = None, ddof = None, axis = 0):
|
||||
|
||||
self.setRating(rating)
|
||||
self.setRd(rd)
|
||||
self.vol = vol
|
||||
|
||||
def _preRatingRD(self):
|
||||
results = scipy.stats.chisquare(f_obs, f_exp = f_exp, ddof = ddof, axis = axis)
|
||||
return {"chisquared-value": results[0], "p-value": results[1]}
|
||||
|
||||
self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))
|
||||
def powerdivergence(self, f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None):
|
||||
|
||||
results = scipy.stats.power_divergence(f_obs, f_exp = f_exp, ddof = ddof, axis = axis, lambda_ = lambda_)
|
||||
return {"powerdivergence-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ks_twosample(self, x, y, alternative = 'two_sided', mode = 'auto'):
|
||||
|
||||
def update_player(self, rating_list, RD_list, outcome_list):
|
||||
results = scipy.stats.ks_2samp(x, y, alternative = alternative, mode = mode)
|
||||
return {"ks-value": results[0], "p-value": results[1]}
|
||||
|
||||
rating_list = [(x - 1500) / 173.7178 for x in rating_list]
|
||||
RD_list = [x / 173.7178 for x in RD_list]
|
||||
def es_twosample(self, x, y, t = (0.4, 0.8)):
|
||||
|
||||
v = self._v(rating_list, RD_list)
|
||||
self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
|
||||
self._preRatingRD()
|
||||
|
||||
self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * \
|
||||
(outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
self.__rating += math.pow(self.__rd, 2) * tempSum
|
||||
|
||||
|
||||
def _newVol(self, rating_list, RD_list, outcome_list, v):
|
||||
results = scipy.stats.epps_singleton_2samp(x, y, t = t)
|
||||
return {"es-value": results[0], "p-value": results[1]}
|
||||
|
||||
i = 0
|
||||
delta = self._delta(rating_list, RD_list, outcome_list, v)
|
||||
a = math.log(math.pow(self.vol, 2))
|
||||
tau = self._tau
|
||||
x0 = a
|
||||
x1 = 0
|
||||
|
||||
while x0 != x1:
|
||||
# New iteration, so x(i) becomes x(i-1)
|
||||
x0 = x1
|
||||
d = math.pow(self.__rating, 2) + v + math.exp(x0)
|
||||
h1 = -(x0 - a) / math.pow(tau, 2) - 0.5 * math.exp(x0) \
|
||||
/ d + 0.5 * math.exp(x0) * math.pow(delta / d, 2)
|
||||
h2 = -1 / math.pow(tau, 2) - 0.5 * math.exp(x0) * \
|
||||
(math.pow(self.__rating, 2) + v) \
|
||||
/ math.pow(d, 2) + 0.5 * math.pow(delta, 2) * math.exp(x0) \
|
||||
* (math.pow(self.__rating, 2) + v - math.exp(x0)) / math.pow(d, 3)
|
||||
x1 = x0 - (h1 / h2)
|
||||
def mw_rank(self, x, y, use_continuity = True, alternative = None):
|
||||
|
||||
return math.exp(x1 / 2)
|
||||
|
||||
def _delta(self, rating_list, RD_list, outcome_list, v):
|
||||
results = scipy.stats.mannwhitneyu(x, y, use_continuity = use_continuity, alternative = alternative)
|
||||
return {"u-value": results[0], "p-value": results[1]}
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
return v * tempSum
|
||||
|
||||
def _v(self, rating_list, RD_list):
|
||||
def mw_tiecorrection(self, rank_values):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempE = self._E(rating_list[i], RD_list[i])
|
||||
tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
|
||||
return 1 / tempSum
|
||||
|
||||
def _E(self, p2rating, p2RD):
|
||||
results = scipy.stats.tiecorrect(rank_values)
|
||||
return {"correction-factor": results}
|
||||
|
||||
return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
|
||||
(self.__rating - p2rating)))
|
||||
|
||||
def _g(self, RD):
|
||||
def rankdata(self, a, method = 'average'):
|
||||
|
||||
return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))
|
||||
|
||||
def did_not_compete(self):
|
||||
results = scipy.stats.rankdata(a, method = method)
|
||||
return results
|
||||
|
||||
self._preRatingRD()
|
||||
def wilcoxon_ranksum(self, a, b): # this seems to be superceded by Mann Whitney Wilcoxon U Test
|
||||
|
||||
results = scipy.stats.ranksums(a, b)
|
||||
return {"u-value": results[0], "p-value": results[1]}
|
||||
|
||||
def wilcoxon_signedrank(self, x, y = None, zero_method = 'wilcox', correction = False, alternative = 'two-sided'):
|
||||
|
||||
results = scipy.stats.wilcoxon(x, y = y, zero_method = zero_method, correction = correction, alternative = alternative)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def kw_htest(self, *args, nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.kruskal(*args, nan_policy = nan_policy)
|
||||
return {"h-value": results[0], "p-value": results[1]}
|
||||
|
||||
def friedman_chisquare(self, *args):
|
||||
|
||||
results = scipy.stats.friedmanchisquare(*args)
|
||||
return {"chisquared-value": results[0], "p-value": results[1]}
|
||||
|
||||
def bm_wtest(self, x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.brunnermunzel(x, y, alternative = alternative, distribution = distribution, nan_policy = nan_policy)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def combine_pvalues(self, pvalues, method = 'fisher', weights = None):
|
||||
|
||||
results = scipy.stats.combine_pvalues(pvalues, method = method, weights = weights)
|
||||
return {"combined-statistic": results[0], "p-value": results[1]}
|
||||
|
||||
def jb_fitness(self, x):
|
||||
|
||||
results = scipy.stats.jarque_bera(x)
|
||||
return {"jb-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ab_equality(self, x, y):
|
||||
|
||||
results = scipy.stats.ansari(x, y)
|
||||
return {"ab-value": results[0], "p-value": results[1]}
|
||||
|
||||
def bartlett_variance(self, *args):
|
||||
|
||||
results = scipy.stats.bartlett(*args)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def levene_variance(self, *args, center = 'median', proportiontocut = 0.05):
|
||||
|
||||
results = scipy.stats.levene(*args, center = center, proportiontocut = proportiontocut)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def sw_normality(self, x):
|
||||
|
||||
results = scipy.stats.shapiro(x)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def shapiro(self, x):
|
||||
|
||||
return "destroyed by facts and logic"
|
||||
|
||||
def ad_onesample(self, x, dist = 'norm'):
|
||||
|
||||
results = scipy.stats.anderson(x, dist = dist)
|
||||
return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}
|
||||
|
||||
def ad_ksample(self, samples, midrank = True):
|
||||
|
||||
results = scipy.stats.anderson_ksamp(samples, midrank = midrank)
|
||||
return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}
|
||||
|
||||
def binomial(self, x, n = None, p = 0.5, alternative = 'two-sided'):
|
||||
|
||||
results = scipy.stats.binom_test(x, n = n, p = p, alternative = alternative)
|
||||
return {"p-value": results}
|
||||
|
||||
def fk_variance(self, *args, center = 'median', proportiontocut = 0.05):
|
||||
|
||||
results = scipy.stats.fligner(*args, center = center, proportiontocut = proportiontocut)
|
||||
return {"h-value": results[0], "p-value": results[1]} # unknown if the statistic is an h value
|
||||
|
||||
def mood_mediantest(self, *args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.median_test(*args, ties = ties, correction = correction, lambda_ = lambda_, nan_policy = nan_policy)
|
||||
return {"chisquared-value": results[0], "p-value": results[1], "m-value": results[2], "table": results[3]}
|
||||
|
||||
def mood_equalscale(self, x, y, axis = 0):
|
||||
|
||||
results = scipy.stats.mood(x, y, axis = axis)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def skewtest(self, a, axis = 0, nan_policy = 'propogate'):
|
||||
|
||||
results = scipy.stats.skewtest(a, axis = axis, nan_policy = nan_policy)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def kurtosistest(self, a, axis = 0, nan_policy = 'propogate'):
|
||||
|
||||
results = scipy.stats.kurtosistest(a, axis = axis, nan_policy = nan_policy)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def normaltest(self, a, axis = 0, nan_policy = 'propogate'):
|
||||
|
||||
results = scipy.stats.normaltest(a, axis = axis, nan_policy = nan_policy)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7
analysis-master/analysis-amd64/analysis/metrics/elo.py
Normal file
7
analysis-master/analysis-amd64/analysis/metrics/elo.py
Normal file
@@ -0,0 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
def calculate(starting_score, opposing_score, observed, N, K):
|
||||
|
||||
expected = 1/(1+10**((np.array(opposing_score) - starting_score)/N))
|
||||
|
||||
return starting_score + K*(np.sum(observed) - np.sum(expected))
|
99
analysis-master/analysis-amd64/analysis/metrics/glicko2.py
Normal file
99
analysis-master/analysis-amd64/analysis/metrics/glicko2.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import math
|
||||
|
||||
class Glicko2:
|
||||
_tau = 0.5
|
||||
|
||||
def getRating(self):
|
||||
return (self.__rating * 173.7178) + 1500
|
||||
|
||||
def setRating(self, rating):
|
||||
self.__rating = (rating - 1500) / 173.7178
|
||||
|
||||
rating = property(getRating, setRating)
|
||||
|
||||
def getRd(self):
|
||||
return self.__rd * 173.7178
|
||||
|
||||
def setRd(self, rd):
|
||||
self.__rd = rd / 173.7178
|
||||
|
||||
rd = property(getRd, setRd)
|
||||
|
||||
def __init__(self, rating = 1500, rd = 350, vol = 0.06):
|
||||
|
||||
self.setRating(rating)
|
||||
self.setRd(rd)
|
||||
self.vol = vol
|
||||
|
||||
def _preRatingRD(self):
|
||||
|
||||
self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))
|
||||
|
||||
def update_player(self, rating_list, RD_list, outcome_list):
|
||||
|
||||
rating_list = [(x - 1500) / 173.7178 for x in rating_list]
|
||||
RD_list = [x / 173.7178 for x in RD_list]
|
||||
|
||||
v = self._v(rating_list, RD_list)
|
||||
self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
|
||||
self._preRatingRD()
|
||||
|
||||
self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * \
|
||||
(outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
self.__rating += math.pow(self.__rd, 2) * tempSum
|
||||
|
||||
|
||||
def _newVol(self, rating_list, RD_list, outcome_list, v):
|
||||
|
||||
i = 0
|
||||
delta = self._delta(rating_list, RD_list, outcome_list, v)
|
||||
a = math.log(math.pow(self.vol, 2))
|
||||
tau = self._tau
|
||||
x0 = a
|
||||
x1 = 0
|
||||
|
||||
while x0 != x1:
|
||||
# New iteration, so x(i) becomes x(i-1)
|
||||
x0 = x1
|
||||
d = math.pow(self.__rating, 2) + v + math.exp(x0)
|
||||
h1 = -(x0 - a) / math.pow(tau, 2) - 0.5 * math.exp(x0) \
|
||||
/ d + 0.5 * math.exp(x0) * math.pow(delta / d, 2)
|
||||
h2 = -1 / math.pow(tau, 2) - 0.5 * math.exp(x0) * \
|
||||
(math.pow(self.__rating, 2) + v) \
|
||||
/ math.pow(d, 2) + 0.5 * math.pow(delta, 2) * math.exp(x0) \
|
||||
* (math.pow(self.__rating, 2) + v - math.exp(x0)) / math.pow(d, 3)
|
||||
x1 = x0 - (h1 / h2)
|
||||
|
||||
return math.exp(x1 / 2)
|
||||
|
||||
def _delta(self, rating_list, RD_list, outcome_list, v):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
return v * tempSum
|
||||
|
||||
def _v(self, rating_list, RD_list):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempE = self._E(rating_list[i], RD_list[i])
|
||||
tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
|
||||
return 1 / tempSum
|
||||
|
||||
def _E(self, p2rating, p2RD):
|
||||
|
||||
return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
|
||||
(self.__rating - p2rating)))
|
||||
|
||||
def _g(self, RD):
|
||||
|
||||
return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))
|
||||
|
||||
def did_not_compete(self):
|
||||
|
||||
self._preRatingRD()
|
@@ -1,20 +1,23 @@
|
||||
# Titan Robotics Team 2022: CUDA-based Regressions Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this should be imported as a python module using 'import regression'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module is cuda-optimized and vectorized (except for one small part)
|
||||
# this module has been automatically inegrated into analysis.py, and should be callable as a class from the package
|
||||
# this module is cuda-optimized and vectorized (except for one small part)
|
||||
# setup:
|
||||
|
||||
__version__ = "1.0.0.002"
|
||||
__version__ = "1.0.0.004"
|
||||
|
||||
# changelog should be viewed using print(regression.__changelog__)
|
||||
# changelog should be viewed using print(analysis.regression.__changelog__)
|
||||
__changelog__ = """
|
||||
1.0.0.004:
|
||||
- bug fixes
|
||||
- fixed changelog
|
||||
1.0.0.003:
|
||||
- bug fixes
|
||||
1.0.0.002:
|
||||
-Added more parameters to log, exponential, polynomial
|
||||
-Added SigmoidalRegKernelArthur, because Arthur apparently needs
|
||||
to train the scaling and shifting of sigmoids
|
||||
|
||||
1.0.0.001:
|
||||
-initial release, with linear, log, exponential, polynomial, and sigmoid kernels
|
||||
-already vectorized (except for polynomial generation) and CUDA-optimized
|
||||
@@ -22,6 +25,7 @@ __changelog__ = """
|
||||
|
||||
__author__ = (
|
||||
"Jacob Levine <jlevine@imsa.edu>",
|
||||
"Arthur Lu <learthurgo@gmail.com>"
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
@@ -39,35 +43,15 @@ __all__ = [
|
||||
'CustomTrain'
|
||||
]
|
||||
|
||||
|
||||
# imports (just one for now):
|
||||
|
||||
import torch
|
||||
|
||||
global device
|
||||
|
||||
device = "cuda:0" if torch.torch.cuda.is_available() else "cpu"
|
||||
|
||||
#todo: document completely
|
||||
|
||||
def factorial(n):
|
||||
if n==0:
|
||||
return 1
|
||||
else:
|
||||
return n*factorial(n-1)
|
||||
def num_poly_terms(num_vars, power):
|
||||
if power == 0:
|
||||
return 0
|
||||
return int(factorial(num_vars+power-1) / factorial(power) / factorial(num_vars-1)) + num_poly_terms(num_vars, power-1)
|
||||
|
||||
def take_all_pwrs(vec,pwr):
|
||||
#todo: vectorize (kinda)
|
||||
combins=torch.combinations(vec, r=pwr, with_replacement=True)
|
||||
out=torch.ones(combins.size()[0])
|
||||
for i in torch.t(combins):
|
||||
out *= i
|
||||
return torch.cat(out,take_all_pwrs(vec, pwr-1))
|
||||
|
||||
def set_device(new_device):
|
||||
global device
|
||||
def set_device(self, new_device):
|
||||
device=new_device
|
||||
|
||||
class LinearRegKernel():
|
||||
@@ -154,20 +138,39 @@ class PolyRegKernel():
|
||||
power=None
|
||||
def __init__(self, num_vars, power):
|
||||
self.power=power
|
||||
num_terms=num_poly_terms(num_vars, power)
|
||||
num_terms=self.num_poly_terms(num_vars, power)
|
||||
self.weights=torch.rand(num_terms, requires_grad=True, device=device)
|
||||
self.bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.bias]
|
||||
def num_poly_terms(self,num_vars, power):
|
||||
if power == 0:
|
||||
return 0
|
||||
return int(self.factorial(num_vars+power-1) / self.factorial(power) / self.factorial(num_vars-1)) + self.num_poly_terms(num_vars, power-1)
|
||||
def factorial(self,n):
|
||||
if n==0:
|
||||
return 1
|
||||
else:
|
||||
return n*self.factorial(n-1)
|
||||
def take_all_pwrs(self, vec, pwr):
|
||||
#todo: vectorize (kinda)
|
||||
combins=torch.combinations(vec, r=pwr, with_replacement=True)
|
||||
out=torch.ones(combins.size()[0]).to(device).to(torch.float)
|
||||
for i in torch.t(combins).to(device).to(torch.float):
|
||||
out *= i
|
||||
if pwr == 1:
|
||||
return out
|
||||
else:
|
||||
return torch.cat((out,self.take_all_pwrs(vec, pwr-1)))
|
||||
def forward(self,mtx):
|
||||
#TODO: Vectorize the last part
|
||||
cols=[]
|
||||
for i in torch.t(mtx):
|
||||
cols.append(take_all_pwrs(i,self.power))
|
||||
cols.append(self.take_all_pwrs(i,self.power))
|
||||
new_mtx=torch.t(torch.stack(cols))
|
||||
long_bias=self.bias.repeat([1,mtx.size()[1]])
|
||||
return torch.matmul(self.weights,new_mtx)+long_bias
|
||||
|
||||
def SGDTrain(kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False):
|
||||
def SGDTrain(self, kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False):
|
||||
optim=torch.optim.SGD(kernel.parameters, lr=learning_rate)
|
||||
data_cuda=data.to(device)
|
||||
ground_cuda=ground.to(device)
|
||||
@@ -192,7 +195,7 @@ def SGDTrain(kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, lea
|
||||
optim.step()
|
||||
return kernel
|
||||
|
||||
def CustomTrain(kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False):
|
||||
def CustomTrain(self, kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False):
|
||||
data_cuda=data.to(device)
|
||||
ground_cuda=ground.to(device)
|
||||
if (return_losses):
|
||||
@@ -214,4 +217,4 @@ def CustomTrain(kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations
|
||||
ls=loss(pred,ground_cuda)
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return kernel
|
||||
return kernel
|
1
analysis-master/analysis-amd64/build.sh
Executable file
1
analysis-master/analysis-amd64/build.sh
Executable file
@@ -0,0 +1 @@
|
||||
python setup.py sdist bdist_wheel || python3 setup.py sdist bdist_wheel
|
@@ -1,16 +1,53 @@
|
||||
# Titan Robotics Team 2022: Data Analysis Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this should be imported as a python module using 'import analysis'
|
||||
# this should be imported as a python module using 'from analysis import analysis'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has been optimized for multhreaded computing
|
||||
# current benchmark of optimization: 1.33 times faster
|
||||
# setup:
|
||||
|
||||
__version__ = "1.1.13.001"
|
||||
__version__ = "1.2.0.004"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
1.2.0.004:
|
||||
- fixed __all__ to reflected the correct functions and classes
|
||||
- fixed CorrelationTests and StatisticalTests class functions to require self invocation
|
||||
- added missing math import
|
||||
- fixed KNN class functions to require self invocation
|
||||
- fixed Metrics class functions to require self invocation
|
||||
- various spelling fixes in CorrelationTests and StatisticalTests
|
||||
1.2.0.003:
|
||||
- bug fixes with CorrelationTests and StatisticalTests
|
||||
- moved glicko2 and trueskill to the metrics subpackage
|
||||
- moved elo to a new metrics subpackage
|
||||
1.2.0.002:
|
||||
- fixed docs
|
||||
1.2.0.001:
|
||||
- fixed docs
|
||||
1.2.0.000:
|
||||
- cleaned up wild card imports with scipy and sklearn
|
||||
- added CorrelationTests class
|
||||
- added StatisticalTests class
|
||||
- added several correlation tests to CorrelationTests
|
||||
- added several statistical tests to StatisticalTests
|
||||
1.1.13.009:
|
||||
- moved elo, glicko2, trueskill functions under class Metrics
|
||||
1.1.13.008:
|
||||
- moved Glicko2 to a seperate package
|
||||
1.1.13.007:
|
||||
- fixed bug with trueskill
|
||||
1.1.13.006:
|
||||
- cleaned up imports
|
||||
1.1.13.005:
|
||||
- cleaned up package
|
||||
1.1.13.004:
|
||||
- small fixes to regression to improve performance
|
||||
1.1.13.003:
|
||||
- filtered nans from regression
|
||||
1.1.13.002:
|
||||
- removed torch requirement, and moved Regression back to regression.py
|
||||
1.1.13.001:
|
||||
- bug fix with linear regression not returning a proper value
|
||||
- cleaned up regression
|
||||
@@ -239,29 +276,25 @@ __author__ = (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'_init_device',
|
||||
'load_csv',
|
||||
'basic_stats',
|
||||
'z_score',
|
||||
'z_normalize',
|
||||
'histo_analysis',
|
||||
'regression',
|
||||
'elo',
|
||||
'glicko2',
|
||||
'trueskill',
|
||||
'Metrics',
|
||||
'RegressionMetrics',
|
||||
'ClassificationMetrics',
|
||||
'kmeans',
|
||||
'pca',
|
||||
'decisiontree',
|
||||
'knn_classifier',
|
||||
'knn_regressor',
|
||||
'KNN',
|
||||
'NaiveBayes',
|
||||
'SVM',
|
||||
'random_forest_classifier',
|
||||
'random_forest_regressor',
|
||||
'Regression',
|
||||
'Glicko2',
|
||||
'CorrelationTests',
|
||||
'StatisticalTests',
|
||||
# all statistics functions left out due to integration in other functions
|
||||
]
|
||||
|
||||
@@ -270,27 +303,21 @@ __all__ = [
|
||||
# imports (now in alphabetical order! v 1.0.3.006):
|
||||
|
||||
import csv
|
||||
from analysis.metrics import elo as Elo
|
||||
from analysis.metrics import glicko2 as Glicko2
|
||||
import math
|
||||
import numba
|
||||
from numba import jit
|
||||
import numpy as np
|
||||
import math
|
||||
import scipy
|
||||
from scipy import *
|
||||
from scipy import optimize, stats
|
||||
import sklearn
|
||||
from sklearn import *
|
||||
import torch
|
||||
try:
|
||||
from analysis import trueskill as Trueskill
|
||||
except:
|
||||
import trueskill as Trueskill
|
||||
from sklearn import preprocessing, pipeline, linear_model, metrics, cluster, decomposition, tree, neighbors, naive_bayes, svm, model_selection, ensemble
|
||||
from analysis.metrics import trueskill as Trueskill
|
||||
|
||||
class error(ValueError):
|
||||
pass
|
||||
|
||||
def _init_device(): # initiates computation device for ANNs
|
||||
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
|
||||
return device
|
||||
|
||||
def load_csv(filepath):
|
||||
with open(filepath, newline='') as csvfile:
|
||||
file_array = np.array(list(csv.reader(csvfile)))
|
||||
@@ -349,15 +376,15 @@ def histo_analysis(hist_data):
|
||||
|
||||
def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
|
||||
regressions = []
|
||||
|
||||
if 'lin' in args: # formula: ax + b
|
||||
|
||||
try:
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
|
||||
def func(x, a, b):
|
||||
|
||||
return a * x + b
|
||||
@@ -374,9 +401,6 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
try:
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
|
||||
def func(x, a, b, c, d):
|
||||
|
||||
return a * np.log(b*(x + c)) + d
|
||||
@@ -391,10 +415,7 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
if 'exp' in args: # formula: a e ^ (b(x + c)) + d
|
||||
|
||||
try:
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
try:
|
||||
|
||||
def func(x, a, b, c, d):
|
||||
|
||||
@@ -410,8 +431,8 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
if 'ply' in args: # formula: a + bx^1 + cx^2 + dx^3 + ...
|
||||
|
||||
inputs = [inputs]
|
||||
outputs = [outputs]
|
||||
inputs = np.array([inputs])
|
||||
outputs = np.array([outputs])
|
||||
|
||||
plys = []
|
||||
limit = len(outputs[0])
|
||||
@@ -433,10 +454,7 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
if 'sig' in args: # formula: a tanh (b(x + c)) + d
|
||||
|
||||
try:
|
||||
|
||||
X = np.array(inputs)
|
||||
y = np.array(outputs)
|
||||
try:
|
||||
|
||||
def func(x, a, b, c, d):
|
||||
|
||||
@@ -452,32 +470,32 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
return regressions
|
||||
|
||||
def elo(starting_score, opposing_score, observed, N, K):
|
||||
class Metrics:
|
||||
|
||||
expected = 1/(1+10**((np.array(opposing_score) - starting_score)/N))
|
||||
def elo(self, starting_score, opposing_score, observed, N, K):
|
||||
|
||||
return starting_score + K*(np.sum(observed) - np.sum(expected))
|
||||
return Elo.calculate(starting_score, opposing_score, observed, N, K)
|
||||
|
||||
def glicko2(starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
|
||||
def glicko2(self, starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
|
||||
|
||||
player = Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
|
||||
player = Glicko2.Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
|
||||
|
||||
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
|
||||
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
|
||||
|
||||
return (player.rating, player.rd, player.vol)
|
||||
return (player.rating, player.rd, player.vol)
|
||||
|
||||
def trueskill(teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
|
||||
def trueskill(self, teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
|
||||
|
||||
team_ratings = []
|
||||
team_ratings = []
|
||||
|
||||
for team in teams_data:
|
||||
team_temp = []
|
||||
for player in team:
|
||||
player = Trueskill.Rating(player[0], player[1])
|
||||
team_temp.append(player)
|
||||
team_ratings.append(team_temp)
|
||||
for team in teams_data:
|
||||
team_temp = ()
|
||||
for player in team:
|
||||
player = Trueskill.Rating(player[0], player[1])
|
||||
team_temp = team_temp + (player,)
|
||||
team_ratings.append(team_temp)
|
||||
|
||||
return Trueskill.rate(teams_data, observations)
|
||||
return Trueskill.rate(team_ratings, ranks=observations)
|
||||
|
||||
class RegressionMetrics():
|
||||
|
||||
@@ -569,24 +587,25 @@ def decisiontree(data, labels, test_size = 0.3, criterion = "gini", splitter = "
|
||||
|
||||
return model, metrics
|
||||
|
||||
@jit(forceobj=True)
|
||||
def knn_classifier(data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
|
||||
class KNN:
|
||||
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsClassifier()
|
||||
model.fit(data_train, labels_train)
|
||||
predictions = model.predict(data_test)
|
||||
def knn_classifier(self, data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
|
||||
|
||||
return model, ClassificationMetrics(predictions, labels_test)
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsClassifier()
|
||||
model.fit(data_train, labels_train)
|
||||
predictions = model.predict(data_test)
|
||||
|
||||
def knn_regressor(data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
|
||||
return model, ClassificationMetrics(predictions, labels_test)
|
||||
|
||||
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
|
||||
model.fit(data_train, outputs_train)
|
||||
predictions = model.predict(data_test)
|
||||
def knn_regressor(self, data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
|
||||
|
||||
return model, RegressionMetrics(predictions, outputs_test)
|
||||
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
|
||||
model.fit(data_train, outputs_train)
|
||||
predictions = model.predict(data_test)
|
||||
|
||||
return model, RegressionMetrics(predictions, outputs_test)
|
||||
|
||||
class NaiveBayes:
|
||||
|
||||
@@ -700,320 +719,205 @@ def random_forest_regressor(data, outputs, test_size, n_estimators="warn", crite
|
||||
|
||||
return kernel, RegressionMetrics(predictions, outputs_test)
|
||||
|
||||
class Regression:
|
||||
class CorrelationTests:
|
||||
|
||||
# Titan Robotics Team 2022: CUDA-based Regressions Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this module has been automatically inegrated into analysis.py, and should be callable as a class from the package
|
||||
# this module is cuda-optimized and vectorized (except for one small part)
|
||||
# setup:
|
||||
def anova_oneway(self, *args): #expects arrays of samples
|
||||
|
||||
__version__ = "1.0.0.003"
|
||||
results = scipy.stats.f_oneway(*args)
|
||||
return {"F-value": results[0], "p-value": results[1]}
|
||||
|
||||
# changelog should be viewed using print(analysis.regression.__changelog__)
|
||||
__changelog__ = """
|
||||
1.0.0.003:
|
||||
- bug fixes
|
||||
1.0.0.002:
|
||||
-Added more parameters to log, exponential, polynomial
|
||||
-Added SigmoidalRegKernelArthur, because Arthur apparently needs
|
||||
to train the scaling and shifting of sigmoids
|
||||
def pearson(self, x, y):
|
||||
|
||||
1.0.0.001:
|
||||
-initial release, with linear, log, exponential, polynomial, and sigmoid kernels
|
||||
-already vectorized (except for polynomial generation) and CUDA-optimized
|
||||
"""
|
||||
results = scipy.stats.pearsonr(x, y)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
__author__ = (
|
||||
"Jacob Levine <jlevine@imsa.edu>",
|
||||
"Arthur Lu <learthurgo@gmail.com>"
|
||||
)
|
||||
def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'):
|
||||
|
||||
__all__ = [
|
||||
'factorial',
|
||||
'take_all_pwrs',
|
||||
'num_poly_terms',
|
||||
'set_device',
|
||||
'LinearRegKernel',
|
||||
'SigmoidalRegKernel',
|
||||
'LogRegKernel',
|
||||
'PolyRegKernel',
|
||||
'ExpRegKernel',
|
||||
'SigmoidalRegKernelArthur',
|
||||
'SGDTrain',
|
||||
'CustomTrain'
|
||||
]
|
||||
results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
global device
|
||||
def point_biserial(self, x,y):
|
||||
|
||||
device = "cuda:0" if torch.torch.cuda.is_available() else "cpu"
|
||||
results = scipy.stats.pointbiserialr(x, y)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
#todo: document completely
|
||||
def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
|
||||
|
||||
def set_device(self, new_device):
|
||||
device=new_device
|
||||
results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method)
|
||||
return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
class LinearRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
bias=None
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.bias]
|
||||
def forward(self,mtx):
|
||||
long_bias=self.bias.repeat([1,mtx.size()[1]])
|
||||
return torch.matmul(self.weights,mtx)+long_bias
|
||||
def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True):
|
||||
|
||||
class SigmoidalRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
bias=None
|
||||
sigmoid=torch.nn.Sigmoid()
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.bias]
|
||||
def forward(self,mtx):
|
||||
long_bias=self.bias.repeat([1,mtx.size()[1]])
|
||||
return self.sigmoid(torch.matmul(self.weights,mtx)+long_bias)
|
||||
results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
|
||||
return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
class SigmoidalRegKernelArthur():
|
||||
parameters= []
|
||||
weights=None
|
||||
in_bias=None
|
||||
scal_mult=None
|
||||
out_bias=None
|
||||
sigmoid=torch.nn.Sigmoid()
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.in_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
|
||||
self.out_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
|
||||
def forward(self,mtx):
|
||||
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
|
||||
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
|
||||
return (self.scal_mult*self.sigmoid(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
|
||||
def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
|
||||
|
||||
class LogRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
in_bias=None
|
||||
scal_mult=None
|
||||
out_bias=None
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.in_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
|
||||
self.out_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
|
||||
def forward(self,mtx):
|
||||
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
|
||||
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
|
||||
return (self.scal_mult*torch.log(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
|
||||
results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
|
||||
return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value
|
||||
|
||||
class ExpRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
in_bias=None
|
||||
scal_mult=None
|
||||
out_bias=None
|
||||
def __init__(self, num_vars):
|
||||
self.weights=torch.rand(num_vars, requires_grad=True, device=device)
|
||||
self.in_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.scal_mult=torch.rand(1, requires_grad=True, device=device)
|
||||
self.out_bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.in_bias, self.scal_mult, self.out_bias]
|
||||
def forward(self,mtx):
|
||||
long_in_bias=self.in_bias.repeat([1,mtx.size()[1]])
|
||||
long_out_bias=self.out_bias.repeat([1,mtx.size()[1]])
|
||||
return (self.scal_mult*torch.exp(torch.matmul(self.weights,mtx)+long_in_bias))+long_out_bias
|
||||
class StatisticalTests:
|
||||
|
||||
class PolyRegKernel():
|
||||
parameters= []
|
||||
weights=None
|
||||
bias=None
|
||||
power=None
|
||||
def __init__(self, num_vars, power):
|
||||
self.power=power
|
||||
num_terms=self.num_poly_terms(num_vars, power)
|
||||
self.weights=torch.rand(num_terms, requires_grad=True, device=device)
|
||||
self.bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.bias]
|
||||
def num_poly_terms(self,num_vars, power):
|
||||
if power == 0:
|
||||
return 0
|
||||
return int(self.factorial(num_vars+power-1) / self.factorial(power) / self.factorial(num_vars-1)) + self.num_poly_terms(num_vars, power-1)
|
||||
def factorial(self,n):
|
||||
if n==0:
|
||||
return 1
|
||||
else:
|
||||
return n*self.factorial(n-1)
|
||||
def take_all_pwrs(self, vec, pwr):
|
||||
#todo: vectorize (kinda)
|
||||
combins=torch.combinations(vec, r=pwr, with_replacement=True)
|
||||
out=torch.ones(combins.size()[0]).to(device).to(torch.float)
|
||||
for i in torch.t(combins).to(device).to(torch.float):
|
||||
out *= i
|
||||
if pwr == 1:
|
||||
return out
|
||||
else:
|
||||
return torch.cat((out,self.take_all_pwrs(vec, pwr-1)))
|
||||
def forward(self,mtx):
|
||||
#TODO: Vectorize the last part
|
||||
cols=[]
|
||||
for i in torch.t(mtx):
|
||||
cols.append(self.take_all_pwrs(i,self.power))
|
||||
new_mtx=torch.t(torch.stack(cols))
|
||||
long_bias=self.bias.repeat([1,mtx.size()[1]])
|
||||
return torch.matmul(self.weights,new_mtx)+long_bias
|
||||
def ttest_onesample(self, a, popmean, axis = 0, nan_policy = 'propagate'):
|
||||
|
||||
def SGDTrain(self, kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False):
|
||||
optim=torch.optim.SGD(kernel.parameters, lr=learning_rate)
|
||||
data_cuda=data.to(device)
|
||||
ground_cuda=ground.to(device)
|
||||
if (return_losses):
|
||||
losses=[]
|
||||
for i in range(iterations):
|
||||
with torch.set_grad_enabled(True):
|
||||
optim.zero_grad()
|
||||
pred=kernel.forward(data_cuda)
|
||||
ls=loss(pred,ground_cuda)
|
||||
losses.append(ls.item())
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return [kernel,losses]
|
||||
else:
|
||||
for i in range(iterations):
|
||||
with torch.set_grad_enabled(True):
|
||||
optim.zero_grad()
|
||||
pred=kernel.forward(data_cuda)
|
||||
ls=loss(pred,ground_cuda)
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return kernel
|
||||
results = scipy.stats.ttest_1samp(a, popmean, axis = axis, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def CustomTrain(self, kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False):
|
||||
data_cuda=data.to(device)
|
||||
ground_cuda=ground.to(device)
|
||||
if (return_losses):
|
||||
losses=[]
|
||||
for i in range(iterations):
|
||||
with torch.set_grad_enabled(True):
|
||||
optim.zero_grad()
|
||||
pred=kernel.forward(data)
|
||||
ls=loss(pred,ground)
|
||||
losses.append(ls.item())
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return [kernel,losses]
|
||||
else:
|
||||
for i in range(iterations):
|
||||
with torch.set_grad_enabled(True):
|
||||
optim.zero_grad()
|
||||
pred=kernel.forward(data_cuda)
|
||||
ls=loss(pred,ground_cuda)
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return kernel
|
||||
def ttest_independent(self, a, b, equal = True, nan_policy = 'propagate'):
|
||||
|
||||
class Glicko2:
|
||||
results = scipy.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
_tau = 0.5
|
||||
def ttest_statistic(self, o1, o2, equal = True):
|
||||
|
||||
def getRating(self):
|
||||
return (self.__rating * 173.7178) + 1500
|
||||
results = scipy.stats.ttest_ind_from_stats(o1["mean"], o1["std"], o1["nobs"], o2["mean"], o2["std"], o2["nobs"], equal_var = equal)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def setRating(self, rating):
|
||||
self.__rating = (rating - 1500) / 173.7178
|
||||
def ttest_related(self, a, b, axis = 0, nan_policy='propagate'):
|
||||
|
||||
rating = property(getRating, setRating)
|
||||
results = scipy.stats.ttest_rel(a, b, axis = axis, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def getRd(self):
|
||||
return self.__rd * 173.7178
|
||||
def ks_fitness(self, rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'):
|
||||
|
||||
def setRd(self, rd):
|
||||
self.__rd = rd / 173.7178
|
||||
results = scipy.stats.kstest(rvs, cdf, args = args, N = N, alternative = alternative, mode = mode)
|
||||
return {"ks-value": results[0], "p-value": results[1]}
|
||||
|
||||
rd = property(getRd, setRd)
|
||||
|
||||
def __init__(self, rating = 1500, rd = 350, vol = 0.06):
|
||||
def chisquare(self, f_obs, f_exp = None, ddof = None, axis = 0):
|
||||
|
||||
self.setRating(rating)
|
||||
self.setRd(rd)
|
||||
self.vol = vol
|
||||
|
||||
def _preRatingRD(self):
|
||||
results = scipy.stats.chisquare(f_obs, f_exp = f_exp, ddof = ddof, axis = axis)
|
||||
return {"chisquared-value": results[0], "p-value": results[1]}
|
||||
|
||||
self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))
|
||||
def powerdivergence(self, f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None):
|
||||
|
||||
results = scipy.stats.power_divergence(f_obs, f_exp = f_exp, ddof = ddof, axis = axis, lambda_ = lambda_)
|
||||
return {"powerdivergence-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ks_twosample(self, x, y, alternative = 'two_sided', mode = 'auto'):
|
||||
|
||||
def update_player(self, rating_list, RD_list, outcome_list):
|
||||
results = scipy.stats.ks_2samp(x, y, alternative = alternative, mode = mode)
|
||||
return {"ks-value": results[0], "p-value": results[1]}
|
||||
|
||||
rating_list = [(x - 1500) / 173.7178 for x in rating_list]
|
||||
RD_list = [x / 173.7178 for x in RD_list]
|
||||
def es_twosample(self, x, y, t = (0.4, 0.8)):
|
||||
|
||||
v = self._v(rating_list, RD_list)
|
||||
self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
|
||||
self._preRatingRD()
|
||||
|
||||
self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * \
|
||||
(outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
self.__rating += math.pow(self.__rd, 2) * tempSum
|
||||
|
||||
|
||||
def _newVol(self, rating_list, RD_list, outcome_list, v):
|
||||
results = scipy.stats.epps_singleton_2samp(x, y, t = t)
|
||||
return {"es-value": results[0], "p-value": results[1]}
|
||||
|
||||
i = 0
|
||||
delta = self._delta(rating_list, RD_list, outcome_list, v)
|
||||
a = math.log(math.pow(self.vol, 2))
|
||||
tau = self._tau
|
||||
x0 = a
|
||||
x1 = 0
|
||||
|
||||
while x0 != x1:
|
||||
# New iteration, so x(i) becomes x(i-1)
|
||||
x0 = x1
|
||||
d = math.pow(self.__rating, 2) + v + math.exp(x0)
|
||||
h1 = -(x0 - a) / math.pow(tau, 2) - 0.5 * math.exp(x0) \
|
||||
/ d + 0.5 * math.exp(x0) * math.pow(delta / d, 2)
|
||||
h2 = -1 / math.pow(tau, 2) - 0.5 * math.exp(x0) * \
|
||||
(math.pow(self.__rating, 2) + v) \
|
||||
/ math.pow(d, 2) + 0.5 * math.pow(delta, 2) * math.exp(x0) \
|
||||
* (math.pow(self.__rating, 2) + v - math.exp(x0)) / math.pow(d, 3)
|
||||
x1 = x0 - (h1 / h2)
|
||||
def mw_rank(self, x, y, use_continuity = True, alternative = None):
|
||||
|
||||
return math.exp(x1 / 2)
|
||||
|
||||
def _delta(self, rating_list, RD_list, outcome_list, v):
|
||||
results = scipy.stats.mannwhitneyu(x, y, use_continuity = use_continuity, alternative = alternative)
|
||||
return {"u-value": results[0], "p-value": results[1]}
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
return v * tempSum
|
||||
|
||||
def _v(self, rating_list, RD_list):
|
||||
def mw_tiecorrection(self, rank_values):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempE = self._E(rating_list[i], RD_list[i])
|
||||
tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
|
||||
return 1 / tempSum
|
||||
|
||||
def _E(self, p2rating, p2RD):
|
||||
results = scipy.stats.tiecorrect(rank_values)
|
||||
return {"correction-factor": results}
|
||||
|
||||
return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
|
||||
(self.__rating - p2rating)))
|
||||
|
||||
def _g(self, RD):
|
||||
def rankdata(self, a, method = 'average'):
|
||||
|
||||
return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))
|
||||
|
||||
def did_not_compete(self):
|
||||
results = scipy.stats.rankdata(a, method = method)
|
||||
return results
|
||||
|
||||
self._preRatingRD()
|
||||
def wilcoxon_ranksum(self, a, b): # this seems to be superceded by Mann Whitney Wilcoxon U Test
|
||||
|
||||
results = scipy.stats.ranksums(a, b)
|
||||
return {"u-value": results[0], "p-value": results[1]}
|
||||
|
||||
def wilcoxon_signedrank(self, x, y = None, zero_method = 'wilcox', correction = False, alternative = 'two-sided'):
|
||||
|
||||
results = scipy.stats.wilcoxon(x, y = y, zero_method = zero_method, correction = correction, alternative = alternative)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def kw_htest(self, *args, nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.kruskal(*args, nan_policy = nan_policy)
|
||||
return {"h-value": results[0], "p-value": results[1]}
|
||||
|
||||
def friedman_chisquare(self, *args):
|
||||
|
||||
results = scipy.stats.friedmanchisquare(*args)
|
||||
return {"chisquared-value": results[0], "p-value": results[1]}
|
||||
|
||||
def bm_wtest(self, x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.brunnermunzel(x, y, alternative = alternative, distribution = distribution, nan_policy = nan_policy)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def combine_pvalues(self, pvalues, method = 'fisher', weights = None):
|
||||
|
||||
results = scipy.stats.combine_pvalues(pvalues, method = method, weights = weights)
|
||||
return {"combined-statistic": results[0], "p-value": results[1]}
|
||||
|
||||
def jb_fitness(self, x):
|
||||
|
||||
results = scipy.stats.jarque_bera(x)
|
||||
return {"jb-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ab_equality(self, x, y):
|
||||
|
||||
results = scipy.stats.ansari(x, y)
|
||||
return {"ab-value": results[0], "p-value": results[1]}
|
||||
|
||||
def bartlett_variance(self, *args):
|
||||
|
||||
results = scipy.stats.bartlett(*args)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def levene_variance(self, *args, center = 'median', proportiontocut = 0.05):
|
||||
|
||||
results = scipy.stats.levene(*args, center = center, proportiontocut = proportiontocut)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def sw_normality(self, x):
|
||||
|
||||
results = scipy.stats.shapiro(x)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def shapiro(self, x):
|
||||
|
||||
return "destroyed by facts and logic"
|
||||
|
||||
def ad_onesample(self, x, dist = 'norm'):
|
||||
|
||||
results = scipy.stats.anderson(x, dist = dist)
|
||||
return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}
|
||||
|
||||
def ad_ksample(self, samples, midrank = True):
|
||||
|
||||
results = scipy.stats.anderson_ksamp(samples, midrank = midrank)
|
||||
return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}
|
||||
|
||||
def binomial(self, x, n = None, p = 0.5, alternative = 'two-sided'):
|
||||
|
||||
results = scipy.stats.binom_test(x, n = n, p = p, alternative = alternative)
|
||||
return {"p-value": results}
|
||||
|
||||
def fk_variance(self, *args, center = 'median', proportiontocut = 0.05):
|
||||
|
||||
results = scipy.stats.fligner(*args, center = center, proportiontocut = proportiontocut)
|
||||
return {"h-value": results[0], "p-value": results[1]} # unknown if the statistic is an h value
|
||||
|
||||
def mood_mediantest(self, *args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.median_test(*args, ties = ties, correction = correction, lambda_ = lambda_, nan_policy = nan_policy)
|
||||
return {"chisquared-value": results[0], "p-value": results[1], "m-value": results[2], "table": results[3]}
|
||||
|
||||
def mood_equalscale(self, x, y, axis = 0):
|
||||
|
||||
results = scipy.stats.mood(x, y, axis = axis)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def skewtest(self, a, axis = 0, nan_policy = 'propogate'):
|
||||
|
||||
results = scipy.stats.skewtest(a, axis = axis, nan_policy = nan_policy)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def kurtosistest(self, a, axis = 0, nan_policy = 'propogate'):
|
||||
|
||||
results = scipy.stats.kurtosistest(a, axis = axis, nan_policy = nan_policy)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def normaltest(self, a, axis = 0, nan_policy = 'propogate'):
|
||||
|
||||
results = scipy.stats.normaltest(a, axis = axis, nan_policy = nan_policy)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
99
analysis-master/analysis-amd64/build/lib/analysis/glicko2.py
Normal file
99
analysis-master/analysis-amd64/build/lib/analysis/glicko2.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import math
|
||||
|
||||
class Glicko2:
|
||||
_tau = 0.5
|
||||
|
||||
def getRating(self):
|
||||
return (self.__rating * 173.7178) + 1500
|
||||
|
||||
def setRating(self, rating):
|
||||
self.__rating = (rating - 1500) / 173.7178
|
||||
|
||||
rating = property(getRating, setRating)
|
||||
|
||||
def getRd(self):
|
||||
return self.__rd * 173.7178
|
||||
|
||||
def setRd(self, rd):
|
||||
self.__rd = rd / 173.7178
|
||||
|
||||
rd = property(getRd, setRd)
|
||||
|
||||
def __init__(self, rating = 1500, rd = 350, vol = 0.06):
|
||||
|
||||
self.setRating(rating)
|
||||
self.setRd(rd)
|
||||
self.vol = vol
|
||||
|
||||
def _preRatingRD(self):
|
||||
|
||||
self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))
|
||||
|
||||
def update_player(self, rating_list, RD_list, outcome_list):
|
||||
|
||||
rating_list = [(x - 1500) / 173.7178 for x in rating_list]
|
||||
RD_list = [x / 173.7178 for x in RD_list]
|
||||
|
||||
v = self._v(rating_list, RD_list)
|
||||
self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
|
||||
self._preRatingRD()
|
||||
|
||||
self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * \
|
||||
(outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
self.__rating += math.pow(self.__rd, 2) * tempSum
|
||||
|
||||
|
||||
def _newVol(self, rating_list, RD_list, outcome_list, v):
|
||||
|
||||
i = 0
|
||||
delta = self._delta(rating_list, RD_list, outcome_list, v)
|
||||
a = math.log(math.pow(self.vol, 2))
|
||||
tau = self._tau
|
||||
x0 = a
|
||||
x1 = 0
|
||||
|
||||
while x0 != x1:
|
||||
# New iteration, so x(i) becomes x(i-1)
|
||||
x0 = x1
|
||||
d = math.pow(self.__rating, 2) + v + math.exp(x0)
|
||||
h1 = -(x0 - a) / math.pow(tau, 2) - 0.5 * math.exp(x0) \
|
||||
/ d + 0.5 * math.exp(x0) * math.pow(delta / d, 2)
|
||||
h2 = -1 / math.pow(tau, 2) - 0.5 * math.exp(x0) * \
|
||||
(math.pow(self.__rating, 2) + v) \
|
||||
/ math.pow(d, 2) + 0.5 * math.pow(delta, 2) * math.exp(x0) \
|
||||
* (math.pow(self.__rating, 2) + v - math.exp(x0)) / math.pow(d, 3)
|
||||
x1 = x0 - (h1 / h2)
|
||||
|
||||
return math.exp(x1 / 2)
|
||||
|
||||
def _delta(self, rating_list, RD_list, outcome_list, v):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
return v * tempSum
|
||||
|
||||
def _v(self, rating_list, RD_list):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempE = self._E(rating_list[i], RD_list[i])
|
||||
tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
|
||||
return 1 / tempSum
|
||||
|
||||
def _E(self, p2rating, p2RD):
|
||||
|
||||
return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
|
||||
(self.__rating - p2rating)))
|
||||
|
||||
def _g(self, RD):
|
||||
|
||||
return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))
|
||||
|
||||
def did_not_compete(self):
|
||||
|
||||
self._preRatingRD()
|
@@ -0,0 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
def calculate(starting_score, opposing_score, observed, N, K):
|
||||
|
||||
expected = 1/(1+10**((np.array(opposing_score) - starting_score)/N))
|
||||
|
||||
return starting_score + K*(np.sum(observed) - np.sum(expected))
|
@@ -0,0 +1,99 @@
|
||||
import math
|
||||
|
||||
class Glicko2:
|
||||
_tau = 0.5
|
||||
|
||||
def getRating(self):
|
||||
return (self.__rating * 173.7178) + 1500
|
||||
|
||||
def setRating(self, rating):
|
||||
self.__rating = (rating - 1500) / 173.7178
|
||||
|
||||
rating = property(getRating, setRating)
|
||||
|
||||
def getRd(self):
|
||||
return self.__rd * 173.7178
|
||||
|
||||
def setRd(self, rd):
|
||||
self.__rd = rd / 173.7178
|
||||
|
||||
rd = property(getRd, setRd)
|
||||
|
||||
def __init__(self, rating = 1500, rd = 350, vol = 0.06):
|
||||
|
||||
self.setRating(rating)
|
||||
self.setRd(rd)
|
||||
self.vol = vol
|
||||
|
||||
def _preRatingRD(self):
|
||||
|
||||
self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))
|
||||
|
||||
def update_player(self, rating_list, RD_list, outcome_list):
|
||||
|
||||
rating_list = [(x - 1500) / 173.7178 for x in rating_list]
|
||||
RD_list = [x / 173.7178 for x in RD_list]
|
||||
|
||||
v = self._v(rating_list, RD_list)
|
||||
self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
|
||||
self._preRatingRD()
|
||||
|
||||
self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * \
|
||||
(outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
self.__rating += math.pow(self.__rd, 2) * tempSum
|
||||
|
||||
|
||||
def _newVol(self, rating_list, RD_list, outcome_list, v):
|
||||
|
||||
i = 0
|
||||
delta = self._delta(rating_list, RD_list, outcome_list, v)
|
||||
a = math.log(math.pow(self.vol, 2))
|
||||
tau = self._tau
|
||||
x0 = a
|
||||
x1 = 0
|
||||
|
||||
while x0 != x1:
|
||||
# New iteration, so x(i) becomes x(i-1)
|
||||
x0 = x1
|
||||
d = math.pow(self.__rating, 2) + v + math.exp(x0)
|
||||
h1 = -(x0 - a) / math.pow(tau, 2) - 0.5 * math.exp(x0) \
|
||||
/ d + 0.5 * math.exp(x0) * math.pow(delta / d, 2)
|
||||
h2 = -1 / math.pow(tau, 2) - 0.5 * math.exp(x0) * \
|
||||
(math.pow(self.__rating, 2) + v) \
|
||||
/ math.pow(d, 2) + 0.5 * math.pow(delta, 2) * math.exp(x0) \
|
||||
* (math.pow(self.__rating, 2) + v - math.exp(x0)) / math.pow(d, 3)
|
||||
x1 = x0 - (h1 / h2)
|
||||
|
||||
return math.exp(x1 / 2)
|
||||
|
||||
def _delta(self, rating_list, RD_list, outcome_list, v):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
return v * tempSum
|
||||
|
||||
def _v(self, rating_list, RD_list):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempE = self._E(rating_list[i], RD_list[i])
|
||||
tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
|
||||
return 1 / tempSum
|
||||
|
||||
def _E(self, p2rating, p2RD):
|
||||
|
||||
return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
|
||||
(self.__rating - p2rating)))
|
||||
|
||||
def _g(self, RD):
|
||||
|
||||
return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))
|
||||
|
||||
def did_not_compete(self):
|
||||
|
||||
self._preRatingRD()
|
@@ -1,20 +1,23 @@
|
||||
# Titan Robotics Team 2022: CUDA-based Regressions Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this should be imported as a python module using 'import regression'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module is cuda-optimized and vectorized (except for one small part)
|
||||
# this module has been automatically inegrated into analysis.py, and should be callable as a class from the package
|
||||
# this module is cuda-optimized and vectorized (except for one small part)
|
||||
# setup:
|
||||
|
||||
__version__ = "1.0.0.002"
|
||||
__version__ = "1.0.0.004"
|
||||
|
||||
# changelog should be viewed using print(regression.__changelog__)
|
||||
# changelog should be viewed using print(analysis.regression.__changelog__)
|
||||
__changelog__ = """
|
||||
1.0.0.004:
|
||||
- bug fixes
|
||||
- fixed changelog
|
||||
1.0.0.003:
|
||||
- bug fixes
|
||||
1.0.0.002:
|
||||
-Added more parameters to log, exponential, polynomial
|
||||
-Added SigmoidalRegKernelArthur, because Arthur apparently needs
|
||||
to train the scaling and shifting of sigmoids
|
||||
|
||||
1.0.0.001:
|
||||
-initial release, with linear, log, exponential, polynomial, and sigmoid kernels
|
||||
-already vectorized (except for polynomial generation) and CUDA-optimized
|
||||
@@ -22,6 +25,7 @@ __changelog__ = """
|
||||
|
||||
__author__ = (
|
||||
"Jacob Levine <jlevine@imsa.edu>",
|
||||
"Arthur Lu <learthurgo@gmail.com>"
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
@@ -39,35 +43,15 @@ __all__ = [
|
||||
'CustomTrain'
|
||||
]
|
||||
|
||||
|
||||
# imports (just one for now):
|
||||
|
||||
import torch
|
||||
|
||||
global device
|
||||
|
||||
device = "cuda:0" if torch.torch.cuda.is_available() else "cpu"
|
||||
|
||||
#todo: document completely
|
||||
|
||||
def factorial(n):
|
||||
if n==0:
|
||||
return 1
|
||||
else:
|
||||
return n*factorial(n-1)
|
||||
def num_poly_terms(num_vars, power):
|
||||
if power == 0:
|
||||
return 0
|
||||
return int(factorial(num_vars+power-1) / factorial(power) / factorial(num_vars-1)) + num_poly_terms(num_vars, power-1)
|
||||
|
||||
def take_all_pwrs(vec,pwr):
|
||||
#todo: vectorize (kinda)
|
||||
combins=torch.combinations(vec, r=pwr, with_replacement=True)
|
||||
out=torch.ones(combins.size()[0])
|
||||
for i in torch.t(combins):
|
||||
out *= i
|
||||
return torch.cat(out,take_all_pwrs(vec, pwr-1))
|
||||
|
||||
def set_device(new_device):
|
||||
global device
|
||||
def set_device(self, new_device):
|
||||
device=new_device
|
||||
|
||||
class LinearRegKernel():
|
||||
@@ -154,20 +138,39 @@ class PolyRegKernel():
|
||||
power=None
|
||||
def __init__(self, num_vars, power):
|
||||
self.power=power
|
||||
num_terms=num_poly_terms(num_vars, power)
|
||||
num_terms=self.num_poly_terms(num_vars, power)
|
||||
self.weights=torch.rand(num_terms, requires_grad=True, device=device)
|
||||
self.bias=torch.rand(1, requires_grad=True, device=device)
|
||||
self.parameters=[self.weights,self.bias]
|
||||
def num_poly_terms(self,num_vars, power):
|
||||
if power == 0:
|
||||
return 0
|
||||
return int(self.factorial(num_vars+power-1) / self.factorial(power) / self.factorial(num_vars-1)) + self.num_poly_terms(num_vars, power-1)
|
||||
def factorial(self,n):
|
||||
if n==0:
|
||||
return 1
|
||||
else:
|
||||
return n*self.factorial(n-1)
|
||||
def take_all_pwrs(self, vec, pwr):
|
||||
#todo: vectorize (kinda)
|
||||
combins=torch.combinations(vec, r=pwr, with_replacement=True)
|
||||
out=torch.ones(combins.size()[0]).to(device).to(torch.float)
|
||||
for i in torch.t(combins).to(device).to(torch.float):
|
||||
out *= i
|
||||
if pwr == 1:
|
||||
return out
|
||||
else:
|
||||
return torch.cat((out,self.take_all_pwrs(vec, pwr-1)))
|
||||
def forward(self,mtx):
|
||||
#TODO: Vectorize the last part
|
||||
cols=[]
|
||||
for i in torch.t(mtx):
|
||||
cols.append(take_all_pwrs(i,self.power))
|
||||
cols.append(self.take_all_pwrs(i,self.power))
|
||||
new_mtx=torch.t(torch.stack(cols))
|
||||
long_bias=self.bias.repeat([1,mtx.size()[1]])
|
||||
return torch.matmul(self.weights,new_mtx)+long_bias
|
||||
|
||||
def SGDTrain(kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False):
|
||||
def SGDTrain(self, kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False):
|
||||
optim=torch.optim.SGD(kernel.parameters, lr=learning_rate)
|
||||
data_cuda=data.to(device)
|
||||
ground_cuda=ground.to(device)
|
||||
@@ -192,7 +195,7 @@ def SGDTrain(kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, lea
|
||||
optim.step()
|
||||
return kernel
|
||||
|
||||
def CustomTrain(kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False):
|
||||
def CustomTrain(self, kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False):
|
||||
data_cuda=data.to(device)
|
||||
ground_cuda=ground.to(device)
|
||||
if (return_losses):
|
||||
@@ -214,4 +217,4 @@ def CustomTrain(kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations
|
||||
ls=loss(pred,ground_cuda)
|
||||
ls.backward()
|
||||
optim.step()
|
||||
return kernel
|
||||
return kernel
|
907
analysis-master/analysis-amd64/build/lib/analysis/trueskill.py
Normal file
907
analysis-master/analysis-amd64/build/lib/analysis/trueskill.py
Normal file
@@ -0,0 +1,907 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from itertools import chain
|
||||
import math
|
||||
|
||||
from six import iteritems
|
||||
from six.moves import map, range, zip
|
||||
from six import iterkeys
|
||||
|
||||
import copy
|
||||
try:
|
||||
from numbers import Number
|
||||
except ImportError:
|
||||
Number = (int, long, float, complex)
|
||||
|
||||
inf = float('inf')
|
||||
|
||||
class Gaussian(object):
|
||||
#: Precision, the inverse of the variance.
|
||||
pi = 0
|
||||
#: Precision adjusted mean, the precision multiplied by the mean.
|
||||
tau = 0
|
||||
|
||||
def __init__(self, mu=None, sigma=None, pi=0, tau=0):
|
||||
if mu is not None:
|
||||
if sigma is None:
|
||||
raise TypeError('sigma argument is needed')
|
||||
elif sigma == 0:
|
||||
raise ValueError('sigma**2 should be greater than 0')
|
||||
pi = sigma ** -2
|
||||
tau = pi * mu
|
||||
self.pi = pi
|
||||
self.tau = tau
|
||||
|
||||
@property
|
||||
def mu(self):
|
||||
return self.pi and self.tau / self.pi
|
||||
|
||||
@property
|
||||
def sigma(self):
|
||||
return math.sqrt(1 / self.pi) if self.pi else inf
|
||||
|
||||
def __mul__(self, other):
|
||||
pi, tau = self.pi + other.pi, self.tau + other.tau
|
||||
return Gaussian(pi=pi, tau=tau)
|
||||
|
||||
def __truediv__(self, other):
|
||||
pi, tau = self.pi - other.pi, self.tau - other.tau
|
||||
return Gaussian(pi=pi, tau=tau)
|
||||
|
||||
__div__ = __truediv__ # for Python 2
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.pi == other.pi and self.tau == other.tau
|
||||
|
||||
def __lt__(self, other):
|
||||
return self.mu < other.mu
|
||||
|
||||
def __le__(self, other):
|
||||
return self.mu <= other.mu
|
||||
|
||||
def __gt__(self, other):
|
||||
return self.mu > other.mu
|
||||
|
||||
def __ge__(self, other):
|
||||
return self.mu >= other.mu
|
||||
|
||||
def __repr__(self):
|
||||
return 'N(mu={:.3f}, sigma={:.3f})'.format(self.mu, self.sigma)
|
||||
|
||||
def _repr_latex_(self):
|
||||
latex = r'\mathcal{{ N }}( {:.3f}, {:.3f}^2 )'.format(self.mu, self.sigma)
|
||||
return '$%s$' % latex
|
||||
|
||||
class Matrix(list):
|
||||
def __init__(self, src, height=None, width=None):
|
||||
if callable(src):
|
||||
f, src = src, {}
|
||||
size = [height, width]
|
||||
if not height:
|
||||
def set_height(height):
|
||||
size[0] = height
|
||||
size[0] = set_height
|
||||
if not width:
|
||||
def set_width(width):
|
||||
size[1] = width
|
||||
size[1] = set_width
|
||||
try:
|
||||
for (r, c), val in f(*size):
|
||||
src[r, c] = val
|
||||
except TypeError:
|
||||
raise TypeError('A callable src must return an interable '
|
||||
'which generates a tuple containing '
|
||||
'coordinate and value')
|
||||
height, width = tuple(size)
|
||||
if height is None or width is None:
|
||||
raise TypeError('A callable src must call set_height and '
|
||||
'set_width if the size is non-deterministic')
|
||||
if isinstance(src, list):
|
||||
is_number = lambda x: isinstance(x, Number)
|
||||
unique_col_sizes = set(map(len, src))
|
||||
everything_are_number = filter(is_number, sum(src, []))
|
||||
if len(unique_col_sizes) != 1 or not everything_are_number:
|
||||
raise ValueError('src must be a rectangular array of numbers')
|
||||
two_dimensional_array = src
|
||||
elif isinstance(src, dict):
|
||||
if not height or not width:
|
||||
w = h = 0
|
||||
for r, c in iterkeys(src):
|
||||
if not height:
|
||||
h = max(h, r + 1)
|
||||
if not width:
|
||||
w = max(w, c + 1)
|
||||
if not height:
|
||||
height = h
|
||||
if not width:
|
||||
width = w
|
||||
two_dimensional_array = []
|
||||
for r in range(height):
|
||||
row = []
|
||||
two_dimensional_array.append(row)
|
||||
for c in range(width):
|
||||
row.append(src.get((r, c), 0))
|
||||
else:
|
||||
raise TypeError('src must be a list or dict or callable')
|
||||
super(Matrix, self).__init__(two_dimensional_array)
|
||||
|
||||
@property
|
||||
def height(self):
|
||||
return len(self)
|
||||
|
||||
@property
|
||||
def width(self):
|
||||
return len(self[0])
|
||||
|
||||
def transpose(self):
|
||||
height, width = self.height, self.width
|
||||
src = {}
|
||||
for c in range(width):
|
||||
for r in range(height):
|
||||
src[c, r] = self[r][c]
|
||||
return type(self)(src, height=width, width=height)
|
||||
|
||||
def minor(self, row_n, col_n):
|
||||
height, width = self.height, self.width
|
||||
if not (0 <= row_n < height):
|
||||
raise ValueError('row_n should be between 0 and %d' % height)
|
||||
elif not (0 <= col_n < width):
|
||||
raise ValueError('col_n should be between 0 and %d' % width)
|
||||
two_dimensional_array = []
|
||||
for r in range(height):
|
||||
if r == row_n:
|
||||
continue
|
||||
row = []
|
||||
two_dimensional_array.append(row)
|
||||
for c in range(width):
|
||||
if c == col_n:
|
||||
continue
|
||||
row.append(self[r][c])
|
||||
return type(self)(two_dimensional_array)
|
||||
|
||||
def determinant(self):
|
||||
height, width = self.height, self.width
|
||||
if height != width:
|
||||
raise ValueError('Only square matrix can calculate a determinant')
|
||||
tmp, rv = copy.deepcopy(self), 1.
|
||||
for c in range(width - 1, 0, -1):
|
||||
pivot, r = max((abs(tmp[r][c]), r) for r in range(c + 1))
|
||||
pivot = tmp[r][c]
|
||||
if not pivot:
|
||||
return 0.
|
||||
tmp[r], tmp[c] = tmp[c], tmp[r]
|
||||
if r != c:
|
||||
rv = -rv
|
||||
rv *= pivot
|
||||
fact = -1. / pivot
|
||||
for r in range(c):
|
||||
f = fact * tmp[r][c]
|
||||
for x in range(c):
|
||||
tmp[r][x] += f * tmp[c][x]
|
||||
return rv * tmp[0][0]
|
||||
|
||||
def adjugate(self):
|
||||
height, width = self.height, self.width
|
||||
if height != width:
|
||||
raise ValueError('Only square matrix can be adjugated')
|
||||
if height == 2:
|
||||
a, b = self[0][0], self[0][1]
|
||||
c, d = self[1][0], self[1][1]
|
||||
return type(self)([[d, -b], [-c, a]])
|
||||
src = {}
|
||||
for r in range(height):
|
||||
for c in range(width):
|
||||
sign = -1 if (r + c) % 2 else 1
|
||||
src[r, c] = self.minor(r, c).determinant() * sign
|
||||
return type(self)(src, height, width)
|
||||
|
||||
def inverse(self):
|
||||
if self.height == self.width == 1:
|
||||
return type(self)([[1. / self[0][0]]])
|
||||
return (1. / self.determinant()) * self.adjugate()
|
||||
|
||||
def __add__(self, other):
|
||||
height, width = self.height, self.width
|
||||
if (height, width) != (other.height, other.width):
|
||||
raise ValueError('Must be same size')
|
||||
src = {}
|
||||
for r in range(height):
|
||||
for c in range(width):
|
||||
src[r, c] = self[r][c] + other[r][c]
|
||||
return type(self)(src, height, width)
|
||||
|
||||
def __mul__(self, other):
|
||||
if self.width != other.height:
|
||||
raise ValueError('Bad size')
|
||||
height, width = self.height, other.width
|
||||
src = {}
|
||||
for r in range(height):
|
||||
for c in range(width):
|
||||
src[r, c] = sum(self[r][x] * other[x][c]
|
||||
for x in range(self.width))
|
||||
return type(self)(src, height, width)
|
||||
|
||||
def __rmul__(self, other):
|
||||
if not isinstance(other, Number):
|
||||
raise TypeError('The operand should be a number')
|
||||
height, width = self.height, self.width
|
||||
src = {}
|
||||
for r in range(height):
|
||||
for c in range(width):
|
||||
src[r, c] = other * self[r][c]
|
||||
return type(self)(src, height, width)
|
||||
|
||||
def __repr__(self):
|
||||
return '{}({})'.format(type(self).__name__, super(Matrix, self).__repr__())
|
||||
|
||||
def _repr_latex_(self):
|
||||
rows = [' && '.join(['%.3f' % cell for cell in row]) for row in self]
|
||||
latex = r'\begin{matrix} %s \end{matrix}' % r'\\'.join(rows)
|
||||
return '$%s$' % latex
|
||||
|
||||
def _gen_erfcinv(erfc, math=math):
|
||||
def erfcinv(y):
|
||||
"""The inverse function of erfc."""
|
||||
if y >= 2:
|
||||
return -100.
|
||||
elif y <= 0:
|
||||
return 100.
|
||||
zero_point = y < 1
|
||||
if not zero_point:
|
||||
y = 2 - y
|
||||
t = math.sqrt(-2 * math.log(y / 2.))
|
||||
x = -0.70711 * \
|
||||
((2.30753 + t * 0.27061) / (1. + t * (0.99229 + t * 0.04481)) - t)
|
||||
for i in range(2):
|
||||
err = erfc(x) - y
|
||||
x += err / (1.12837916709551257 * math.exp(-(x ** 2)) - x * err)
|
||||
return x if zero_point else -x
|
||||
return erfcinv
|
||||
|
||||
def _gen_ppf(erfc, math=math):
|
||||
erfcinv = _gen_erfcinv(erfc, math)
|
||||
def ppf(x, mu=0, sigma=1):
|
||||
return mu - sigma * math.sqrt(2) * erfcinv(2 * x)
|
||||
return ppf
|
||||
|
||||
def erfc(x):
|
||||
z = abs(x)
|
||||
t = 1. / (1. + z / 2.)
|
||||
r = t * math.exp(-z * z - 1.26551223 + t * (1.00002368 + t * (
|
||||
0.37409196 + t * (0.09678418 + t * (-0.18628806 + t * (
|
||||
0.27886807 + t * (-1.13520398 + t * (1.48851587 + t * (
|
||||
-0.82215223 + t * 0.17087277
|
||||
)))
|
||||
)))
|
||||
)))
|
||||
return 2. - r if x < 0 else r
|
||||
|
||||
def cdf(x, mu=0, sigma=1):
|
||||
return 0.5 * erfc(-(x - mu) / (sigma * math.sqrt(2)))
|
||||
|
||||
|
||||
def pdf(x, mu=0, sigma=1):
|
||||
return (1 / math.sqrt(2 * math.pi) * abs(sigma) *
|
||||
math.exp(-(((x - mu) / abs(sigma)) ** 2 / 2)))
|
||||
|
||||
ppf = _gen_ppf(erfc)
|
||||
|
||||
def choose_backend(backend):
|
||||
if backend is None: # fallback
|
||||
return cdf, pdf, ppf
|
||||
elif backend == 'mpmath':
|
||||
try:
|
||||
import mpmath
|
||||
except ImportError:
|
||||
raise ImportError('Install "mpmath" to use this backend')
|
||||
return mpmath.ncdf, mpmath.npdf, _gen_ppf(mpmath.erfc, math=mpmath)
|
||||
elif backend == 'scipy':
|
||||
try:
|
||||
from scipy.stats import norm
|
||||
except ImportError:
|
||||
raise ImportError('Install "scipy" to use this backend')
|
||||
return norm.cdf, norm.pdf, norm.ppf
|
||||
raise ValueError('%r backend is not defined' % backend)
|
||||
|
||||
def available_backends():
|
||||
backends = [None]
|
||||
for backend in ['mpmath', 'scipy']:
|
||||
try:
|
||||
__import__(backend)
|
||||
except ImportError:
|
||||
continue
|
||||
backends.append(backend)
|
||||
return backends
|
||||
|
||||
class Node(object):
|
||||
|
||||
pass
|
||||
|
||||
class Variable(Node, Gaussian):
|
||||
|
||||
def __init__(self):
|
||||
self.messages = {}
|
||||
super(Variable, self).__init__()
|
||||
|
||||
def set(self, val):
|
||||
delta = self.delta(val)
|
||||
self.pi, self.tau = val.pi, val.tau
|
||||
return delta
|
||||
|
||||
def delta(self, other):
|
||||
pi_delta = abs(self.pi - other.pi)
|
||||
if pi_delta == inf:
|
||||
return 0.
|
||||
return max(abs(self.tau - other.tau), math.sqrt(pi_delta))
|
||||
|
||||
def update_message(self, factor, pi=0, tau=0, message=None):
|
||||
message = message or Gaussian(pi=pi, tau=tau)
|
||||
old_message, self[factor] = self[factor], message
|
||||
return self.set(self / old_message * message)
|
||||
|
||||
def update_value(self, factor, pi=0, tau=0, value=None):
|
||||
value = value or Gaussian(pi=pi, tau=tau)
|
||||
old_message = self[factor]
|
||||
self[factor] = value * old_message / self
|
||||
return self.set(value)
|
||||
|
||||
def __getitem__(self, factor):
|
||||
return self.messages[factor]
|
||||
|
||||
def __setitem__(self, factor, message):
|
||||
self.messages[factor] = message
|
||||
|
||||
def __repr__(self):
|
||||
args = (type(self).__name__, super(Variable, self).__repr__(),
|
||||
len(self.messages), '' if len(self.messages) == 1 else 's')
|
||||
return '<%s %s with %d connection%s>' % args
|
||||
|
||||
|
||||
class Factor(Node):
|
||||
|
||||
def __init__(self, variables):
|
||||
self.vars = variables
|
||||
for var in variables:
|
||||
var[self] = Gaussian()
|
||||
|
||||
def down(self):
|
||||
return 0
|
||||
|
||||
def up(self):
|
||||
return 0
|
||||
|
||||
@property
|
||||
def var(self):
|
||||
assert len(self.vars) == 1
|
||||
return self.vars[0]
|
||||
|
||||
def __repr__(self):
|
||||
args = (type(self).__name__, len(self.vars),
|
||||
'' if len(self.vars) == 1 else 's')
|
||||
return '<%s with %d connection%s>' % args
|
||||
|
||||
|
||||
class PriorFactor(Factor):
|
||||
|
||||
def __init__(self, var, val, dynamic=0):
|
||||
super(PriorFactor, self).__init__([var])
|
||||
self.val = val
|
||||
self.dynamic = dynamic
|
||||
|
||||
def down(self):
|
||||
sigma = math.sqrt(self.val.sigma ** 2 + self.dynamic ** 2)
|
||||
value = Gaussian(self.val.mu, sigma)
|
||||
return self.var.update_value(self, value=value)
|
||||
|
||||
|
||||
class LikelihoodFactor(Factor):
|
||||
|
||||
def __init__(self, mean_var, value_var, variance):
|
||||
super(LikelihoodFactor, self).__init__([mean_var, value_var])
|
||||
self.mean = mean_var
|
||||
self.value = value_var
|
||||
self.variance = variance
|
||||
|
||||
def calc_a(self, var):
|
||||
return 1. / (1. + self.variance * var.pi)
|
||||
|
||||
def down(self):
|
||||
# update value.
|
||||
msg = self.mean / self.mean[self]
|
||||
a = self.calc_a(msg)
|
||||
return self.value.update_message(self, a * msg.pi, a * msg.tau)
|
||||
|
||||
def up(self):
|
||||
# update mean.
|
||||
msg = self.value / self.value[self]
|
||||
a = self.calc_a(msg)
|
||||
return self.mean.update_message(self, a * msg.pi, a * msg.tau)
|
||||
|
||||
|
||||
class SumFactor(Factor):
|
||||
|
||||
def __init__(self, sum_var, term_vars, coeffs):
|
||||
super(SumFactor, self).__init__([sum_var] + term_vars)
|
||||
self.sum = sum_var
|
||||
self.terms = term_vars
|
||||
self.coeffs = coeffs
|
||||
|
||||
def down(self):
|
||||
vals = self.terms
|
||||
msgs = [var[self] for var in vals]
|
||||
return self.update(self.sum, vals, msgs, self.coeffs)
|
||||
|
||||
def up(self, index=0):
|
||||
coeff = self.coeffs[index]
|
||||
coeffs = []
|
||||
for x, c in enumerate(self.coeffs):
|
||||
try:
|
||||
if x == index:
|
||||
coeffs.append(1. / coeff)
|
||||
else:
|
||||
coeffs.append(-c / coeff)
|
||||
except ZeroDivisionError:
|
||||
coeffs.append(0.)
|
||||
vals = self.terms[:]
|
||||
vals[index] = self.sum
|
||||
msgs = [var[self] for var in vals]
|
||||
return self.update(self.terms[index], vals, msgs, coeffs)
|
||||
|
||||
def update(self, var, vals, msgs, coeffs):
|
||||
pi_inv = 0
|
||||
mu = 0
|
||||
for val, msg, coeff in zip(vals, msgs, coeffs):
|
||||
div = val / msg
|
||||
mu += coeff * div.mu
|
||||
if pi_inv == inf:
|
||||
continue
|
||||
try:
|
||||
# numpy.float64 handles floating-point error by different way.
|
||||
# For example, it can just warn RuntimeWarning on n/0 problem
|
||||
# instead of throwing ZeroDivisionError. So div.pi, the
|
||||
# denominator has to be a built-in float.
|
||||
pi_inv += coeff ** 2 / float(div.pi)
|
||||
except ZeroDivisionError:
|
||||
pi_inv = inf
|
||||
pi = 1. / pi_inv
|
||||
tau = pi * mu
|
||||
return var.update_message(self, pi, tau)
|
||||
|
||||
|
||||
class TruncateFactor(Factor):
|
||||
|
||||
def __init__(self, var, v_func, w_func, draw_margin):
|
||||
super(TruncateFactor, self).__init__([var])
|
||||
self.v_func = v_func
|
||||
self.w_func = w_func
|
||||
self.draw_margin = draw_margin
|
||||
|
||||
def up(self):
|
||||
val = self.var
|
||||
msg = self.var[self]
|
||||
div = val / msg
|
||||
sqrt_pi = math.sqrt(div.pi)
|
||||
args = (div.tau / sqrt_pi, self.draw_margin * sqrt_pi)
|
||||
v = self.v_func(*args)
|
||||
w = self.w_func(*args)
|
||||
denom = (1. - w)
|
||||
pi, tau = div.pi / denom, (div.tau + sqrt_pi * v) / denom
|
||||
return val.update_value(self, pi, tau)
|
||||
|
||||
#: Default initial mean of ratings.
|
||||
MU = 25.
|
||||
#: Default initial standard deviation of ratings.
|
||||
SIGMA = MU / 3
|
||||
#: Default distance that guarantees about 76% chance of winning.
|
||||
BETA = SIGMA / 2
|
||||
#: Default dynamic factor.
|
||||
TAU = SIGMA / 100
|
||||
#: Default draw probability of the game.
|
||||
DRAW_PROBABILITY = .10
|
||||
#: A basis to check reliability of the result.
|
||||
DELTA = 0.0001
|
||||
|
||||
|
||||
def calc_draw_probability(draw_margin, size, env=None):
|
||||
if env is None:
|
||||
env = global_env()
|
||||
return 2 * env.cdf(draw_margin / (math.sqrt(size) * env.beta)) - 1
|
||||
|
||||
|
||||
def calc_draw_margin(draw_probability, size, env=None):
|
||||
if env is None:
|
||||
env = global_env()
|
||||
return env.ppf((draw_probability + 1) / 2.) * math.sqrt(size) * env.beta
|
||||
|
||||
|
||||
def _team_sizes(rating_groups):
|
||||
team_sizes = [0]
|
||||
for group in rating_groups:
|
||||
team_sizes.append(len(group) + team_sizes[-1])
|
||||
del team_sizes[0]
|
||||
return team_sizes
|
||||
|
||||
|
||||
def _floating_point_error(env):
|
||||
if env.backend == 'mpmath':
|
||||
msg = 'Set "mpmath.mp.dps" to higher'
|
||||
else:
|
||||
msg = 'Cannot calculate correctly, set backend to "mpmath"'
|
||||
return FloatingPointError(msg)
|
||||
|
||||
|
||||
class Rating(Gaussian):
|
||||
def __init__(self, mu=None, sigma=None):
|
||||
if isinstance(mu, tuple):
|
||||
mu, sigma = mu
|
||||
elif isinstance(mu, Gaussian):
|
||||
mu, sigma = mu.mu, mu.sigma
|
||||
if mu is None:
|
||||
mu = global_env().mu
|
||||
if sigma is None:
|
||||
sigma = global_env().sigma
|
||||
super(Rating, self).__init__(mu, sigma)
|
||||
|
||||
def __int__(self):
|
||||
return int(self.mu)
|
||||
|
||||
def __long__(self):
|
||||
return long(self.mu)
|
||||
|
||||
def __float__(self):
|
||||
return float(self.mu)
|
||||
|
||||
def __iter__(self):
|
||||
return iter((self.mu, self.sigma))
|
||||
|
||||
def __repr__(self):
|
||||
c = type(self)
|
||||
args = ('.'.join([c.__module__, c.__name__]), self.mu, self.sigma)
|
||||
return '%s(mu=%.3f, sigma=%.3f)' % args
|
||||
|
||||
|
||||
class TrueSkill(object):
|
||||
def __init__(self, mu=MU, sigma=SIGMA, beta=BETA, tau=TAU,
|
||||
draw_probability=DRAW_PROBABILITY, backend=None):
|
||||
self.mu = mu
|
||||
self.sigma = sigma
|
||||
self.beta = beta
|
||||
self.tau = tau
|
||||
self.draw_probability = draw_probability
|
||||
self.backend = backend
|
||||
if isinstance(backend, tuple):
|
||||
self.cdf, self.pdf, self.ppf = backend
|
||||
else:
|
||||
self.cdf, self.pdf, self.ppf = choose_backend(backend)
|
||||
|
||||
def create_rating(self, mu=None, sigma=None):
|
||||
if mu is None:
|
||||
mu = self.mu
|
||||
if sigma is None:
|
||||
sigma = self.sigma
|
||||
return Rating(mu, sigma)
|
||||
|
||||
def v_win(self, diff, draw_margin):
|
||||
x = diff - draw_margin
|
||||
denom = self.cdf(x)
|
||||
return (self.pdf(x) / denom) if denom else -x
|
||||
|
||||
def v_draw(self, diff, draw_margin):
|
||||
abs_diff = abs(diff)
|
||||
a, b = draw_margin - abs_diff, -draw_margin - abs_diff
|
||||
denom = self.cdf(a) - self.cdf(b)
|
||||
numer = self.pdf(b) - self.pdf(a)
|
||||
return ((numer / denom) if denom else a) * (-1 if diff < 0 else +1)
|
||||
|
||||
def w_win(self, diff, draw_margin):
|
||||
x = diff - draw_margin
|
||||
v = self.v_win(diff, draw_margin)
|
||||
w = v * (v + x)
|
||||
if 0 < w < 1:
|
||||
return w
|
||||
raise _floating_point_error(self)
|
||||
|
||||
def w_draw(self, diff, draw_margin):
|
||||
abs_diff = abs(diff)
|
||||
a, b = draw_margin - abs_diff, -draw_margin - abs_diff
|
||||
denom = self.cdf(a) - self.cdf(b)
|
||||
if not denom:
|
||||
raise _floating_point_error(self)
|
||||
v = self.v_draw(abs_diff, draw_margin)
|
||||
return (v ** 2) + (a * self.pdf(a) - b * self.pdf(b)) / denom
|
||||
|
||||
def validate_rating_groups(self, rating_groups):
|
||||
# check group sizes
|
||||
if len(rating_groups) < 2:
|
||||
raise ValueError('Need multiple rating groups')
|
||||
elif not all(rating_groups):
|
||||
raise ValueError('Each group must contain multiple ratings')
|
||||
# check group types
|
||||
group_types = set(map(type, rating_groups))
|
||||
if len(group_types) != 1:
|
||||
raise TypeError('All groups should be same type')
|
||||
elif group_types.pop() is Rating:
|
||||
raise TypeError('Rating cannot be a rating group')
|
||||
# normalize rating_groups
|
||||
if isinstance(rating_groups[0], dict):
|
||||
dict_rating_groups = rating_groups
|
||||
rating_groups = []
|
||||
keys = []
|
||||
for dict_rating_group in dict_rating_groups:
|
||||
rating_group, key_group = [], []
|
||||
for key, rating in iteritems(dict_rating_group):
|
||||
rating_group.append(rating)
|
||||
key_group.append(key)
|
||||
rating_groups.append(tuple(rating_group))
|
||||
keys.append(tuple(key_group))
|
||||
else:
|
||||
rating_groups = list(rating_groups)
|
||||
keys = None
|
||||
return rating_groups, keys
|
||||
|
||||
def validate_weights(self, weights, rating_groups, keys=None):
|
||||
if weights is None:
|
||||
weights = [(1,) * len(g) for g in rating_groups]
|
||||
elif isinstance(weights, dict):
|
||||
weights_dict, weights = weights, []
|
||||
for x, group in enumerate(rating_groups):
|
||||
w = []
|
||||
weights.append(w)
|
||||
for y, rating in enumerate(group):
|
||||
if keys is not None:
|
||||
y = keys[x][y]
|
||||
w.append(weights_dict.get((x, y), 1))
|
||||
return weights
|
||||
|
||||
def factor_graph_builders(self, rating_groups, ranks, weights):
|
||||
flatten_ratings = sum(map(tuple, rating_groups), ())
|
||||
flatten_weights = sum(map(tuple, weights), ())
|
||||
size = len(flatten_ratings)
|
||||
group_size = len(rating_groups)
|
||||
# create variables
|
||||
rating_vars = [Variable() for x in range(size)]
|
||||
perf_vars = [Variable() for x in range(size)]
|
||||
team_perf_vars = [Variable() for x in range(group_size)]
|
||||
team_diff_vars = [Variable() for x in range(group_size - 1)]
|
||||
team_sizes = _team_sizes(rating_groups)
|
||||
# layer builders
|
||||
def build_rating_layer():
|
||||
for rating_var, rating in zip(rating_vars, flatten_ratings):
|
||||
yield PriorFactor(rating_var, rating, self.tau)
|
||||
def build_perf_layer():
|
||||
for rating_var, perf_var in zip(rating_vars, perf_vars):
|
||||
yield LikelihoodFactor(rating_var, perf_var, self.beta ** 2)
|
||||
def build_team_perf_layer():
|
||||
for team, team_perf_var in enumerate(team_perf_vars):
|
||||
if team > 0:
|
||||
start = team_sizes[team - 1]
|
||||
else:
|
||||
start = 0
|
||||
end = team_sizes[team]
|
||||
child_perf_vars = perf_vars[start:end]
|
||||
coeffs = flatten_weights[start:end]
|
||||
yield SumFactor(team_perf_var, child_perf_vars, coeffs)
|
||||
def build_team_diff_layer():
|
||||
for team, team_diff_var in enumerate(team_diff_vars):
|
||||
yield SumFactor(team_diff_var,
|
||||
team_perf_vars[team:team + 2], [+1, -1])
|
||||
def build_trunc_layer():
|
||||
for x, team_diff_var in enumerate(team_diff_vars):
|
||||
if callable(self.draw_probability):
|
||||
# dynamic draw probability
|
||||
team_perf1, team_perf2 = team_perf_vars[x:x + 2]
|
||||
args = (Rating(team_perf1), Rating(team_perf2), self)
|
||||
draw_probability = self.draw_probability(*args)
|
||||
else:
|
||||
# static draw probability
|
||||
draw_probability = self.draw_probability
|
||||
size = sum(map(len, rating_groups[x:x + 2]))
|
||||
draw_margin = calc_draw_margin(draw_probability, size, self)
|
||||
if ranks[x] == ranks[x + 1]: # is a tie?
|
||||
v_func, w_func = self.v_draw, self.w_draw
|
||||
else:
|
||||
v_func, w_func = self.v_win, self.w_win
|
||||
yield TruncateFactor(team_diff_var,
|
||||
v_func, w_func, draw_margin)
|
||||
# build layers
|
||||
return (build_rating_layer, build_perf_layer, build_team_perf_layer,
|
||||
build_team_diff_layer, build_trunc_layer)
|
||||
|
||||
def run_schedule(self, build_rating_layer, build_perf_layer,
|
||||
build_team_perf_layer, build_team_diff_layer,
|
||||
build_trunc_layer, min_delta=DELTA):
|
||||
if min_delta <= 0:
|
||||
raise ValueError('min_delta must be greater than 0')
|
||||
layers = []
|
||||
def build(builders):
|
||||
layers_built = [list(build()) for build in builders]
|
||||
layers.extend(layers_built)
|
||||
return layers_built
|
||||
# gray arrows
|
||||
layers_built = build([build_rating_layer,
|
||||
build_perf_layer,
|
||||
build_team_perf_layer])
|
||||
rating_layer, perf_layer, team_perf_layer = layers_built
|
||||
for f in chain(*layers_built):
|
||||
f.down()
|
||||
# arrow #1, #2, #3
|
||||
team_diff_layer, trunc_layer = build([build_team_diff_layer,
|
||||
build_trunc_layer])
|
||||
team_diff_len = len(team_diff_layer)
|
||||
for x in range(10):
|
||||
if team_diff_len == 1:
|
||||
# only two teams
|
||||
team_diff_layer[0].down()
|
||||
delta = trunc_layer[0].up()
|
||||
else:
|
||||
# multiple teams
|
||||
delta = 0
|
||||
for x in range(team_diff_len - 1):
|
||||
team_diff_layer[x].down()
|
||||
delta = max(delta, trunc_layer[x].up())
|
||||
team_diff_layer[x].up(1) # up to right variable
|
||||
for x in range(team_diff_len - 1, 0, -1):
|
||||
team_diff_layer[x].down()
|
||||
delta = max(delta, trunc_layer[x].up())
|
||||
team_diff_layer[x].up(0) # up to left variable
|
||||
# repeat until to small update
|
||||
if delta <= min_delta:
|
||||
break
|
||||
# up both ends
|
||||
team_diff_layer[0].up(0)
|
||||
team_diff_layer[team_diff_len - 1].up(1)
|
||||
# up the remainder of the black arrows
|
||||
for f in team_perf_layer:
|
||||
for x in range(len(f.vars) - 1):
|
||||
f.up(x)
|
||||
for f in perf_layer:
|
||||
f.up()
|
||||
return layers
|
||||
|
||||
def rate(self, rating_groups, ranks=None, weights=None, min_delta=DELTA):
|
||||
rating_groups, keys = self.validate_rating_groups(rating_groups)
|
||||
weights = self.validate_weights(weights, rating_groups, keys)
|
||||
group_size = len(rating_groups)
|
||||
if ranks is None:
|
||||
ranks = range(group_size)
|
||||
elif len(ranks) != group_size:
|
||||
raise ValueError('Wrong ranks')
|
||||
# sort rating groups by rank
|
||||
by_rank = lambda x: x[1][1]
|
||||
sorting = sorted(enumerate(zip(rating_groups, ranks, weights)),
|
||||
key=by_rank)
|
||||
sorted_rating_groups, sorted_ranks, sorted_weights = [], [], []
|
||||
for x, (g, r, w) in sorting:
|
||||
sorted_rating_groups.append(g)
|
||||
sorted_ranks.append(r)
|
||||
# make weights to be greater than 0
|
||||
sorted_weights.append(max(min_delta, w_) for w_ in w)
|
||||
# build factor graph
|
||||
args = (sorted_rating_groups, sorted_ranks, sorted_weights)
|
||||
builders = self.factor_graph_builders(*args)
|
||||
args = builders + (min_delta,)
|
||||
layers = self.run_schedule(*args)
|
||||
# make result
|
||||
rating_layer, team_sizes = layers[0], _team_sizes(sorted_rating_groups)
|
||||
transformed_groups = []
|
||||
for start, end in zip([0] + team_sizes[:-1], team_sizes):
|
||||
group = []
|
||||
for f in rating_layer[start:end]:
|
||||
group.append(Rating(float(f.var.mu), float(f.var.sigma)))
|
||||
transformed_groups.append(tuple(group))
|
||||
by_hint = lambda x: x[0]
|
||||
unsorting = sorted(zip((x for x, __ in sorting), transformed_groups),
|
||||
key=by_hint)
|
||||
if keys is None:
|
||||
return [g for x, g in unsorting]
|
||||
# restore the structure with input dictionary keys
|
||||
return [dict(zip(keys[x], g)) for x, g in unsorting]
|
||||
|
||||
def quality(self, rating_groups, weights=None):
|
||||
rating_groups, keys = self.validate_rating_groups(rating_groups)
|
||||
weights = self.validate_weights(weights, rating_groups, keys)
|
||||
flatten_ratings = sum(map(tuple, rating_groups), ())
|
||||
flatten_weights = sum(map(tuple, weights), ())
|
||||
length = len(flatten_ratings)
|
||||
# a vector of all of the skill means
|
||||
mean_matrix = Matrix([[r.mu] for r in flatten_ratings])
|
||||
# a matrix whose diagonal values are the variances (sigma ** 2) of each
|
||||
# of the players.
|
||||
def variance_matrix(height, width):
|
||||
variances = (r.sigma ** 2 for r in flatten_ratings)
|
||||
for x, variance in enumerate(variances):
|
||||
yield (x, x), variance
|
||||
variance_matrix = Matrix(variance_matrix, length, length)
|
||||
# the player-team assignment and comparison matrix
|
||||
def rotated_a_matrix(set_height, set_width):
|
||||
t = 0
|
||||
for r, (cur, _next) in enumerate(zip(rating_groups[:-1],
|
||||
rating_groups[1:])):
|
||||
for x in range(t, t + len(cur)):
|
||||
yield (r, x), flatten_weights[x]
|
||||
t += 1
|
||||
x += 1
|
||||
for x in range(x, x + len(_next)):
|
||||
yield (r, x), -flatten_weights[x]
|
||||
set_height(r + 1)
|
||||
set_width(x + 1)
|
||||
rotated_a_matrix = Matrix(rotated_a_matrix)
|
||||
a_matrix = rotated_a_matrix.transpose()
|
||||
# match quality further derivation
|
||||
_ata = (self.beta ** 2) * rotated_a_matrix * a_matrix
|
||||
_atsa = rotated_a_matrix * variance_matrix * a_matrix
|
||||
start = mean_matrix.transpose() * a_matrix
|
||||
middle = _ata + _atsa
|
||||
end = rotated_a_matrix * mean_matrix
|
||||
# make result
|
||||
e_arg = (-0.5 * start * middle.inverse() * end).determinant()
|
||||
s_arg = _ata.determinant() / middle.determinant()
|
||||
return math.exp(e_arg) * math.sqrt(s_arg)
|
||||
|
||||
def expose(self, rating):
|
||||
k = self.mu / self.sigma
|
||||
return rating.mu - k * rating.sigma
|
||||
|
||||
def make_as_global(self):
|
||||
return setup(env=self)
|
||||
|
||||
def __repr__(self):
|
||||
c = type(self)
|
||||
if callable(self.draw_probability):
|
||||
f = self.draw_probability
|
||||
draw_probability = '.'.join([f.__module__, f.__name__])
|
||||
else:
|
||||
draw_probability = '%.1f%%' % (self.draw_probability * 100)
|
||||
if self.backend is None:
|
||||
backend = ''
|
||||
elif isinstance(self.backend, tuple):
|
||||
backend = ', backend=...'
|
||||
else:
|
||||
backend = ', backend=%r' % self.backend
|
||||
args = ('.'.join([c.__module__, c.__name__]), self.mu, self.sigma,
|
||||
self.beta, self.tau, draw_probability, backend)
|
||||
return ('%s(mu=%.3f, sigma=%.3f, beta=%.3f, tau=%.3f, '
|
||||
'draw_probability=%s%s)' % args)
|
||||
|
||||
|
||||
def rate_1vs1(rating1, rating2, drawn=False, min_delta=DELTA, env=None):
|
||||
if env is None:
|
||||
env = global_env()
|
||||
ranks = [0, 0 if drawn else 1]
|
||||
teams = env.rate([(rating1,), (rating2,)], ranks, min_delta=min_delta)
|
||||
return teams[0][0], teams[1][0]
|
||||
|
||||
|
||||
def quality_1vs1(rating1, rating2, env=None):
|
||||
if env is None:
|
||||
env = global_env()
|
||||
return env.quality([(rating1,), (rating2,)])
|
||||
|
||||
|
||||
def global_env():
|
||||
try:
|
||||
global_env.__trueskill__
|
||||
except AttributeError:
|
||||
# setup the default environment
|
||||
setup()
|
||||
return global_env.__trueskill__
|
||||
|
||||
|
||||
def setup(mu=MU, sigma=SIGMA, beta=BETA, tau=TAU,
|
||||
draw_probability=DRAW_PROBABILITY, backend=None, env=None):
|
||||
if env is None:
|
||||
env = TrueSkill(mu, sigma, beta, tau, draw_probability, backend)
|
||||
global_env.__trueskill__ = env
|
||||
return env
|
||||
|
||||
|
||||
def rate(rating_groups, ranks=None, weights=None, min_delta=DELTA):
|
||||
return global_env().rate(rating_groups, ranks, weights, min_delta)
|
||||
|
||||
|
||||
def quality(rating_groups, weights=None):
|
||||
return global_env().quality(rating_groups, weights)
|
||||
|
||||
|
||||
def expose(rating):
|
||||
return global_env().expose(rating)
|
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.10-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.10-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.10.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.10.tar.gz
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.11-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.11-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.11.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.11.tar.gz
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.12-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.12-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.12.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.12.tar.gz
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.8-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.8-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.8.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.8.tar.gz
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.9-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.9-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.9.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.9.tar.gz
vendored
Normal file
Binary file not shown.
5
analysis-master/analysis-amd64/docker/Dockerfile
Normal file
5
analysis-master/analysis-amd64/docker/Dockerfile
Normal file
@@ -0,0 +1,5 @@
|
||||
FROM python
|
||||
WORKDIR ~/
|
||||
COPY ./ ./
|
||||
RUN pip install -r requirements.txt
|
||||
CMD ["bash"]
|
3
analysis-master/analysis-amd64/docker/start-docker.sh
Executable file
3
analysis-master/analysis-amd64/docker/start-docker.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
cd ..
|
||||
docker build -t tra-analysis-amd64-dev -f docker/Dockerfile .
|
||||
docker run -it tra-analysis-amd64-dev
|
6
analysis-master/analysis-amd64/requirements.txt
Normal file
6
analysis-master/analysis-amd64/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
numba
|
||||
numpy
|
||||
scipy
|
||||
scikit-learn
|
||||
six
|
||||
matplotlib
|
@@ -1,8 +1,14 @@
|
||||
import setuptools
|
||||
|
||||
requirements = []
|
||||
|
||||
with open("requirements.txt", 'r') as file:
|
||||
for line in file:
|
||||
requirements.append(line)
|
||||
|
||||
setuptools.setup(
|
||||
name="analysis", # Replace with your own username
|
||||
version="1.0.0.006",
|
||||
name="analysis",
|
||||
version="1.0.0.012",
|
||||
author="The Titan Scouting Team",
|
||||
author_email="titanscout2022@gmail.com",
|
||||
description="analysis package developed by Titan Scouting for The Red Alliance",
|
||||
@@ -10,6 +16,7 @@ setuptools.setup(
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/titanscout2022/tr2022-strategy",
|
||||
packages=setuptools.find_packages(),
|
||||
install_requires=requirements,
|
||||
license = "GNU General Public License v3.0",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
3
analysis-master/analysis-arm64/docker/start-docker.sh
Executable file
3
analysis-master/analysis-arm64/docker/start-docker.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
cd ..
|
||||
docker build -t tra-analysis-amd64-dev -f docker/Dockerfile .
|
||||
docker run -it tra-analysis-amd64-dev
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
python3 setup.py sdist bdist_wheel
|
Binary file not shown.
BIN
analysis-master/dist/analysis-1.0.0.6.tar.gz
vendored
BIN
analysis-master/dist/analysis-1.0.0.6.tar.gz
vendored
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,13 +0,0 @@
|
||||
2020ilch
|
||||
balls-blocked,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-collected,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-lower,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-started,basic_stats,historical_analyss,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-upper,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
wheel-mechanism
|
||||
low-balls
|
||||
high-balls
|
||||
wheel-success
|
||||
strategic-focus
|
||||
climb-mechanism
|
||||
attitude
|
|
1
data analysis/config/competition.config
Normal file
1
data analysis/config/competition.config
Normal file
@@ -0,0 +1 @@
|
||||
2020ilch
|
14
data analysis/config/stats.config
Normal file
14
data analysis/config/stats.config
Normal file
@@ -0,0 +1,14 @@
|
||||
balls-blocked,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-collected,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-lower-teleop,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-lower-auto,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-started,basic_stats,historical_analyss,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-upper-teleop,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-upper-auto,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
wheel-mechanism
|
||||
low-balls
|
||||
high-balls
|
||||
wheel-success
|
||||
strategic-focus
|
||||
climb-mechanism
|
||||
attitude
|
59
data analysis/get_team_rankings.py
Normal file
59
data analysis/get_team_rankings.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import data as d
|
||||
from analysis import analysis as an
|
||||
import pymongo
|
||||
import operator
|
||||
|
||||
def load_config(file):
|
||||
config_vector = {}
|
||||
file = an.load_csv(file)
|
||||
for line in file[1:]:
|
||||
config_vector[line[0]] = line[1:]
|
||||
|
||||
return (file[0][0], config_vector)
|
||||
|
||||
def get_metrics_processed_formatted(apikey, competition):
|
||||
client = pymongo.MongoClient(apikey)
|
||||
db = client.data_scouting
|
||||
mdata = db.teamlist
|
||||
x=mdata.find_one({"competition":competition})
|
||||
out = {}
|
||||
for i in x:
|
||||
try:
|
||||
out[int(i)] = d.get_team_metrics_data(apikey, competition, int(i))
|
||||
except:
|
||||
pass
|
||||
return out
|
||||
|
||||
def main():
|
||||
|
||||
apikey = an.load_csv("keys.txt")[0][0]
|
||||
tbakey = an.load_csv("keys.txt")[1][0]
|
||||
|
||||
competition, config = load_config("config.csv")
|
||||
|
||||
metrics = get_metrics_processed_formatted(apikey, competition)
|
||||
|
||||
elo = {}
|
||||
gl2 = {}
|
||||
|
||||
for team in metrics:
|
||||
|
||||
elo[team] = metrics[team]["metrics"]["elo"]["score"]
|
||||
gl2[team] = metrics[team]["metrics"]["gl2"]["score"]
|
||||
|
||||
elo = {k: v for k, v in sorted(elo.items(), key=lambda item: item[1])}
|
||||
gl2 = {k: v for k, v in sorted(gl2.items(), key=lambda item: item[1])}
|
||||
|
||||
for team in elo:
|
||||
|
||||
print("teams sorted by elo:")
|
||||
print("" + str(team) + " | " + str(elo[team]))
|
||||
|
||||
print("*"*25)
|
||||
|
||||
for team in gl2:
|
||||
|
||||
print("teams sorted by glicko2:")
|
||||
print("" + str(team) + " | " + str(gl2[team]))
|
||||
|
||||
main()
|
4
data analysis/requirements.txt
Normal file
4
data analysis/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
requests
|
||||
pymongo
|
||||
pandas
|
||||
dnspython
|
@@ -3,11 +3,28 @@
|
||||
# Notes:
|
||||
# setup:
|
||||
|
||||
__version__ = "0.0.3.000"
|
||||
__version__ = "0.0.5.002"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
0.0.3.00:
|
||||
0.0.5.002:
|
||||
- made changes due to refactoring of analysis
|
||||
0.0.5.001:
|
||||
- text fixes
|
||||
- removed matplotlib requirement
|
||||
0.0.5.000:
|
||||
- improved user interface
|
||||
0.0.4.002:
|
||||
- removed unessasary code
|
||||
0.0.4.001:
|
||||
- fixed bug where X range for regression was determined before sanitization
|
||||
- better sanitized data
|
||||
0.0.4.000:
|
||||
- fixed spelling issue in __changelog__
|
||||
- addressed nan bug in regression
|
||||
- fixed errors on line 335 with metrics calling incorrect key "glicko2"
|
||||
- fixed errors in metrics computing
|
||||
0.0.3.000:
|
||||
- added analysis to pit data
|
||||
0.0.2.001:
|
||||
- minor stability patches
|
||||
@@ -71,7 +88,9 @@ __all__ = [
|
||||
|
||||
from analysis import analysis as an
|
||||
import data as d
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from os import system, name
|
||||
from pathlib import Path
|
||||
import time
|
||||
import warnings
|
||||
|
||||
@@ -80,16 +99,16 @@ def main():
|
||||
while(True):
|
||||
|
||||
current_time = time.time()
|
||||
print("time: " + str(current_time))
|
||||
print("[OK] time: " + str(current_time))
|
||||
|
||||
print(" loading config")
|
||||
competition, config = load_config("config.csv")
|
||||
print(" config loaded")
|
||||
start = time.time()
|
||||
config = load_config(Path("config/stats.config"))
|
||||
competition = an.load_csv(Path("config/competition.config"))[0][0]
|
||||
print("[OK] configs loaded")
|
||||
|
||||
print(" loading database keys")
|
||||
apikey = an.load_csv("keys.txt")[0][0]
|
||||
tbakey = an.load_csv("keys.txt")[1][0]
|
||||
print(" loaded keys")
|
||||
apikey = an.load_csv(Path("config/keys.config"))[0][0]
|
||||
tbakey = an.load_csv(Path("config/keys.config"))[1][0]
|
||||
print("[OK] loaded keys")
|
||||
|
||||
previous_time = d.get_analysis_flags(apikey, "latest_update")
|
||||
|
||||
@@ -102,38 +121,55 @@ def main():
|
||||
|
||||
previous_time = previous_time["latest_update"]
|
||||
|
||||
print(" analysis backtimed to: " + str(previous_time))
|
||||
print("[OK] analysis backtimed to: " + str(previous_time))
|
||||
|
||||
print(" loading data")
|
||||
print("[OK] loading data")
|
||||
start = time.time()
|
||||
data = d.get_match_data_formatted(apikey, competition)
|
||||
pit_data = d.pit = d.get_pit_data_formatted(apikey, competition)
|
||||
print(" loaded data")
|
||||
print("[OK] loaded data in " + str(time.time() - start) + " seconds")
|
||||
|
||||
print(" running tests")
|
||||
print("[OK] running tests")
|
||||
start = time.time()
|
||||
results = simpleloop(data, config)
|
||||
print(" finished tests")
|
||||
print("[OK] finished tests in " + str(time.time() - start) + " seconds")
|
||||
|
||||
print(" running metrics")
|
||||
metrics = metricsloop(tbakey, apikey, competition, previous_time)
|
||||
print(" finished metrics")
|
||||
print("[OK] running metrics")
|
||||
start = time.time()
|
||||
metricsloop(tbakey, apikey, competition, previous_time)
|
||||
print("[OK] finished metrics in " + str(time.time() - start) + " seconds")
|
||||
|
||||
print(" running pit analysis")
|
||||
print("[OK] running pit analysis")
|
||||
start = time.time()
|
||||
pit = pitloop(pit_data, config)
|
||||
print(" finished pit analysis")
|
||||
print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds")
|
||||
|
||||
d.set_analysis_flags(apikey, "latest_update", {"latest_update":current_time})
|
||||
|
||||
print(" pushing to database")
|
||||
push_to_database(apikey, competition, results, metrics, pit)
|
||||
print(" pushed to database")
|
||||
print("[OK] pushing to database")
|
||||
start = time.time()
|
||||
push_to_database(apikey, competition, results, pit)
|
||||
print("[OK] pushed to database in " + str(time.time() - start) + " seconds")
|
||||
|
||||
clear()
|
||||
|
||||
def clear():
|
||||
|
||||
# for windows
|
||||
if name == 'nt':
|
||||
_ = system('cls')
|
||||
|
||||
# for mac and linux(here, os.name is 'posix')
|
||||
else:
|
||||
_ = system('clear')
|
||||
|
||||
def load_config(file):
|
||||
config_vector = {}
|
||||
file = an.load_csv(file)
|
||||
for line in file[1:]:
|
||||
for line in file:
|
||||
config_vector[line[0]] = line[1:]
|
||||
|
||||
return (file[0][0], config_vector)
|
||||
return config_vector
|
||||
|
||||
def simpleloop(data, tests): # expects 3D array with [Team][Variable][Match]
|
||||
|
||||
@@ -155,37 +191,37 @@ def simpleloop(data, tests): # expects 3D array with [Team][Variable][Match]
|
||||
|
||||
def simplestats(data, test):
|
||||
|
||||
data = np.array(data)
|
||||
data = data[np.isfinite(data)]
|
||||
ranges = list(range(len(data)))
|
||||
|
||||
if(test == "basic_stats"):
|
||||
return an.basic_stats(data)
|
||||
|
||||
if(test == "historical_analysis"):
|
||||
return an.histo_analysis([list(range(len(data))), data])
|
||||
return an.histo_analysis([ranges, data])
|
||||
|
||||
if(test == "regression_linear"):
|
||||
return an.regression(list(range(len(data))), data, ['lin'])
|
||||
return an.regression(ranges, data, ['lin'])
|
||||
|
||||
if(test == "regression_logarithmic"):
|
||||
return an.regression(list(range(len(data))), data, ['log'])
|
||||
return an.regression(ranges, data, ['log'])
|
||||
|
||||
if(test == "regression_exponential"):
|
||||
return an.regression(list(range(len(data))), data, ['exp'])
|
||||
return an.regression(ranges, data, ['exp'])
|
||||
|
||||
if(test == "regression_polynomial"):
|
||||
return an.regression(list(range(len(data))), data, ['ply'])
|
||||
return an.regression(ranges, data, ['ply'])
|
||||
|
||||
if(test == "regression_sigmoidal"):
|
||||
return an.regression(list(range(len(data))), data, ['sig'])
|
||||
return an.regression(ranges, data, ['sig'])
|
||||
|
||||
def push_to_database(apikey, competition, results, metrics, pit):
|
||||
def push_to_database(apikey, competition, results, pit):
|
||||
|
||||
for team in results:
|
||||
|
||||
d.push_team_tests_data(apikey, competition, team, results[team])
|
||||
|
||||
for team in metrics:
|
||||
|
||||
d.push_team_metrics_data(apikey, competition, team, metrics[team])
|
||||
|
||||
for variable in pit:
|
||||
|
||||
d.push_team_pit_data(apikey, competition, variable, pit[variable])
|
||||
@@ -197,8 +233,6 @@ def metricsloop(tbakey, apikey, competition, timestamp): # listener based metric
|
||||
|
||||
matches = d.pull_new_tba_matches(tbakey, competition, timestamp)
|
||||
|
||||
return_vector = {}
|
||||
|
||||
red = {}
|
||||
blu = {}
|
||||
|
||||
@@ -206,7 +240,7 @@ def metricsloop(tbakey, apikey, competition, timestamp): # listener based metric
|
||||
|
||||
red = load_metrics(apikey, competition, match, "red")
|
||||
blu = load_metrics(apikey, competition, match, "blue")
|
||||
|
||||
|
||||
elo_red_total = 0
|
||||
elo_blu_total = 0
|
||||
|
||||
@@ -254,11 +288,11 @@ def metricsloop(tbakey, apikey, competition, timestamp): # listener based metric
|
||||
|
||||
observations = {"red": 0.5, "blu": 0.5}
|
||||
|
||||
red_elo_delta = an.elo(red_elo["score"], blu_elo["score"], observations["red"], elo_N, elo_K) - red_elo["score"]
|
||||
blu_elo_delta = an.elo(blu_elo["score"], red_elo["score"], observations["blu"], elo_N, elo_K) - blu_elo["score"]
|
||||
red_elo_delta = an.Metrics.elo(red_elo["score"], blu_elo["score"], observations["red"], elo_N, elo_K) - red_elo["score"]
|
||||
blu_elo_delta = an.Metrics.elo(blu_elo["score"], red_elo["score"], observations["blu"], elo_N, elo_K) - blu_elo["score"]
|
||||
|
||||
new_red_gl2_score, new_red_gl2_rd, new_red_gl2_vol = an.glicko2(red_gl2["score"], red_gl2["rd"], red_gl2["vol"], [blu_gl2["score"]], [blu_gl2["rd"]], [observations["red"], observations["blu"]])
|
||||
new_blu_gl2_score, new_blu_gl2_rd, new_blu_gl2_vol = an.glicko2(blu_gl2["score"], blu_gl2["rd"], blu_gl2["vol"], [red_gl2["score"]], [red_gl2["rd"]], [observations["blu"], observations["red"]])
|
||||
new_red_gl2_score, new_red_gl2_rd, new_red_gl2_vol = an.Metrics.glicko2(red_gl2["score"], red_gl2["rd"], red_gl2["vol"], [blu_gl2["score"]], [blu_gl2["rd"]], [observations["red"], observations["blu"]])
|
||||
new_blu_gl2_score, new_blu_gl2_rd, new_blu_gl2_vol = an.Metrics.glicko2(blu_gl2["score"], blu_gl2["rd"], blu_gl2["vol"], [red_gl2["score"]], [red_gl2["rd"]], [observations["blu"], observations["red"]])
|
||||
|
||||
red_gl2_delta = {"score": new_red_gl2_score - red_gl2["score"], "rd": new_red_gl2_rd - red_gl2["rd"], "vol": new_red_gl2_vol - red_gl2["vol"]}
|
||||
blu_gl2_delta = {"score": new_blu_gl2_score - blu_gl2["score"], "rd": new_blu_gl2_rd - blu_gl2["rd"], "vol": new_blu_gl2_vol - blu_gl2["vol"]}
|
||||
@@ -279,36 +313,13 @@ def metricsloop(tbakey, apikey, competition, timestamp): # listener based metric
|
||||
blu[team]["gl2"]["rd"] = blu[team]["gl2"]["rd"] + blu_gl2_delta["rd"]
|
||||
blu[team]["gl2"]["vol"] = blu[team]["gl2"]["vol"] + blu_gl2_delta["vol"]
|
||||
|
||||
""" not functional for now
|
||||
red_trueskill = []
|
||||
blu_trueskill = []
|
||||
temp_vector = {}
|
||||
temp_vector.update(red)
|
||||
temp_vector.update(blu)
|
||||
|
||||
red_ts_team_lookup = []
|
||||
blu_ts_team_lookup = []
|
||||
for team in temp_vector:
|
||||
|
||||
for team in red:
|
||||
|
||||
red_trueskill.append((red[team]["ts"]["mu"], red[team]["ts"]["sigma"]))
|
||||
red_ts_team_lookup.append(team)
|
||||
|
||||
for team in blu:
|
||||
|
||||
blu_trueskill.append((blu[team]["ts"]["mu"], blu[team]["ts"]["sigma"]))
|
||||
blu_ts_team_lookup.append(team)
|
||||
|
||||
print(red_trueskill)
|
||||
print(blu_trueskill)
|
||||
|
||||
results = an.trueskill([red_trueskill, blu_trueskill], [observations["red"], observations["blu"]])
|
||||
|
||||
print(results)
|
||||
|
||||
"""
|
||||
|
||||
return_vector.update(red)
|
||||
return_vector.update(blu)
|
||||
|
||||
return return_vector
|
||||
d.push_team_metrics_data(apikey, competition, team, temp_vector[team])
|
||||
|
||||
def load_metrics(apikey, competition, match, group_name):
|
||||
|
||||
@@ -324,16 +335,17 @@ def load_metrics(apikey, competition, match, group_name):
|
||||
gl2 = {"score": 1500, "rd": 250, "vol": 0.06}
|
||||
ts = {"mu": 25, "sigma": 25/3}
|
||||
|
||||
d.push_team_metrics_data(apikey, competition, team, {"elo":elo, "gliko2":gl2,"trueskill":ts})
|
||||
#d.push_team_metrics_data(apikey, competition, team, {"elo":elo, "gl2":gl2,"trueskill":ts})
|
||||
|
||||
group[team] = {"elo": elo, "gl2": gl2, "ts": ts}
|
||||
|
||||
else:
|
||||
|
||||
metrics = db_data["metrics"]
|
||||
|
||||
elo = metrics["elo"]
|
||||
gl2 = metrics["gliko2"]
|
||||
ts = metrics["trueskill"]
|
||||
gl2 = metrics["gl2"]
|
||||
ts = metrics["ts"]
|
||||
|
||||
group[team] = {"elo": elo, "gl2": gl2, "ts": ts}
|
||||
|
||||
|
@@ -34,7 +34,7 @@ import numpy as np
|
||||
|
||||
|
||||
# %%
|
||||
fig, ax = plt.subplots(1, len(pit), sharey=True, figsize=(20,10))
|
||||
fig, ax = plt.subplots(1, len(pit), sharey=True, figsize=(80,15))
|
||||
|
||||
i = 0
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,944 +0,0 @@
|
||||
# Titan Robotics Team 2022: Data Analysis Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this should be imported as a python module using 'import analysis'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has not been optimized for multhreaded computing
|
||||
# number of easter eggs: 2
|
||||
# setup:
|
||||
|
||||
__version__ = "1.0.9.000"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
1.0.9.000:
|
||||
- refactored
|
||||
- numpyed everything
|
||||
- removed stats in favor of numpy functions
|
||||
1.0.8.005:
|
||||
- minor fixes
|
||||
1.0.8.004:
|
||||
- removed a few unused dependencies
|
||||
1.0.8.003:
|
||||
- added p_value function
|
||||
1.0.8.002:
|
||||
- updated __all__ correctly to contain changes made in v 1.0.8.000 and v 1.0.8.001
|
||||
1.0.8.001:
|
||||
- refactors
|
||||
- bugfixes
|
||||
1.0.8.000:
|
||||
- depreciated histo_analysis_old
|
||||
- depreciated debug
|
||||
- altered basic_analysis to take array data instead of filepath
|
||||
- refactor
|
||||
- optimization
|
||||
1.0.7.002:
|
||||
- bug fixes
|
||||
1.0.7.001:
|
||||
- bug fixes
|
||||
1.0.7.000:
|
||||
- added tanh_regression (logistical regression)
|
||||
- bug fixes
|
||||
1.0.6.005:
|
||||
- added z_normalize function to normalize dataset
|
||||
- bug fixes
|
||||
1.0.6.004:
|
||||
- bug fixes
|
||||
1.0.6.003:
|
||||
- bug fixes
|
||||
1.0.6.002:
|
||||
- bug fixes
|
||||
1.0.6.001:
|
||||
- corrected __all__ to contain all of the functions
|
||||
1.0.6.000:
|
||||
- added calc_overfit, which calculates two measures of overfit, error and performance
|
||||
- added calculating overfit to optimize_regression
|
||||
1.0.5.000:
|
||||
- added optimize_regression function, which is a sample function to find the optimal regressions
|
||||
- optimize_regression function filters out some overfit funtions (functions with r^2 = 1)
|
||||
- planned addition: overfit detection in the optimize_regression function
|
||||
1.0.4.002:
|
||||
- added __changelog__
|
||||
- updated debug function with log and exponential regressions
|
||||
1.0.4.001:
|
||||
- added log regressions
|
||||
- added exponential regressions
|
||||
- added log_regression and exp_regression to __all__
|
||||
1.0.3.008:
|
||||
- added debug function to further consolidate functions
|
||||
1.0.3.007:
|
||||
- added builtin benchmark function
|
||||
- added builtin random (linear) data generation function
|
||||
- added device initialization (_init_device)
|
||||
1.0.3.006:
|
||||
- reorganized the imports list to be in alphabetical order
|
||||
- added search and regurgitate functions to c_entities, nc_entities, obstacles, objectives
|
||||
1.0.3.005:
|
||||
- major bug fixes
|
||||
- updated historical analysis
|
||||
- depreciated old historical analysis
|
||||
1.0.3.004:
|
||||
- added __version__, __author__, __all__
|
||||
- added polynomial regression
|
||||
- added root mean squared function
|
||||
- added r squared function
|
||||
1.0.3.003:
|
||||
- bug fixes
|
||||
- added c_entities
|
||||
1.0.3.002:
|
||||
- bug fixes
|
||||
- added nc_entities, obstacles, objectives
|
||||
- consolidated statistics.py to analysis.py
|
||||
1.0.3.001:
|
||||
- compiled 1d, column, and row basic stats into basic stats function
|
||||
1.0.3.000:
|
||||
- added historical analysis function
|
||||
1.0.2.xxx:
|
||||
- added z score test
|
||||
1.0.1.xxx:
|
||||
- major bug fixes
|
||||
1.0.0.xxx:
|
||||
- added loading csv
|
||||
- added 1d, column, row basic stats
|
||||
"""
|
||||
|
||||
__author__ = (
|
||||
"Arthur Lu <arthurlu@ttic.edu>, "
|
||||
"Jacob Levine <jlevine@ttic.edu>,"
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'_init_device',
|
||||
'c_entities',
|
||||
'nc_entities',
|
||||
'obstacles',
|
||||
'objectives',
|
||||
'load_csv',
|
||||
'basic_stats',
|
||||
'z_score',
|
||||
'z_normalize',
|
||||
'stdev_z_split',
|
||||
'histo_analysis',
|
||||
'poly_regression',
|
||||
'log_regression',
|
||||
'exp_regression',
|
||||
'r_squared',
|
||||
'rms',
|
||||
'calc_overfit',
|
||||
'strip_data',
|
||||
'optimize_regression',
|
||||
'select_best_regression',
|
||||
'basic_analysis',
|
||||
# all statistics functions left out due to integration in other functions
|
||||
]
|
||||
|
||||
# now back to your regularly scheduled programming:
|
||||
|
||||
# imports (now in alphabetical order! v 1.0.3.006):
|
||||
|
||||
from bisect import bisect_left, bisect_right
|
||||
import collections
|
||||
import csv
|
||||
from decimal import Decimal
|
||||
import functools
|
||||
from fractions import Fraction
|
||||
from itertools import groupby
|
||||
import math
|
||||
import matplotlib
|
||||
import numbers
|
||||
import numpy as np
|
||||
import pandas
|
||||
import random
|
||||
import scipy
|
||||
from scipy.optimize import curve_fit
|
||||
from scipy import stats
|
||||
from sklearn import *
|
||||
# import statistics <-- statistics.py functions have been integrated into analysis.py as of v 1.0.3.002
|
||||
import time
|
||||
import torch
|
||||
|
||||
class error(ValueError):
|
||||
pass
|
||||
|
||||
def _init_device(setting, arg): # initiates computation device for ANNs
|
||||
if setting == "cuda":
|
||||
try:
|
||||
return torch.device(setting + ":" + str(arg) if torch.cuda.is_available() else "cpu")
|
||||
except:
|
||||
raise error("could not assign cuda or cpu")
|
||||
elif setting == "cpu":
|
||||
try:
|
||||
return torch.device("cpu")
|
||||
except:
|
||||
raise error("could not assign cpu")
|
||||
else:
|
||||
raise error("specified device does not exist")
|
||||
|
||||
def load_csv(filepath):
|
||||
with open(filepath, newline='') as csvfile:
|
||||
file_array = np.array(list(csv.reader(csvfile)))
|
||||
csvfile.close()
|
||||
return file_array
|
||||
|
||||
# data=array, mode = ['1d':1d_basic_stats, 'column':c_basic_stats, 'row':r_basic_stats], arg for mode 1 or mode 2 for column or row
|
||||
def basic_stats(data, method, arg):
|
||||
|
||||
if method == 'debug':
|
||||
return "basic_stats requires 3 args: data, mode, arg; where data is data to be analyzed, mode is an int from 0 - 2 depending on type of analysis (by column or by row) and is only applicable to 2d arrays (for 1d arrays use mode 1), and arg is row/column number for mode 1 or mode 2; function returns: [mean, median, mode, stdev, variance]"
|
||||
|
||||
if method == "1d" or method == 0:
|
||||
|
||||
data_t = np.array(data).astype(float)
|
||||
|
||||
_mean = mean(data_t)
|
||||
_median = median(data_t)
|
||||
try:
|
||||
_mode = mode(data_t)
|
||||
except:
|
||||
_mode = None
|
||||
try:
|
||||
_stdev = stdev(data_t)
|
||||
except:
|
||||
_stdev = None
|
||||
try:
|
||||
_variance = variance(data_t)
|
||||
except:
|
||||
_variance = None
|
||||
|
||||
return _mean, _median, _mode, _stdev, _variance
|
||||
"""
|
||||
elif method == "column" or method == 1:
|
||||
|
||||
c_data = []
|
||||
c_data_sorted = []
|
||||
|
||||
for i in data:
|
||||
try:
|
||||
c_data.append(float(i[arg]))
|
||||
except:
|
||||
pass
|
||||
|
||||
_mean = mean(c_data)
|
||||
_median = median(c_data)
|
||||
try:
|
||||
_mode = mode(c_data)
|
||||
except:
|
||||
_mode = None
|
||||
try:
|
||||
_stdev = stdev(c_data)
|
||||
except:
|
||||
_stdev = None
|
||||
try:
|
||||
_variance = variance(c_data)
|
||||
except:
|
||||
_variance = None
|
||||
|
||||
return _mean, _median, _mode, _stdev, _variance
|
||||
|
||||
elif method == "row" or method == 2:
|
||||
|
||||
r_data = []
|
||||
|
||||
for i in range(len(data[arg])):
|
||||
r_data.append(float(data[arg][i]))
|
||||
|
||||
_mean = mean(r_data)
|
||||
_median = median(r_data)
|
||||
try:
|
||||
_mode = mode(r_data)
|
||||
except:
|
||||
_mode = None
|
||||
try:
|
||||
_stdev = stdev(r_data)
|
||||
except:
|
||||
_stdev = None
|
||||
try:
|
||||
_variance = variance(r_data)
|
||||
except:
|
||||
_variance = None
|
||||
|
||||
return _mean, _median, _mode, _stdev, _variance
|
||||
|
||||
else:
|
||||
raise error("method error")
|
||||
"""
|
||||
|
||||
|
||||
# returns z score with inputs of point, mean and standard deviation of spread
|
||||
def z_score(point, mean, stdev):
|
||||
score = (point - mean) / stdev
|
||||
return score
|
||||
|
||||
# mode is either 'x' or 'y' or 'both' depending on the variable(s) to be normalized
|
||||
def z_normalize(x, y, mode):
|
||||
|
||||
x_norm = np.array().astype(float)
|
||||
y_norm = np.array().astype(float)
|
||||
|
||||
mean = 0
|
||||
stdev = 0
|
||||
|
||||
if mode == 'x':
|
||||
_mean, _median, _mode, _stdev, _variance = basic_stats(x, "1d", 0)
|
||||
|
||||
for i in range(0, len(x), 1):
|
||||
x_norm.append(z_score(x[i], _mean, _stdev))
|
||||
|
||||
return x_norm, y
|
||||
|
||||
if mode == 'y':
|
||||
_mean, _median, _mode, _stdev, _variance = basic_stats(y, "1d", 0)
|
||||
|
||||
for i in range(0, len(y), 1):
|
||||
y_norm.append(z_score(y[i], _mean, _stdev))
|
||||
|
||||
return x, y_norm
|
||||
|
||||
if mode == 'both':
|
||||
_mean, _median, _mode, _stdev, _variance = basic_stats(x, "1d", 0)
|
||||
|
||||
for i in range(0, len(x), 1):
|
||||
x_norm.append(z_score(x[i], _mean, _stdev))
|
||||
|
||||
_mean, _median, _mode, _stdev, _variance = basic_stats(y, "1d", 0)
|
||||
|
||||
for i in range(0, len(y), 1):
|
||||
y_norm.append(z_score(y[i], _mean, _stdev))
|
||||
|
||||
return x_norm, y_norm
|
||||
|
||||
else:
|
||||
|
||||
return error('method error')
|
||||
|
||||
|
||||
# returns n-th percentile of spread given mean, standard deviation, lower z-score, and upper z-score
|
||||
def stdev_z_split(mean, stdev, delta, low_bound, high_bound):
|
||||
|
||||
z_split = np.array().astype(float)
|
||||
i = low_bound
|
||||
|
||||
while True:
|
||||
z_split.append(float((1 / (stdev * math.sqrt(2 * math.pi))) *
|
||||
math.e ** (-0.5 * (((i - mean) / stdev) ** 2))))
|
||||
i = i + delta
|
||||
if i > high_bound:
|
||||
break
|
||||
|
||||
return z_split
|
||||
|
||||
|
||||
def histo_analysis(hist_data, delta, low_bound, high_bound):
|
||||
|
||||
if hist_data == 'debug':
|
||||
return ('returns list of predicted values based on historical data; input delta for delta step in z-score and lower and higher bounds in number of standard deviations')
|
||||
|
||||
derivative = []
|
||||
|
||||
for i in range(0, len(hist_data), 1):
|
||||
try:
|
||||
derivative.append(float(hist_data[i - 1]) - float(hist_data[i]))
|
||||
except:
|
||||
pass
|
||||
|
||||
derivative_sorted = sorted(derivative, key=int)
|
||||
mean_derivative = basic_stats(derivative_sorted, "1d", 0)[0]
|
||||
stdev_derivative = basic_stats(derivative_sorted, "1d", 0)[3]
|
||||
|
||||
predictions = []
|
||||
pred_change = 0
|
||||
|
||||
i = low_bound
|
||||
|
||||
while True:
|
||||
if i > high_bound:
|
||||
break
|
||||
|
||||
try:
|
||||
pred_change = mean_derivative + i * stdev_derivative
|
||||
except:
|
||||
pred_change = mean_derivative
|
||||
|
||||
predictions.append(float(hist_data[-1:][0]) + pred_change)
|
||||
|
||||
i = i + delta
|
||||
|
||||
return predictions
|
||||
|
||||
|
||||
def poly_regression(x, y, power):
|
||||
|
||||
if x == "null": # if x is 'null', then x will be filled with integer points between 1 and the size of y
|
||||
x = []
|
||||
|
||||
for i in range(len(y)):
|
||||
print(i)
|
||||
x.append(i + 1)
|
||||
|
||||
reg_eq = scipy.polyfit(x, y, deg=power)
|
||||
eq_str = ""
|
||||
|
||||
for i in range(0, len(reg_eq), 1):
|
||||
if i < len(reg_eq) - 1:
|
||||
eq_str = eq_str + str(reg_eq[i]) + \
|
||||
"*(z**" + str(len(reg_eq) - i - 1) + ")+"
|
||||
else:
|
||||
eq_str = eq_str + str(reg_eq[i]) + \
|
||||
"*(z**" + str(len(reg_eq) - i - 1) + ")"
|
||||
|
||||
vals = []
|
||||
|
||||
for i in range(0, len(x), 1):
|
||||
z = x[i]
|
||||
|
||||
try:
|
||||
exec("vals.append(" + eq_str + ")")
|
||||
except:
|
||||
pass
|
||||
|
||||
_rms = rms(vals, y)
|
||||
r2_d2 = r_squared(vals, y)
|
||||
|
||||
return [eq_str, _rms, r2_d2]
|
||||
|
||||
|
||||
def log_regression(x, y, base):
|
||||
|
||||
x_fit = []
|
||||
|
||||
for i in range(len(x)):
|
||||
try:
|
||||
# change of base for logs
|
||||
x_fit.append(np.log(x[i]) / np.log(base))
|
||||
except:
|
||||
pass
|
||||
|
||||
# y = reg_eq[0] * log(x, base) + reg_eq[1]
|
||||
reg_eq = np.polyfit(x_fit, y, 1)
|
||||
q_str = str(reg_eq[0]) + "* (np.log(z) / np.log(" + \
|
||||
str(base) + "))+" + str(reg_eq[1])
|
||||
vals = []
|
||||
|
||||
for i in range(len(x)):
|
||||
z = x[i]
|
||||
|
||||
try:
|
||||
exec("vals.append(" + eq_str + ")")
|
||||
except:
|
||||
pass
|
||||
|
||||
_rms = rms(vals, y)
|
||||
r2_d2 = r_squared(vals, y)
|
||||
|
||||
return eq_str, _rms, r2_d2
|
||||
|
||||
|
||||
def exp_regression(x, y, base):
|
||||
|
||||
y_fit = []
|
||||
|
||||
for i in range(len(y)):
|
||||
try:
|
||||
# change of base for logs
|
||||
y_fit.append(np.log(y[i]) / np.log(base))
|
||||
except:
|
||||
pass
|
||||
|
||||
# y = base ^ (reg_eq[0] * x) * base ^ (reg_eq[1])
|
||||
reg_eq = np.polyfit(x, y_fit, 1, w=np.sqrt(y_fit))
|
||||
eq_str = "(" + str(base) + "**(" + \
|
||||
str(reg_eq[0]) + "*z))*(" + str(base) + "**(" + str(reg_eq[1]) + "))"
|
||||
vals = []
|
||||
|
||||
for i in range(len(x)):
|
||||
z = x[i]
|
||||
|
||||
try:
|
||||
exec("vals.append(" + eq_str + ")")
|
||||
except:
|
||||
pass
|
||||
|
||||
_rms = rms(vals, y)
|
||||
r2_d2 = r_squared(vals, y)
|
||||
|
||||
return eq_str, _rms, r2_d2
|
||||
|
||||
|
||||
def tanh_regression(x, y):
|
||||
|
||||
def tanh(x, a, b, c, d):
|
||||
|
||||
return a * np.tanh(b * (x - c)) + d
|
||||
|
||||
reg_eq = np.float64(curve_fit(tanh, np.array(x), np.array(y))[0]).tolist()
|
||||
eq_str = str(reg_eq[0]) + " * np.tanh(" + str(reg_eq[1]) + \
|
||||
"*(z - " + str(reg_eq[2]) + ")) + " + str(reg_eq[3])
|
||||
vals = []
|
||||
|
||||
for i in range(len(x)):
|
||||
z = x[i]
|
||||
try:
|
||||
exec("vals.append(" + eq_str + ")")
|
||||
except:
|
||||
pass
|
||||
|
||||
_rms = rms(vals, y)
|
||||
r2_d2 = r_squared(vals, y)
|
||||
|
||||
return eq_str, _rms, r2_d2
|
||||
|
||||
|
||||
def r_squared(predictions, targets): # assumes equal size inputs
|
||||
|
||||
return metrics.r2_score(np.array(targets), np.array(predictions))
|
||||
|
||||
|
||||
def rms(predictions, targets): # assumes equal size inputs
|
||||
|
||||
_sum = 0
|
||||
|
||||
for i in range(0, len(targets), 1):
|
||||
_sum = (targets[i] - predictions[i]) ** 2
|
||||
|
||||
return float(math.sqrt(_sum / len(targets)))
|
||||
|
||||
|
||||
def calc_overfit(equation, rms_train, r2_train, x_test, y_test):
|
||||
|
||||
# performance overfit = performance(train) - performance(test) where performance is r^2
|
||||
# error overfit = error(train) - error(test) where error is rms; biased towards smaller values
|
||||
|
||||
vals = []
|
||||
|
||||
for i in range(0, len(x_test), 1):
|
||||
|
||||
z = x_test[i]
|
||||
|
||||
exec("vals.append(" + equation + ")")
|
||||
|
||||
r2_test = r_squared(vals, y_test)
|
||||
rms_test = rms(vals, y_test)
|
||||
|
||||
return r2_train - r2_test
|
||||
|
||||
|
||||
def strip_data(data, mode):
|
||||
|
||||
if mode == "adam": # x is the row number, y are the data
|
||||
pass
|
||||
|
||||
if mode == "eve": # x are the data, y is the column number
|
||||
pass
|
||||
|
||||
else:
|
||||
raise error("mode error")
|
||||
|
||||
|
||||
# _range in poly regression is the range of powers tried, and in log/exp it is the inverse of the stepsize taken from -1000 to 1000
|
||||
def optimize_regression(x, y, _range, resolution):
|
||||
# usage not: for demonstration purpose only, performance is shit
|
||||
if type(resolution) != int:
|
||||
raise error("resolution must be int")
|
||||
|
||||
x_train = x
|
||||
y_train = []
|
||||
|
||||
for i in range(len(y)):
|
||||
y_train.append(float(y[i]))
|
||||
|
||||
x_test = []
|
||||
y_test = []
|
||||
|
||||
for i in range(0, math.floor(len(x) * 0.5), 1):
|
||||
index = random.randint(0, len(x) - 1)
|
||||
|
||||
x_test.append(x[index])
|
||||
y_test.append(float(y[index]))
|
||||
|
||||
x_train.pop(index)
|
||||
y_train.pop(index)
|
||||
|
||||
#print(x_train, x_test)
|
||||
#print(y_train, y_test)
|
||||
|
||||
eqs = []
|
||||
rmss = []
|
||||
r2s = []
|
||||
|
||||
for i in range(0, _range + 1, 1):
|
||||
try:
|
||||
x, y, z = poly_regression(x_train, y_train, i)
|
||||
eqs.append(x)
|
||||
rmss.append(y)
|
||||
r2s.append(z)
|
||||
except:
|
||||
pass
|
||||
|
||||
for i in range(1, 100 * resolution + 1):
|
||||
try:
|
||||
x, y, z = exp_regression(x_train, y_train, float(i / resolution))
|
||||
eqs.append(x)
|
||||
rmss.append(y)
|
||||
r2s.append(z)
|
||||
except:
|
||||
pass
|
||||
|
||||
for i in range(1, 100 * resolution + 1):
|
||||
try:
|
||||
x, y, z = log_regression(x_train, y_train, float(i / resolution))
|
||||
eqs.append(x)
|
||||
rmss.append(y)
|
||||
r2s.append(z)
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
x, y, z = tanh_regression(x_train, y_train)
|
||||
|
||||
eqs.append(x)
|
||||
rmss.append(y)
|
||||
r2s.append(z)
|
||||
except:
|
||||
pass
|
||||
|
||||
# marks all equations where r2 = 1 as they 95% of the time overfit the data
|
||||
for i in range(0, len(eqs), 1):
|
||||
if r2s[i] == 1:
|
||||
eqs[i] = ""
|
||||
rmss[i] = ""
|
||||
r2s[i] = ""
|
||||
|
||||
while True: # removes all equations marked for removal
|
||||
try:
|
||||
eqs.remove('')
|
||||
rmss.remove('')
|
||||
r2s.remove('')
|
||||
except:
|
||||
break
|
||||
|
||||
overfit = []
|
||||
|
||||
for i in range(0, len(eqs), 1):
|
||||
|
||||
overfit.append(calc_overfit(eqs[i], rmss[i], r2s[i], x_test, y_test))
|
||||
|
||||
return eqs, rmss, r2s, overfit
|
||||
|
||||
|
||||
def select_best_regression(eqs, rmss, r2s, overfit, selector):
|
||||
|
||||
b_eq = ""
|
||||
b_rms = 0
|
||||
b_r2 = 0
|
||||
b_overfit = 0
|
||||
|
||||
ind = 0
|
||||
|
||||
if selector == "min_overfit":
|
||||
|
||||
ind = np.argmin(overfit)
|
||||
|
||||
b_eq = eqs[ind]
|
||||
b_rms = rmss[ind]
|
||||
b_r2 = r2s[ind]
|
||||
b_overfit = overfit[ind]
|
||||
|
||||
if selector == "max_r2s":
|
||||
|
||||
ind = np.argmax(r2s)
|
||||
b_eq = eqs[ind]
|
||||
b_rms = rmss[ind]
|
||||
b_r2 = r2s[ind]
|
||||
b_overfit = overfit[ind]
|
||||
|
||||
return b_eq, b_rms, b_r2, b_overfit
|
||||
|
||||
|
||||
def p_value(x, y): # takes 2 1d arrays
|
||||
|
||||
return stats.ttest_ind(x, y)[1]
|
||||
|
||||
|
||||
# assumes that rows are the independent variable and columns are the dependant. also assumes that time flows from lowest column to highest column.
|
||||
def basic_analysis(data):
|
||||
|
||||
row = len(data)
|
||||
column = []
|
||||
|
||||
for i in range(0, row, 1):
|
||||
column.append(len(data[i]))
|
||||
|
||||
column_max = max(column)
|
||||
row_b_stats = []
|
||||
row_histo = []
|
||||
|
||||
for i in range(0, row, 1):
|
||||
row_b_stats.append(basic_stats(data, "row", i))
|
||||
row_histo.append(histo_analysis(data[i], 0.67449, -0.67449, 0.67449))
|
||||
|
||||
column_b_stats = []
|
||||
|
||||
for i in range(0, column_max, 1):
|
||||
column_b_stats.append(basic_stats(data, "column", i))
|
||||
|
||||
return[row_b_stats, column_b_stats, row_histo]
|
||||
|
||||
|
||||
def benchmark(x, y):
|
||||
|
||||
start_g = time.time()
|
||||
generate_data("data/data.csv", x, y, -10, 10)
|
||||
end_g = time.time()
|
||||
|
||||
start_a = time.time()
|
||||
basic_analysis("data/data.csv")
|
||||
end_a = time.time()
|
||||
|
||||
return [(end_g - start_g), (end_a - start_a)]
|
||||
|
||||
|
||||
def generate_data(filename, x, y, low, high):
|
||||
|
||||
file = open(filename, "w")
|
||||
|
||||
for i in range(0, y, 1):
|
||||
temp = ""
|
||||
|
||||
for j in range(0, x - 1, 1):
|
||||
temp = str(random.uniform(low, high)) + "," + temp
|
||||
|
||||
temp = temp + str(random.uniform(low, high))
|
||||
file.write(temp + "\n")
|
||||
|
||||
def mean(data):
|
||||
|
||||
return np.mean(data)
|
||||
|
||||
def median(data):
|
||||
|
||||
return np.median(data)
|
||||
|
||||
def mode(data):
|
||||
|
||||
return np.argmax(np.bincount(data))
|
||||
|
||||
def stdev(data):
|
||||
|
||||
return np.std(data)
|
||||
|
||||
def variance(data):
|
||||
|
||||
return np.var(data)
|
||||
|
||||
"""
|
||||
|
||||
class StatisticsError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def _sum(data, start=0):
|
||||
count = 0
|
||||
n, d = _exact_ratio(start)
|
||||
partials = {d: n}
|
||||
partials_get = partials.get
|
||||
T = _coerce(int, type(start))
|
||||
for typ, values in groupby(data, type):
|
||||
T = _coerce(T, typ) # or raise TypeError
|
||||
for n, d in map(_exact_ratio, values):
|
||||
count += 1
|
||||
partials[d] = partials_get(d, 0) + n
|
||||
if None in partials:
|
||||
|
||||
total = partials[None]
|
||||
assert not _isfinite(total)
|
||||
else:
|
||||
|
||||
total = sum(Fraction(n, d) for d, n in sorted(partials.items()))
|
||||
return (T, total, count)
|
||||
|
||||
|
||||
def _isfinite(x):
|
||||
try:
|
||||
return x.is_finite() # Likely a Decimal.
|
||||
except AttributeError:
|
||||
return math.isfinite(x) # Coerces to float first.
|
||||
|
||||
|
||||
def _coerce(T, S):
|
||||
|
||||
assert T is not bool, "initial type T is bool"
|
||||
|
||||
if T is S:
|
||||
return T
|
||||
|
||||
if S is int or S is bool:
|
||||
return T
|
||||
if T is int:
|
||||
return S
|
||||
|
||||
if issubclass(S, T):
|
||||
return S
|
||||
if issubclass(T, S):
|
||||
return T
|
||||
|
||||
if issubclass(T, int):
|
||||
return S
|
||||
if issubclass(S, int):
|
||||
return T
|
||||
|
||||
if issubclass(T, Fraction) and issubclass(S, float):
|
||||
return S
|
||||
if issubclass(T, float) and issubclass(S, Fraction):
|
||||
return T
|
||||
|
||||
msg = "don't know how to coerce %s and %s"
|
||||
raise TypeError(msg % (T.__name__, S.__name__))
|
||||
|
||||
|
||||
def _exact_ratio(x):
|
||||
|
||||
try:
|
||||
|
||||
if type(x) is float or type(x) is Decimal:
|
||||
return x.as_integer_ratio()
|
||||
try:
|
||||
|
||||
return (x.numerator, x.denominator)
|
||||
except AttributeError:
|
||||
try:
|
||||
|
||||
return x.as_integer_ratio()
|
||||
except AttributeError:
|
||||
|
||||
pass
|
||||
except (OverflowError, ValueError):
|
||||
|
||||
assert not _isfinite(x)
|
||||
return (x, None)
|
||||
msg = "can't convert type '{}' to numerator/denominator"
|
||||
raise TypeError(msg.format(type(x).__name__))
|
||||
|
||||
|
||||
def _convert(value, T):
|
||||
|
||||
if type(value) is T:
|
||||
|
||||
return value
|
||||
if issubclass(T, int) and value.denominator != 1:
|
||||
T = float
|
||||
try:
|
||||
|
||||
return T(value)
|
||||
except TypeError:
|
||||
if issubclass(T, Decimal):
|
||||
return T(value.numerator) / T(value.denominator)
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
def _counts(data):
|
||||
|
||||
table = collections.Counter(iter(data)).most_common()
|
||||
if not table:
|
||||
return table
|
||||
|
||||
maxfreq = table[0][1]
|
||||
for i in range(1, len(table)):
|
||||
if table[i][1] != maxfreq:
|
||||
table = table[:i]
|
||||
break
|
||||
return table
|
||||
|
||||
|
||||
def _find_lteq(a, x):
|
||||
|
||||
i = bisect_left(a, x)
|
||||
if i != len(a) and a[i] == x:
|
||||
return i
|
||||
raise ValueError
|
||||
|
||||
|
||||
def _find_rteq(a, l, x):
|
||||
|
||||
i = bisect_right(a, x, lo=l)
|
||||
if i != (len(a) + 1) and a[i - 1] == x:
|
||||
return i - 1
|
||||
raise ValueError
|
||||
|
||||
|
||||
def _fail_neg(values, errmsg='negative value'):
|
||||
|
||||
for x in values:
|
||||
if x < 0:
|
||||
raise StatisticsError(errmsg)
|
||||
yield x
|
||||
def mean(data):
|
||||
|
||||
if iter(data) is data:
|
||||
data = list(data)
|
||||
n = len(data)
|
||||
if n < 1:
|
||||
raise StatisticsError('mean requires at least one data point')
|
||||
T, total, count = _sum(data)
|
||||
assert count == n
|
||||
return _convert(total / n, T)
|
||||
|
||||
|
||||
def median(data):
|
||||
|
||||
data = sorted(data)
|
||||
n = len(data)
|
||||
if n == 0:
|
||||
raise StatisticsError("no median for empty data")
|
||||
if n % 2 == 1:
|
||||
return data[n // 2]
|
||||
else:
|
||||
i = n // 2
|
||||
return (data[i - 1] + data[i]) / 2
|
||||
|
||||
|
||||
def mode(data):
|
||||
|
||||
table = _counts(data)
|
||||
if len(table) == 1:
|
||||
return table[0][0]
|
||||
elif table:
|
||||
raise StatisticsError(
|
||||
'no unique mode; found %d equally common values' % len(table)
|
||||
)
|
||||
else:
|
||||
raise StatisticsError('no mode for empty data')
|
||||
|
||||
|
||||
def _ss(data, c=None):
|
||||
|
||||
if c is None:
|
||||
c = mean(data)
|
||||
T, total, count = _sum((x - c)**2 for x in data)
|
||||
|
||||
U, total2, count2 = _sum((x - c) for x in data)
|
||||
assert T == U and count == count2
|
||||
total -= total2**2 / len(data)
|
||||
assert not total < 0, 'negative sum of square deviations: %f' % total
|
||||
return (T, total)
|
||||
|
||||
|
||||
def variance(data, xbar=None):
|
||||
|
||||
if iter(data) is data:
|
||||
data = list(data)
|
||||
n = len(data)
|
||||
if n < 2:
|
||||
raise StatisticsError('variance requires at least two data points')
|
||||
T, ss = _ss(data, xbar)
|
||||
return _convert(ss / (n - 1), T)
|
||||
|
||||
|
||||
def stdev(data, xbar=None):
|
||||
|
||||
var = variance(data, xbar)
|
||||
try:
|
||||
return var.sqrt()
|
||||
except AttributeError:
|
||||
return math.sqrt(var)
|
||||
"""
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1,2 +0,0 @@
|
||||
python setup.py build_ext --inplace
|
||||
pause
|
@@ -1 +0,0 @@
|
||||
python setup.py build_ext --inplace
|
@@ -1,5 +0,0 @@
|
||||
from distutils.core import setup
|
||||
from Cython.Build import cythonize
|
||||
|
||||
setup(name='analysis',
|
||||
ext_modules=cythonize("analysis.py"))
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
[{"outputType":{"type":"APK"},"apkInfo":{"type":"MAIN","splits":[],"versionCode":1,"versionName":"1.0","enabled":true,"outputFile":"app-debug.apk","fullName":"debug","baseName":"debug"},"path":"app-debug.apk","properties":{}}]
|
13
dep/2019/apps/android/source/.gitignore
vendored
13
dep/2019/apps/android/source/.gitignore
vendored
@@ -1,13 +0,0 @@
|
||||
*.iml
|
||||
.gradle
|
||||
/local.properties
|
||||
/.idea/caches
|
||||
/.idea/libraries
|
||||
/.idea/modules.xml
|
||||
/.idea/workspace.xml
|
||||
/.idea/navEditor.xml
|
||||
/.idea/assetWizardSettings.xml
|
||||
.DS_Store
|
||||
/build
|
||||
/captures
|
||||
.externalNativeBuild
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user