mirror of
https://github.com/titanscouting/tra-analysis.git
synced 2025-09-07 07:27:20 +00:00
Compare commits
35 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
aeb4990c81 | ||
|
4545f5721a | ||
|
8d703b10b3 | ||
|
df305f30f0 | ||
|
a123b71ac9 | ||
|
a02668e59c | ||
|
4d6372f620 | ||
|
9d0b6e68d8 | ||
|
b8d51811e0 | ||
|
7a58cd08e2 | ||
|
337fae68ee | ||
|
5e71d05626 | ||
|
01df42aa49 | ||
|
33eea153c1 | ||
|
114eee5d57 | ||
|
06f008746a | ||
|
4f9c4e0dbb | ||
|
5697e8b79e | ||
|
e054e66743 | ||
|
c914bd3754 | ||
|
6c08885a53 | ||
|
375befd0c4 | ||
|
893d1fb1d0 | ||
|
6a426ae4cd | ||
|
50c064ffa4 | ||
|
1b0a9967c8 | ||
|
2605f7c29f | ||
|
6f5a3edd88 | ||
|
457146b0e4 | ||
|
f7fd8ffcf9 | ||
|
77bc792426 | ||
|
39146cc555 | ||
|
2daa09c040 | ||
|
68d27a6302 | ||
|
7fc18b7c35 |
2
.devcontainer/Dockerfile
Normal file
2
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,2 @@
|
||||
FROM python
|
||||
# Docker does not perform shell tilde expansion in WORKDIR ("~/" would create a
# directory literally named "~"), so name the root user's home explicitly.
WORKDIR /root
|
26
.devcontainer/devcontainer.json
Normal file
26
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "TRA Analysis Development Environment",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile",
|
||||
},
|
||||
"settings": {
|
||||
"terminal.integrated.shell.linux": "/bin/bash",
|
||||
"python.pythonPath": "/usr/local/bin/python",
|
||||
"python.linting.enabled": true,
|
||||
"python.linting.pylintEnabled": true,
|
||||
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
|
||||
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
|
||||
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
|
||||
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
|
||||
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
|
||||
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
|
||||
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
|
||||
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
|
||||
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
|
||||
"python.testing.pytestPath": "/usr/local/py-utils/bin/pytest"
|
||||
},
|
||||
"extensions": [
|
||||
"mhutchie.git-graph",
|
||||
],
|
||||
"postCreateCommand": "pip install -r analysis-master/analysis-amd64/requirements.txt"
|
||||
}
|
5
.gitignore
vendored
5
.gitignore
vendored
@@ -18,4 +18,7 @@ data analysis/arthur_pull.ipynb
|
||||
data analysis/keys.txt
|
||||
data analysis/check_for_new_matches.ipynb
|
||||
data analysis/test.ipynb
|
||||
data analysis/visualize_pit.ipynb
|
||||
data analysis/visualize_pit.ipynb
|
||||
data analysis/config/keys.config
|
||||
analysis-master/analysis/__pycache__/
|
||||
data analysis/__pycache__/
|
@@ -1,3 +1,2 @@
|
||||
# tr2022-strategy
|
||||
Titan Robotics 2022 Strategy Team Repository
|
||||
Use at your own risk
|
||||
# red-alliance-analysis
|
||||
Titan Robotics 2022 Strategy Team Repository for Data Analysis Tools. Included with these tools are the backend data analysis engine formatted as a python package, associated binaries for the analysis package, and premade scripts that can be pulled directly from this repository and will integrate with other Red Alliance applications to quickly deploy FRC scouting tools.
|
||||
|
@@ -1,6 +1,6 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: analysis
|
||||
Version: 1.0.0.8
|
||||
Version: 1.0.0.12
|
||||
Summary: analysis package developed by Titan Scouting for The Red Alliance
|
||||
Home-page: https://github.com/titanscout2022/tr2022-strategy
|
||||
Author: The Titan Scouting Team
|
@@ -3,10 +3,13 @@ analysis/__init__.py
|
||||
analysis/analysis.py
|
||||
analysis/regression.py
|
||||
analysis/titanlearn.py
|
||||
analysis/trueskill.py
|
||||
analysis/visualization.py
|
||||
analysis.egg-info/PKG-INFO
|
||||
analysis.egg-info/SOURCES.txt
|
||||
analysis.egg-info/dependency_links.txt
|
||||
analysis.egg-info/requires.txt
|
||||
analysis.egg-info/top_level.txt
|
||||
analysis.egg-info/top_level.txt
|
||||
analysis/metrics/__init__.py
|
||||
analysis/metrics/elo.py
|
||||
analysis/metrics/glicko2.py
|
||||
analysis/metrics/trueskill.py
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,16 +1,43 @@
|
||||
# Titan Robotics Team 2022: Data Analysis Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this should be imported as a python module using 'import analysis'
|
||||
# this should be imported as a python module using 'from analysis import analysis'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has been optimized for multithreaded computing
|
||||
# current benchmark of optimization: 1.33 times faster
|
||||
# setup:
|
||||
|
||||
__version__ = "1.1.13.006"
|
||||
__version__ = "1.2.0.004"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
1.2.0.004:
|
||||
- fixed __all__ to reflected the correct functions and classes
|
||||
- fixed CorrelationTests and StatisticalTests class functions to require self invocation
|
||||
- added missing math import
|
||||
- fixed KNN class functions to require self invocation
|
||||
- fixed Metrics class functions to require self invocation
|
||||
- various spelling fixes in CorrelationTests and StatisticalTests
|
||||
1.2.0.003:
|
||||
- bug fixes with CorrelationTests and StatisticalTests
|
||||
- moved glicko2 and trueskill to the metrics subpackage
|
||||
- moved elo to a new metrics subpackage
|
||||
1.2.0.002:
|
||||
- fixed docs
|
||||
1.2.0.001:
|
||||
- fixed docs
|
||||
1.2.0.000:
|
||||
- cleaned up wild card imports with scipy and sklearn
|
||||
- added CorrelationTests class
|
||||
- added StatisticalTests class
|
||||
- added several correlation tests to CorrelationTests
|
||||
- added several statistical tests to StatisticalTests
|
||||
1.1.13.009:
|
||||
- moved elo, glicko2, trueskill functions under class Metrics
|
||||
1.1.13.008:
|
||||
- moved Glicko2 to a seperate package
|
||||
1.1.13.007:
|
||||
- fixed bug with trueskill
|
||||
1.1.13.006:
|
||||
- cleaned up imports
|
||||
1.1.13.005:
|
||||
@@ -255,21 +282,19 @@ __all__ = [
|
||||
'z_normalize',
|
||||
'histo_analysis',
|
||||
'regression',
|
||||
'elo',
|
||||
'glicko2',
|
||||
'trueskill',
|
||||
'Metrics',
|
||||
'RegressionMetrics',
|
||||
'ClassificationMetrics',
|
||||
'kmeans',
|
||||
'pca',
|
||||
'decisiontree',
|
||||
'knn_classifier',
|
||||
'knn_regressor',
|
||||
'KNN',
|
||||
'NaiveBayes',
|
||||
'SVM',
|
||||
'random_forest_classifier',
|
||||
'random_forest_regressor',
|
||||
'Glicko2',
|
||||
'CorrelationTests',
|
||||
'StatisticalTests',
|
||||
# all statistics functions left out due to integration in other functions
|
||||
]
|
||||
|
||||
@@ -278,14 +303,17 @@ __all__ = [
|
||||
# imports (now in alphabetical order! v 1.0.3.006):
|
||||
|
||||
import csv
|
||||
from analysis.metrics import elo as Elo
|
||||
from analysis.metrics import glicko2 as Glicko2
|
||||
import math
|
||||
import numba
|
||||
from numba import jit
|
||||
import numpy as np
|
||||
import scipy
|
||||
from scipy import *
|
||||
from scipy import optimize, stats
|
||||
import sklearn
|
||||
from sklearn import *
|
||||
from analysis import trueskill as Trueskill
|
||||
from sklearn import preprocessing, pipeline, linear_model, metrics, cluster, decomposition, tree, neighbors, naive_bayes, svm, model_selection, ensemble
|
||||
from analysis.metrics import trueskill as Trueskill
|
||||
|
||||
class error(ValueError):
|
||||
pass
|
||||
@@ -442,32 +470,32 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
return regressions
|
||||
|
||||
def elo(starting_score, opposing_score, observed, N, K):
|
||||
class Metrics:
|
||||
|
||||
expected = 1/(1+10**((np.array(opposing_score) - starting_score)/N))
|
||||
def elo(self, starting_score, opposing_score, observed, N, K):
|
||||
|
||||
return starting_score + K*(np.sum(observed) - np.sum(expected))
|
||||
return Elo.calculate(starting_score, opposing_score, observed, N, K)
|
||||
|
||||
def glicko2(starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
|
||||
def glicko2(self, starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
|
||||
|
||||
player = Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
|
||||
player = Glicko2.Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
|
||||
|
||||
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
|
||||
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
|
||||
|
||||
return (player.rating, player.rd, player.vol)
|
||||
return (player.rating, player.rd, player.vol)
|
||||
|
||||
def trueskill(teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
|
||||
def trueskill(self, teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
|
||||
|
||||
team_ratings = []
|
||||
team_ratings = []
|
||||
|
||||
for team in teams_data:
|
||||
team_temp = []
|
||||
for player in team:
|
||||
player = Trueskill.Rating(player[0], player[1])
|
||||
team_temp.append(player)
|
||||
team_ratings.append(team_temp)
|
||||
for team in teams_data:
|
||||
team_temp = ()
|
||||
for player in team:
|
||||
player = Trueskill.Rating(player[0], player[1])
|
||||
team_temp = team_temp + (player,)
|
||||
team_ratings.append(team_temp)
|
||||
|
||||
return Trueskill.rate(teams_data, observations)
|
||||
return Trueskill.rate(team_ratings, ranks=observations)
|
||||
|
||||
class RegressionMetrics():
|
||||
|
||||
@@ -559,24 +587,25 @@ def decisiontree(data, labels, test_size = 0.3, criterion = "gini", splitter = "
|
||||
|
||||
return model, metrics
|
||||
|
||||
@jit(forceobj=True)
|
||||
def knn_classifier(data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
|
||||
class KNN:
|
||||
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsClassifier()
|
||||
model.fit(data_train, labels_train)
|
||||
predictions = model.predict(data_test)
|
||||
def knn_classifier(self, data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
|
||||
|
||||
return model, ClassificationMetrics(predictions, labels_test)
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsClassifier()
|
||||
model.fit(data_train, labels_train)
|
||||
predictions = model.predict(data_test)
|
||||
|
||||
def knn_regressor(data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
|
||||
return model, ClassificationMetrics(predictions, labels_test)
|
||||
|
||||
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
|
||||
model.fit(data_train, outputs_train)
|
||||
predictions = model.predict(data_test)
|
||||
def knn_regressor(self, data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
|
||||
|
||||
return model, RegressionMetrics(predictions, outputs_test)
|
||||
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
|
||||
model.fit(data_train, outputs_train)
|
||||
predictions = model.predict(data_test)
|
||||
|
||||
return model, RegressionMetrics(predictions, outputs_test)
|
||||
|
||||
class NaiveBayes:
|
||||
|
||||
@@ -690,101 +719,205 @@ def random_forest_regressor(data, outputs, test_size, n_estimators="warn", crite
|
||||
|
||||
return kernel, RegressionMetrics(predictions, outputs_test)
|
||||
|
||||
class Glicko2:
|
||||
class CorrelationTests:
|
||||
|
||||
_tau = 0.5
|
||||
def anova_oneway(self, *args): #expects arrays of samples
|
||||
|
||||
def getRating(self):
|
||||
return (self.__rating * 173.7178) + 1500
|
||||
results = scipy.stats.f_oneway(*args)
|
||||
return {"F-value": results[0], "p-value": results[1]}
|
||||
|
||||
def setRating(self, rating):
|
||||
self.__rating = (rating - 1500) / 173.7178
|
||||
def pearson(self, x, y):
|
||||
|
||||
rating = property(getRating, setRating)
|
||||
results = scipy.stats.pearsonr(x, y)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
def getRd(self):
|
||||
return self.__rd * 173.7178
|
||||
def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'):
|
||||
|
||||
def setRd(self, rd):
|
||||
self.__rd = rd / 173.7178
|
||||
results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
rd = property(getRd, setRd)
|
||||
|
||||
def __init__(self, rating = 1500, rd = 350, vol = 0.06):
|
||||
def point_biserial(self, x,y):
|
||||
|
||||
self.setRating(rating)
|
||||
self.setRd(rd)
|
||||
self.vol = vol
|
||||
|
||||
def _preRatingRD(self):
|
||||
results = scipy.stats.pointbiserialr(x, y)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))
|
||||
def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
    """Kendall's tau rank correlation between x and y.

    Delegates to scipy.stats.kendalltau.

    Returns:
        dict with keys "tau" (first result field) and "p-value" (second).
    """
    options = {"nan_policy": nan_policy, "method": method}

    # Forward initial_lexsort only when the caller actually set it. Passing the
    # None default explicitly is equivalent to omitting it on SciPy versions
    # that accept the keyword, and omitting it avoids a TypeError on versions
    # that no longer do.
    if initial_lexsort is not None:
        options["initial_lexsort"] = initial_lexsort

    results = scipy.stats.kendalltau(x, y, **options)
    return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True):
    """Weighted Kendall's tau via scipy.stats.weightedtau.

    Returns:
        dict with keys "tau" and "p-value", taken from the first and second
        fields of the SciPy result.
    """
    outcome = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
    tau_value = outcome[0]
    p_value = outcome[1]
    return {"tau": tau_value, "p-value": p_value}
|
||||
|
||||
def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
    # Multiscale graph correlation test: forwards every argument unchanged to
    # scipy.stats.multiscale_graphcorr and repackages the first three result
    # fields as "k-value", "p-value" and "data".
    # NOTE(review): how SciPy interprets compute_distance = None may depend on
    # the installed SciPy version -- confirm against the pinned release.

    results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
    return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value
|
||||
|
||||
class StatisticalTests:
|
||||
|
||||
def ttest_onesample(self, a, popmean, axis = 0, nan_policy = 'propagate'):
    """One-sample t-test of `a` against `popmean` via scipy.stats.ttest_1samp.

    Returns:
        dict with keys "t-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.ttest_1samp(a, popmean, axis = axis, nan_policy = nan_policy)
    t_stat, p_val = outcome[0], outcome[1]
    return {"t-value": t_stat, "p-value": p_val}
|
||||
|
||||
def ttest_independent(self, a, b, equal = True, nan_policy = 'propagate'):
    """Two-sample independent t-test via scipy.stats.ttest_ind.

    `equal` is passed to SciPy as `equal_var`.

    Returns:
        dict with keys "t-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy)
    report = {}
    report["t-value"] = outcome[0]
    report["p-value"] = outcome[1]
    return report
|
||||
|
||||
def ttest_statistic(self, o1, o2, equal = True):
    """Independent t-test from summary statistics via scipy.stats.ttest_ind_from_stats.

    `o1` and `o2` are mappings read with the keys "mean", "std" and "nobs";
    `equal` is passed to SciPy as `equal_var`.

    Returns:
        dict with keys "t-value" and "p-value" (first and second result fields).
    """
    first = (o1["mean"], o1["std"], o1["nobs"])
    second = (o2["mean"], o2["std"], o2["nobs"])
    outcome = scipy.stats.ttest_ind_from_stats(*first, *second, equal_var = equal)
    return {"t-value": outcome[0], "p-value": outcome[1]}
|
||||
|
||||
def ttest_related(self, a, b, axis = 0, nan_policy='propagate'):
    """Paired-sample t-test via scipy.stats.ttest_rel.

    Returns:
        dict with keys "t-value" and "p-value" (first and second result fields).
    """
    paired = scipy.stats.ttest_rel(a, b, axis = axis, nan_policy = nan_policy)
    t_stat = paired[0]
    p_val = paired[1]
    return {"t-value": t_stat, "p-value": p_val}
|
||||
|
||||
def ks_fitness(self, rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'):
    """Kolmogorov-Smirnov goodness-of-fit test via scipy.stats.kstest.

    All arguments are forwarded unchanged.

    Returns:
        dict with keys "ks-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.kstest(rvs, cdf, args = args, N = N, alternative = alternative, mode = mode)
    ks_stat, p_val = outcome[0], outcome[1]
    return {"ks-value": ks_stat, "p-value": p_val}
|
||||
|
||||
def chisquare(self, f_obs, f_exp = None, ddof = None, axis = 0):
    """One-way chi-square test via scipy.stats.chisquare.

    Args:
        f_obs: observed frequencies.
        f_exp: expected frequencies, forwarded as-is (None lets SciPy choose).
        ddof: adjustment to the degrees of freedom; None is treated as 0.
        axis: axis along which to apply the test.

    Returns:
        dict with keys "chisquared-value" and "p-value" (first and second
        result fields).
    """
    # SciPy uses ddof arithmetically when computing the degrees of freedom, so
    # forwarding None fails; 0 is SciPy's own default.
    if ddof is None:
        ddof = 0

    results = scipy.stats.chisquare(f_obs, f_exp = f_exp, ddof = ddof, axis = axis)
    return {"chisquared-value": results[0], "p-value": results[1]}
|
||||
|
||||
def powerdivergence(self, f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None):
    """Cressie-Read power divergence test via scipy.stats.power_divergence.

    Args:
        f_obs: observed frequencies.
        f_exp: expected frequencies, forwarded as-is.
        ddof: adjustment to the degrees of freedom; None is treated as 0.
        axis: axis along which to apply the test.
        lambda_: power of the divergence statistic, forwarded as-is.

    Returns:
        dict with keys "powerdivergence-value" and "p-value" (first and second
        result fields).
    """
    # SciPy uses ddof arithmetically when computing the degrees of freedom, so
    # forwarding None fails; 0 is SciPy's own default.
    if ddof is None:
        ddof = 0

    results = scipy.stats.power_divergence(f_obs, f_exp = f_exp, ddof = ddof, axis = axis, lambda_ = lambda_)
    return {"powerdivergence-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ks_twosample(self, x, y, alternative = 'two_sided', mode = 'auto'):
|
||||
|
||||
def update_player(self, rating_list, RD_list, outcome_list):
|
||||
results = scipy.stats.ks_2samp(x, y, alternative = alternative, mode = mode)
|
||||
return {"ks-value": results[0], "p-value": results[1]}
|
||||
|
||||
rating_list = [(x - 1500) / 173.7178 for x in rating_list]
|
||||
RD_list = [x / 173.7178 for x in RD_list]
|
||||
def es_twosample(self, x, y, t = (0.4, 0.8)):
|
||||
|
||||
v = self._v(rating_list, RD_list)
|
||||
self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
|
||||
self._preRatingRD()
|
||||
|
||||
self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * \
|
||||
(outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
self.__rating += math.pow(self.__rd, 2) * tempSum
|
||||
|
||||
|
||||
def _newVol(self, rating_list, RD_list, outcome_list, v):
|
||||
results = scipy.stats.epps_singleton_2samp(x, y, t = t)
|
||||
return {"es-value": results[0], "p-value": results[1]}
|
||||
|
||||
i = 0
|
||||
delta = self._delta(rating_list, RD_list, outcome_list, v)
|
||||
a = math.log(math.pow(self.vol, 2))
|
||||
tau = self._tau
|
||||
x0 = a
|
||||
x1 = 0
|
||||
|
||||
while x0 != x1:
|
||||
# New iteration, so x(i) becomes x(i-1)
|
||||
x0 = x1
|
||||
d = math.pow(self.__rating, 2) + v + math.exp(x0)
|
||||
h1 = -(x0 - a) / math.pow(tau, 2) - 0.5 * math.exp(x0) \
|
||||
/ d + 0.5 * math.exp(x0) * math.pow(delta / d, 2)
|
||||
h2 = -1 / math.pow(tau, 2) - 0.5 * math.exp(x0) * \
|
||||
(math.pow(self.__rating, 2) + v) \
|
||||
/ math.pow(d, 2) + 0.5 * math.pow(delta, 2) * math.exp(x0) \
|
||||
* (math.pow(self.__rating, 2) + v - math.exp(x0)) / math.pow(d, 3)
|
||||
x1 = x0 - (h1 / h2)
|
||||
def mw_rank(self, x, y, use_continuity = True, alternative = None):
|
||||
|
||||
return math.exp(x1 / 2)
|
||||
|
||||
def _delta(self, rating_list, RD_list, outcome_list, v):
|
||||
results = scipy.stats.mannwhitneyu(x, y, use_continuity = use_continuity, alternative = alternative)
|
||||
return {"u-value": results[0], "p-value": results[1]}
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
return v * tempSum
|
||||
|
||||
def _v(self, rating_list, RD_list):
|
||||
def mw_tiecorrection(self, rank_values):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempE = self._E(rating_list[i], RD_list[i])
|
||||
tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
|
||||
return 1 / tempSum
|
||||
|
||||
def _E(self, p2rating, p2RD):
|
||||
results = scipy.stats.tiecorrect(rank_values)
|
||||
return {"correction-factor": results}
|
||||
|
||||
return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
|
||||
(self.__rating - p2rating)))
|
||||
|
||||
def _g(self, RD):
|
||||
def rankdata(self, a, method = 'average'):
|
||||
|
||||
return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))
|
||||
|
||||
def did_not_compete(self):
|
||||
results = scipy.stats.rankdata(a, method = method)
|
||||
return results
|
||||
|
||||
self._preRatingRD()
|
||||
def wilcoxon_ranksum(self, a, b):
    """Wilcoxon rank-sum test via scipy.stats.ranksums.

    Author's note carried over from the original: this seems to be superseded
    by the Mann-Whitney-Wilcoxon U test.

    Returns:
        dict with keys "u-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.ranksums(a, b)
    u_stat, p_val = outcome[0], outcome[1]
    return {"u-value": u_stat, "p-value": p_val}
|
||||
|
||||
def wilcoxon_signedrank(self, x, y = None, zero_method = 'wilcox', correction = False, alternative = 'two-sided'):
    """Wilcoxon signed-rank test via scipy.stats.wilcoxon.

    All arguments are forwarded unchanged.

    Returns:
        dict with keys "t-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.wilcoxon(x, y = y, zero_method = zero_method, correction = correction, alternative = alternative)
    report = {"t-value": outcome[0]}
    report["p-value"] = outcome[1]
    return report
|
||||
|
||||
def kw_htest(self, *args, nan_policy = 'propagate'):
    """Kruskal-Wallis H-test via scipy.stats.kruskal.

    The positional samples are forwarded unchanged.

    Returns:
        dict with keys "h-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.kruskal(*args, nan_policy = nan_policy)
    h_stat, p_val = outcome[0], outcome[1]
    return {"h-value": h_stat, "p-value": p_val}
|
||||
|
||||
def friedman_chisquare(self, *args):
    """Friedman test for repeated measurements via scipy.stats.friedmanchisquare.

    Returns:
        dict with keys "chisquared-value" and "p-value" (first and second
        result fields).
    """
    outcome = scipy.stats.friedmanchisquare(*args)
    chi_stat = outcome[0]
    p_val = outcome[1]
    return {"chisquared-value": chi_stat, "p-value": p_val}
|
||||
|
||||
def bm_wtest(self, x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'):
    """Brunner-Munzel test via scipy.stats.brunnermunzel.

    All arguments are forwarded unchanged.

    Returns:
        dict with keys "w-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.brunnermunzel(x, y, alternative = alternative, distribution = distribution, nan_policy = nan_policy)
    w_stat, p_val = outcome[0], outcome[1]
    return {"w-value": w_stat, "p-value": p_val}
|
||||
|
||||
def combine_pvalues(self, pvalues, method = 'fisher', weights = None):
    """Combine independent p-values via scipy.stats.combine_pvalues.

    Returns:
        dict with keys "combined-statistic" and "p-value" (first and second
        result fields).
    """
    outcome = scipy.stats.combine_pvalues(pvalues, method = method, weights = weights)
    combined = outcome[0]
    p_val = outcome[1]
    return {"combined-statistic": combined, "p-value": p_val}
|
||||
|
||||
def jb_fitness(self, x):
    """Jarque-Bera goodness-of-fit test via scipy.stats.jarque_bera.

    Returns:
        dict with keys "jb-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.jarque_bera(x)
    jb_stat, p_val = outcome[0], outcome[1]
    return {"jb-value": jb_stat, "p-value": p_val}
|
||||
|
||||
def ab_equality(self, x, y):
    """Ansari-Bradley test via scipy.stats.ansari.

    Returns:
        dict with keys "ab-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.ansari(x, y)
    ab_stat = outcome[0]
    p_val = outcome[1]
    return {"ab-value": ab_stat, "p-value": p_val}
|
||||
|
||||
def bartlett_variance(self, *args):
    """Bartlett's test for equal variances via scipy.stats.bartlett.

    Returns:
        dict with keys "t-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.bartlett(*args)
    t_stat, p_val = outcome[0], outcome[1]
    return {"t-value": t_stat, "p-value": p_val}
|
||||
|
||||
def levene_variance(self, *args, center = 'median', proportiontocut = 0.05):
    """Levene's test for equal variances via scipy.stats.levene.

    All arguments are forwarded unchanged.

    Returns:
        dict with keys "w-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.levene(*args, center = center, proportiontocut = proportiontocut)
    w_stat = outcome[0]
    p_val = outcome[1]
    return {"w-value": w_stat, "p-value": p_val}
|
||||
|
||||
def sw_normality(self, x):
    """Shapiro-Wilk normality test via scipy.stats.shapiro.

    Returns:
        dict with keys "w-value" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.shapiro(x)
    w_stat, p_val = outcome[0], outcome[1]
    return {"w-value": w_stat, "p-value": p_val}
|
||||
|
||||
def shapiro(self, x):
    """Placeholder that ignores `x` and always returns a fixed string.

    It performs no statistical test.
    """
    verdict = "destroyed by facts and logic"
    return verdict
|
||||
|
||||
def ad_onesample(self, x, dist = 'norm'):
    """Anderson-Darling one-sample test via scipy.stats.anderson.

    Returns:
        dict with keys "d-value", "critical-values" and "significance-value",
        taken from the first three fields of the SciPy result in that order.
    """
    outcome = scipy.stats.anderson(x, dist = dist)
    labels = ("d-value", "critical-values", "significance-value")
    return {label: outcome[position] for position, label in enumerate(labels)}
|
||||
|
||||
def ad_ksample(self, samples, midrank = True):
    """Anderson-Darling k-sample test via scipy.stats.anderson_ksamp.

    Returns:
        dict with keys "d-value", "critical-values" and "significance-value",
        taken from the first three fields of the SciPy result in that order.
    """
    outcome = scipy.stats.anderson_ksamp(samples, midrank = midrank)
    labels = ("d-value", "critical-values", "significance-value")
    return {label: outcome[position] for position, label in enumerate(labels)}
|
||||
|
||||
def binomial(self, x, n = None, p = 0.5, alternative = 'two-sided'):
    """Exact binomial test.

    Args:
        x: number of successes, or, when `n` is None, a two-element sequence
            (successes, failures).
        n: number of trials; ignored by the fallback path only when None.
        p: hypothesised probability of success.
        alternative: alternative hypothesis, forwarded to SciPy.

    Returns:
        dict with the single key "p-value".
    """
    legacy_test = getattr(scipy.stats, "binom_test", None)

    # Use the legacy function whenever the installed SciPy still provides it,
    # so existing environments behave exactly as before.
    if legacy_test is not None:
        return {"p-value": legacy_test(x, n = n, p = p, alternative = alternative)}

    # Fallback for SciPy versions without binom_test: binomtest needs an
    # explicit success count and trial count.
    if n is None:
        successes, failures = x
        trials = successes + failures
    else:
        successes, trials = x, n

    outcome = scipy.stats.binomtest(int(successes), n = int(trials), p = p, alternative = alternative)
    return {"p-value": outcome.pvalue}
|
||||
|
||||
def fk_variance(self, *args, center = 'median', proportiontocut = 0.05):
    """Fligner-Killeen test for equal variances via scipy.stats.fligner.

    All arguments are forwarded unchanged.

    Returns:
        dict with keys "h-value" and "p-value" (first and second result
        fields). The original author was unsure whether the statistic is
        properly called an h value.
    """
    outcome = scipy.stats.fligner(*args, center = center, proportiontocut = proportiontocut)
    h_stat = outcome[0]
    p_val = outcome[1]
    return {"h-value": h_stat, "p-value": p_val}
|
||||
|
||||
def mood_mediantest(self, *args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'):
    """Mood's median test via scipy.stats.median_test.

    All arguments are forwarded unchanged.

    Returns:
        dict with keys "chisquared-value", "p-value", "m-value" and "table",
        taken from the first four fields of the SciPy result in that order.
    """
    outcome = scipy.stats.median_test(*args, ties = ties, correction = correction, lambda_ = lambda_, nan_policy = nan_policy)
    labels = ("chisquared-value", "p-value", "m-value", "table")
    return {label: outcome[position] for position, label in enumerate(labels)}
|
||||
|
||||
def mood_equalscale(self, x, y, axis = 0):
    """Mood's test for equal scale parameters via scipy.stats.mood.

    Returns:
        dict with keys "z-score" and "p-value" (first and second result fields).
    """
    outcome = scipy.stats.mood(x, y, axis = axis)
    z_score, p_val = outcome[0], outcome[1]
    return {"z-score": z_score, "p-value": p_val}
|
||||
|
||||
def skewtest(self, a, axis = 0, nan_policy = 'propagate'):
    """Test whether the skew of `a` differs from that of a normal distribution.

    Delegates to scipy.stats.skewtest. The default nan_policy is spelled
    'propagate', the value SciPy accepts; the earlier misspelled default was
    rejected by SciPy on every default call.

    Returns:
        dict with keys "z-score" and "p-value" (first and second result fields).
    """
    results = scipy.stats.skewtest(a, axis = axis, nan_policy = nan_policy)
    return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def kurtosistest(self, a, axis = 0, nan_policy = 'propagate'):
    """Test whether the kurtosis of `a` differs from that of a normal distribution.

    Delegates to scipy.stats.kurtosistest. The default nan_policy is spelled
    'propagate', the value SciPy accepts; the earlier misspelled default was
    rejected by SciPy on every default call.

    Returns:
        dict with keys "z-score" and "p-value" (first and second result fields).
    """
    results = scipy.stats.kurtosistest(a, axis = axis, nan_policy = nan_policy)
    return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def normaltest(self, a, axis = 0, nan_policy = 'propagate'):
    """Test whether `a` differs from a normal distribution.

    Delegates to scipy.stats.normaltest. The default nan_policy is spelled
    'propagate', the value SciPy accepts; the earlier misspelled default was
    rejected by SciPy on every default call.

    Returns:
        dict with keys "z-score" and "p-value" (first and second result fields).
    """
    results = scipy.stats.normaltest(a, axis = axis, nan_policy = nan_policy)
    return {"z-score": results[0], "p-value": results[1]}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7
analysis-master/analysis-amd64/analysis/metrics/elo.py
Normal file
7
analysis-master/analysis-amd64/analysis/metrics/elo.py
Normal file
@@ -0,0 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
def calculate(starting_score, opposing_score, observed, N, K):
    """Return the Elo-updated score for one player.

    Args:
        starting_score: the player's current score.
        opposing_score: opponent score(s); converted with np.array.
        observed: observed result(s); summed with np.sum.
        N: divisor applied to the score difference inside the power of ten.
        K: multiplier applied to (sum of observed - sum of expected).

    Returns:
        starting_score plus K times the difference between the summed observed
        and summed expected results.
    """
    score_gap = np.array(opposing_score) - starting_score
    expected = 1/(1+10**(score_gap/N))
    surplus = np.sum(observed) - np.sum(expected)
    return starting_score + K*surplus
|
99
analysis-master/analysis-amd64/analysis/metrics/glicko2.py
Normal file
99
analysis-master/analysis-amd64/analysis/metrics/glicko2.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import math
|
||||
|
||||
class Glicko2:
    """Glicko-2 rating state for a single player.

    Rating and RD are stored internally on a rescaled axis (rating minus 1500,
    both divided by 173.7178) and converted back by the `rating` and `rd`
    properties. `vol` is stored exactly as given.
    """

    # Constant used by the volatility update in _newVol.
    _tau = 0.5

    # Upper bound on Newton iterations in _newVol. The loop stops on exact
    # float equality, which could otherwise cycle forever between two
    # neighbouring values.
    _max_vol_iterations = 1000

    def getRating(self):
        """Return the rating converted from the internal scale."""
        return (self.__rating * 173.7178) + 1500

    def setRating(self, rating):
        """Store `rating` on the internal scale."""
        self.__rating = (rating - 1500) / 173.7178

    rating = property(getRating, setRating)

    def getRd(self):
        """Return the rating deviation converted from the internal scale."""
        return self.__rd * 173.7178

    def setRd(self, rd):
        """Store `rd` on the internal scale."""
        self.__rd = rd / 173.7178

    rd = property(getRd, setRd)

    def __init__(self, rating = 1500, rd = 350, vol = 0.06):
        """Create a player with the given rating, rating deviation and volatility."""
        self.setRating(rating)
        self.setRd(rd)
        self.vol = vol

    def _preRatingRD(self):
        """Grow the internal RD by the current volatility (root of summed squares)."""
        self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))

    def update_player(self, rating_list, RD_list, outcome_list):
        """Update rating, RD and volatility from one batch of results.

        Args:
            rating_list: opponents' ratings on the public scale.
            RD_list: opponents' rating deviations on the public scale.
            outcome_list: one result per opponent; presumably 1 for a win and
                0 for a loss -- TODO confirm against callers.

        An empty `rating_list` is treated as a period without games: only
        did_not_compete() is applied. Previously this raised ZeroDivisionError.
        """
        if len(rating_list) == 0:
            self.did_not_compete()
            return

        # Convert the opponents' values to the internal scale.
        rating_list = [(x - 1500) / 173.7178 for x in rating_list]
        RD_list = [x / 173.7178 for x in RD_list]

        v = self._v(rating_list, RD_list)
        self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
        self._preRatingRD()

        self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))

        tempSum = 0
        for i in range(len(rating_list)):
            tempSum += self._g(RD_list[i]) * \
                (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
        self.__rating += math.pow(self.__rd, 2) * tempSum

    def _newVol(self, rating_list, RD_list, outcome_list, v):
        """Return the new volatility found by Newton iteration.

        NOTE(review): `d` below is built from the squared internal rating; the
        published Glicko-2 procedure appears to use the squared rating
        deviation here -- confirm before changing, since it alters results.
        """
        delta = self._delta(rating_list, RD_list, outcome_list, v)
        a = math.log(math.pow(self.vol, 2))
        tau = self._tau
        x0 = a
        x1 = 0

        iterations = 0
        while x0 != x1 and iterations < self._max_vol_iterations:
            # New iteration, so x(i) becomes x(i-1)
            x0 = x1
            d = math.pow(self.__rating, 2) + v + math.exp(x0)
            h1 = -(x0 - a) / math.pow(tau, 2) - 0.5 * math.exp(x0) \
                / d + 0.5 * math.exp(x0) * math.pow(delta / d, 2)
            h2 = -1 / math.pow(tau, 2) - 0.5 * math.exp(x0) * \
                (math.pow(self.__rating, 2) + v) \
                / math.pow(d, 2) + 0.5 * math.pow(delta, 2) * math.exp(x0) \
                * (math.pow(self.__rating, 2) + v - math.exp(x0)) / math.pow(d, 3)
            x1 = x0 - (h1 / h2)
            iterations += 1

        return math.exp(x1 / 2)

    def _delta(self, rating_list, RD_list, outcome_list, v):
        """Return v times the g-weighted sum of (outcome - expected outcome)."""
        tempSum = 0
        for i in range(len(rating_list)):
            tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
        return v * tempSum

    def _v(self, rating_list, RD_list):
        """Return the reciprocal of the summed g^2 * E * (1 - E) over all opponents."""
        tempSum = 0
        for i in range(len(rating_list)):
            tempE = self._E(rating_list[i], RD_list[i])
            tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
        return 1 / tempSum

    def _E(self, p2rating, p2RD):
        """Return the logistic expected outcome against one opponent (internal scale)."""
        return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
            (self.__rating - p2rating)))

    def _g(self, RD):
        """Return the weighting factor 1 / sqrt(1 + 3 * RD^2 / pi^2)."""
        return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))

    def did_not_compete(self):
        """Apply only the RD growth step, for a period without games."""
        self._preRatingRD()
|
1
analysis-master/analysis-amd64/build.sh
Executable file
1
analysis-master/analysis-amd64/build.sh
Executable file
@@ -0,0 +1 @@
|
||||
python setup.py sdist bdist_wheel || python3 setup.py sdist bdist_wheel
|
@@ -1,16 +1,43 @@
|
||||
# Titan Robotics Team 2022: Data Analysis Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this should be imported as a python module using 'import analysis'
|
||||
# this should be imported as a python module using 'from analysis import analysis'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has been optimized for multithreaded computing
|
||||
# current benchmark of optimization: 1.33 times faster
|
||||
# setup:
|
||||
|
||||
__version__ = "1.1.13.006"
|
||||
__version__ = "1.2.0.004"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
1.2.0.004:
|
||||
- fixed __all__ to reflected the correct functions and classes
|
||||
- fixed CorrelationTests and StatisticalTests class functions to require self invocation
|
||||
- added missing math import
|
||||
- fixed KNN class functions to require self invocation
|
||||
- fixed Metrics class functions to require self invocation
|
||||
- various spelling fixes in CorrelationTests and StatisticalTests
|
||||
1.2.0.003:
|
||||
- bug fixes with CorrelationTests and StatisticalTests
|
||||
- moved glicko2 and trueskill to the metrics subpackage
|
||||
- moved elo to a new metrics subpackage
|
||||
1.2.0.002:
|
||||
- fixed docs
|
||||
1.2.0.001:
|
||||
- fixed docs
|
||||
1.2.0.000:
|
||||
- cleaned up wild card imports with scipy and sklearn
|
||||
- added CorrelationTests class
|
||||
- added StatisticalTests class
|
||||
- added several correlation tests to CorrelationTests
|
||||
- added several statistical tests to StatisticalTests
|
||||
1.1.13.009:
|
||||
- moved elo, glicko2, trueskill functions under class Metrics
|
||||
1.1.13.008:
|
||||
- moved Glicko2 to a seperate package
|
||||
1.1.13.007:
|
||||
- fixed bug with trueskill
|
||||
1.1.13.006:
|
||||
- cleaned up imports
|
||||
1.1.13.005:
|
||||
@@ -255,21 +282,19 @@ __all__ = [
|
||||
'z_normalize',
|
||||
'histo_analysis',
|
||||
'regression',
|
||||
'elo',
|
||||
'glicko2',
|
||||
'trueskill',
|
||||
'Metrics',
|
||||
'RegressionMetrics',
|
||||
'ClassificationMetrics',
|
||||
'kmeans',
|
||||
'pca',
|
||||
'decisiontree',
|
||||
'knn_classifier',
|
||||
'knn_regressor',
|
||||
'KNN',
|
||||
'NaiveBayes',
|
||||
'SVM',
|
||||
'random_forest_classifier',
|
||||
'random_forest_regressor',
|
||||
'Glicko2',
|
||||
'CorrelationTests',
|
||||
'StatisticalTests',
|
||||
# all statistics functions left out due to integration in other functions
|
||||
]
|
||||
|
||||
@@ -278,14 +303,17 @@ __all__ = [
|
||||
# imports (now in alphabetical order! v 1.0.3.006):
|
||||
|
||||
import csv
|
||||
from analysis.metrics import elo as Elo
|
||||
from analysis.metrics import glicko2 as Glicko2
|
||||
import math
|
||||
import numba
|
||||
from numba import jit
|
||||
import numpy as np
|
||||
import scipy
|
||||
from scipy import *
|
||||
from scipy import optimize, stats
|
||||
import sklearn
|
||||
from sklearn import *
|
||||
from analysis import trueskill as Trueskill
|
||||
from sklearn import preprocessing, pipeline, linear_model, metrics, cluster, decomposition, tree, neighbors, naive_bayes, svm, model_selection, ensemble
|
||||
from analysis.metrics import trueskill as Trueskill
|
||||
|
||||
class error(ValueError):
    # Module-specific exception type; it adds nothing to ValueError, so
    # handlers written for ValueError will also catch it.
    pass
|
||||
@@ -442,32 +470,32 @@ def regression(inputs, outputs, args): # inputs, outputs expects N-D array
|
||||
|
||||
return regressions
|
||||
|
||||
def elo(starting_score, opposing_score, observed, N, K):
|
||||
class Metrics:
|
||||
|
||||
expected = 1/(1+10**((np.array(opposing_score) - starting_score)/N))
|
||||
def elo(self, starting_score, opposing_score, observed, N, K):
|
||||
|
||||
return starting_score + K*(np.sum(observed) - np.sum(expected))
|
||||
return Elo.calculate(starting_score, opposing_score, observed, N, K)
|
||||
|
||||
def glicko2(starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
|
||||
def glicko2(self, starting_score, starting_rd, starting_vol, opposing_score, opposing_rd, observations):
|
||||
|
||||
player = Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
|
||||
player = Glicko2.Glicko2(rating = starting_score, rd = starting_rd, vol = starting_vol)
|
||||
|
||||
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
|
||||
player.update_player([x for x in opposing_score], [x for x in opposing_rd], observations)
|
||||
|
||||
return (player.rating, player.rd, player.vol)
|
||||
return (player.rating, player.rd, player.vol)
|
||||
|
||||
def trueskill(teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
|
||||
def trueskill(self, teams_data, observations): # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
|
||||
|
||||
team_ratings = []
|
||||
team_ratings = []
|
||||
|
||||
for team in teams_data:
|
||||
team_temp = []
|
||||
for player in team:
|
||||
player = Trueskill.Rating(player[0], player[1])
|
||||
team_temp.append(player)
|
||||
team_ratings.append(team_temp)
|
||||
for team in teams_data:
|
||||
team_temp = ()
|
||||
for player in team:
|
||||
player = Trueskill.Rating(player[0], player[1])
|
||||
team_temp = team_temp + (player,)
|
||||
team_ratings.append(team_temp)
|
||||
|
||||
return Trueskill.rate(teams_data, observations)
|
||||
return Trueskill.rate(team_ratings, ranks=observations)
|
||||
|
||||
class RegressionMetrics():
|
||||
|
||||
@@ -559,24 +587,25 @@ def decisiontree(data, labels, test_size = 0.3, criterion = "gini", splitter = "
|
||||
|
||||
return model, metrics
|
||||
|
||||
@jit(forceobj=True)
|
||||
def knn_classifier(data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
|
||||
class KNN:
|
||||
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsClassifier()
|
||||
model.fit(data_train, labels_train)
|
||||
predictions = model.predict(data_test)
|
||||
def knn_classifier(self, data, labels, test_size = 0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'): #expects *2d data and 1d labels post-scaling
|
||||
|
||||
return model, ClassificationMetrics(predictions, labels_test)
|
||||
data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsClassifier()
|
||||
model.fit(data_train, labels_train)
|
||||
predictions = model.predict(data_test)
|
||||
|
||||
def knn_regressor(data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
|
||||
return model, ClassificationMetrics(predictions, labels_test)
|
||||
|
||||
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
|
||||
model.fit(data_train, outputs_train)
|
||||
predictions = model.predict(data_test)
|
||||
def knn_regressor(self, data, outputs, test_size, n_neighbors = 5, weights = "uniform", algorithm = "auto", leaf_size = 30, p = 2, metric = "minkowski", metric_params = None, n_jobs = None):
|
||||
|
||||
return model, RegressionMetrics(predictions, outputs_test)
|
||||
data_train, data_test, outputs_train, outputs_test = sklearn.model_selection.train_test_split(data, outputs, test_size=test_size, random_state=1)
|
||||
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors = n_neighbors, weights = weights, algorithm = algorithm, leaf_size = leaf_size, p = p, metric = metric, metric_params = metric_params, n_jobs = n_jobs)
|
||||
model.fit(data_train, outputs_train)
|
||||
predictions = model.predict(data_test)
|
||||
|
||||
return model, RegressionMetrics(predictions, outputs_test)
|
||||
|
||||
class NaiveBayes:
|
||||
|
||||
@@ -690,101 +719,205 @@ def random_forest_regressor(data, outputs, test_size, n_estimators="warn", crite
|
||||
|
||||
return kernel, RegressionMetrics(predictions, outputs_test)
|
||||
|
||||
class Glicko2:
|
||||
class CorrelationTests:
|
||||
|
||||
_tau = 0.5
|
||||
def anova_oneway(self, *args): #expects arrays of samples
|
||||
|
||||
def getRating(self):
|
||||
return (self.__rating * 173.7178) + 1500
|
||||
results = scipy.stats.f_oneway(*args)
|
||||
return {"F-value": results[0], "p-value": results[1]}
|
||||
|
||||
def setRating(self, rating):
|
||||
self.__rating = (rating - 1500) / 173.7178
|
||||
def pearson(self, x, y):
|
||||
|
||||
rating = property(getRating, setRating)
|
||||
results = scipy.stats.pearsonr(x, y)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
def getRd(self):
|
||||
return self.__rd * 173.7178
|
||||
def spearman(self, a, b = None, axis = 0, nan_policy = 'propagate'):
|
||||
|
||||
def setRd(self, rd):
|
||||
self.__rd = rd / 173.7178
|
||||
results = scipy.stats.spearmanr(a, b = b, axis = axis, nan_policy = nan_policy)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
rd = property(getRd, setRd)
|
||||
|
||||
def __init__(self, rating = 1500, rd = 350, vol = 0.06):
|
||||
def point_biserial(self, x,y):
|
||||
|
||||
self.setRating(rating)
|
||||
self.setRd(rd)
|
||||
self.vol = vol
|
||||
|
||||
def _preRatingRD(self):
|
||||
results = scipy.stats.pointbiserialr(x, y)
|
||||
return {"r-value": results[0], "p-value": results[1]}
|
||||
|
||||
self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))
|
||||
def kendall(self, x, y, initial_lexsort = None, nan_policy = 'propagate', method = 'auto'):
|
||||
|
||||
results = scipy.stats.kendalltau(x, y, initial_lexsort = initial_lexsort, nan_policy = nan_policy, method = method)
|
||||
return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
def kendall_weighted(self, x, y, rank = True, weigher = None, additive = True):
|
||||
|
||||
results = scipy.stats.weightedtau(x, y, rank = rank, weigher = weigher, additive = additive)
|
||||
return {"tau": results[0], "p-value": results[1]}
|
||||
|
||||
def mgc(self, x, y, compute_distance = None, reps = 1000, workers = 1, is_twosamp = False, random_state = None):
|
||||
|
||||
results = scipy.stats.multiscale_graphcorr(x, y, compute_distance = compute_distance, reps = reps, workers = workers, is_twosamp = is_twosamp, random_state = random_state)
|
||||
return {"k-value": results[0], "p-value": results[1], "data": results[2]} # unsure if MGC test returns a k value
|
||||
|
||||
class StatisticalTests:
|
||||
|
||||
def ttest_onesample(self, a, popmean, axis = 0, nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.ttest_1samp(a, popmean, axis = axis, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ttest_independent(self, a, b, equal = True, nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.ttest_ind(a, b, equal_var = equal, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ttest_statistic(self, o1, o2, equal = True):
|
||||
|
||||
results = scipy.stats.ttest_ind_from_stats(o1["mean"], o1["std"], o1["nobs"], o2["mean"], o2["std"], o2["nobs"], equal_var = equal)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ttest_related(self, a, b, axis = 0, nan_policy='propagate'):
|
||||
|
||||
results = scipy.stats.ttest_rel(a, b, axis = axis, nan_policy = nan_policy)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ks_fitness(self, rvs, cdf, args = (), N = 20, alternative = 'two-sided', mode = 'approx'):
|
||||
|
||||
results = scipy.stats.kstest(rvs, cdf, args = args, N = N, alternative = alternative, mode = mode)
|
||||
return {"ks-value": results[0], "p-value": results[1]}
|
||||
|
||||
def chisquare(self, f_obs, f_exp = None, ddof = None, axis = 0):
    """Run scipy's one-way chi-square test and return the result as a dict.

    f_obs, f_exp, ddof and axis are forwarded to scipy.stats.chisquare.
    ddof = None (this wrapper's default) is translated to 0, which is scipy's
    own default; forwarding None itself makes scipy fail when it subtracts
    ddof from the degrees of freedom.

    Returns {"chisquared-value": statistic, "p-value": p}.
    """
    if ddof is None:
        ddof = 0 # scipy expects a number here, not None

    results = scipy.stats.chisquare(f_obs, f_exp = f_exp, ddof = ddof, axis = axis)
    return {"chisquared-value": results[0], "p-value": results[1]}
|
||||
|
||||
def powerdivergence(self, f_obs, f_exp = None, ddof = None, axis = 0, lambda_ = None):
    """Run scipy's Cressie-Read power divergence test and return a dict.

    All arguments are forwarded to scipy.stats.power_divergence. ddof = None
    (this wrapper's default) is translated to 0, scipy's own default, because
    scipy cannot subtract None from the degrees of freedom. lambda_ = None is
    passed through unchanged.

    Returns {"powerdivergence-value": statistic, "p-value": p}.
    """
    if ddof is None:
        ddof = 0 # scipy expects a number here, not None

    results = scipy.stats.power_divergence(f_obs, f_exp = f_exp, ddof = ddof, axis = axis, lambda_ = lambda_)
    return {"powerdivergence-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ks_twosample(self, x, y, alternative = 'two_sided', mode = 'auto'):
|
||||
|
||||
def update_player(self, rating_list, RD_list, outcome_list):
|
||||
results = scipy.stats.ks_2samp(x, y, alternative = alternative, mode = mode)
|
||||
return {"ks-value": results[0], "p-value": results[1]}
|
||||
|
||||
rating_list = [(x - 1500) / 173.7178 for x in rating_list]
|
||||
RD_list = [x / 173.7178 for x in RD_list]
|
||||
def es_twosample(self, x, y, t = (0.4, 0.8)):
|
||||
|
||||
v = self._v(rating_list, RD_list)
|
||||
self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
|
||||
self._preRatingRD()
|
||||
|
||||
self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * \
|
||||
(outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
self.__rating += math.pow(self.__rd, 2) * tempSum
|
||||
|
||||
|
||||
def _newVol(self, rating_list, RD_list, outcome_list, v):
|
||||
results = scipy.stats.epps_singleton_2samp(x, y, t = t)
|
||||
return {"es-value": results[0], "p-value": results[1]}
|
||||
|
||||
i = 0
|
||||
delta = self._delta(rating_list, RD_list, outcome_list, v)
|
||||
a = math.log(math.pow(self.vol, 2))
|
||||
tau = self._tau
|
||||
x0 = a
|
||||
x1 = 0
|
||||
|
||||
while x0 != x1:
|
||||
# New iteration, so x(i) becomes x(i-1)
|
||||
x0 = x1
|
||||
d = math.pow(self.__rating, 2) + v + math.exp(x0)
|
||||
h1 = -(x0 - a) / math.pow(tau, 2) - 0.5 * math.exp(x0) \
|
||||
/ d + 0.5 * math.exp(x0) * math.pow(delta / d, 2)
|
||||
h2 = -1 / math.pow(tau, 2) - 0.5 * math.exp(x0) * \
|
||||
(math.pow(self.__rating, 2) + v) \
|
||||
/ math.pow(d, 2) + 0.5 * math.pow(delta, 2) * math.exp(x0) \
|
||||
* (math.pow(self.__rating, 2) + v - math.exp(x0)) / math.pow(d, 3)
|
||||
x1 = x0 - (h1 / h2)
|
||||
def mw_rank(self, x, y, use_continuity = True, alternative = None):
|
||||
|
||||
return math.exp(x1 / 2)
|
||||
|
||||
def _delta(self, rating_list, RD_list, outcome_list, v):
|
||||
results = scipy.stats.mannwhitneyu(x, y, use_continuity = use_continuity, alternative = alternative)
|
||||
return {"u-value": results[0], "p-value": results[1]}
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
|
||||
return v * tempSum
|
||||
|
||||
def _v(self, rating_list, RD_list):
|
||||
def mw_tiecorrection(self, rank_values):
|
||||
|
||||
tempSum = 0
|
||||
for i in range(len(rating_list)):
|
||||
tempE = self._E(rating_list[i], RD_list[i])
|
||||
tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
|
||||
return 1 / tempSum
|
||||
|
||||
def _E(self, p2rating, p2RD):
|
||||
results = scipy.stats.tiecorrect(rank_values)
|
||||
return {"correction-factor": results}
|
||||
|
||||
return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
|
||||
(self.__rating - p2rating)))
|
||||
|
||||
def _g(self, RD):
|
||||
def rankdata(self, a, method = 'average'):
|
||||
|
||||
return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))
|
||||
|
||||
def did_not_compete(self):
|
||||
results = scipy.stats.rankdata(a, method = method)
|
||||
return results
|
||||
|
||||
self._preRatingRD()
|
||||
def wilcoxon_ranksum(self, a, b): # this seems to be superceded by Mann Whitney Wilcoxon U Test
|
||||
|
||||
results = scipy.stats.ranksums(a, b)
|
||||
return {"u-value": results[0], "p-value": results[1]}
|
||||
|
||||
def wilcoxon_signedrank(self, x, y = None, zero_method = 'wilcox', correction = False, alternative = 'two-sided'):
|
||||
|
||||
results = scipy.stats.wilcoxon(x, y = y, zero_method = zero_method, correction = correction, alternative = alternative)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def kw_htest(self, *args, nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.kruskal(*args, nan_policy = nan_policy)
|
||||
return {"h-value": results[0], "p-value": results[1]}
|
||||
|
||||
def friedman_chisquare(self, *args):
|
||||
|
||||
results = scipy.stats.friedmanchisquare(*args)
|
||||
return {"chisquared-value": results[0], "p-value": results[1]}
|
||||
|
||||
def bm_wtest(self, x, y, alternative = 'two-sided', distribution = 't', nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.brunnermunzel(x, y, alternative = alternative, distribution = distribution, nan_policy = nan_policy)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def combine_pvalues(self, pvalues, method = 'fisher', weights = None):
|
||||
|
||||
results = scipy.stats.combine_pvalues(pvalues, method = method, weights = weights)
|
||||
return {"combined-statistic": results[0], "p-value": results[1]}
|
||||
|
||||
def jb_fitness(self, x):
|
||||
|
||||
results = scipy.stats.jarque_bera(x)
|
||||
return {"jb-value": results[0], "p-value": results[1]}
|
||||
|
||||
def ab_equality(self, x, y):
|
||||
|
||||
results = scipy.stats.ansari(x, y)
|
||||
return {"ab-value": results[0], "p-value": results[1]}
|
||||
|
||||
def bartlett_variance(self, *args):
|
||||
|
||||
results = scipy.stats.bartlett(*args)
|
||||
return {"t-value": results[0], "p-value": results[1]}
|
||||
|
||||
def levene_variance(self, *args, center = 'median', proportiontocut = 0.05):
|
||||
|
||||
results = scipy.stats.levene(*args, center = center, proportiontocut = proportiontocut)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def sw_normality(self, x):
|
||||
|
||||
results = scipy.stats.shapiro(x)
|
||||
return {"w-value": results[0], "p-value": results[1]}
|
||||
|
||||
def shapiro(self, x):
|
||||
|
||||
return "destroyed by facts and logic"
|
||||
|
||||
def ad_onesample(self, x, dist = 'norm'):
|
||||
|
||||
results = scipy.stats.anderson(x, dist = dist)
|
||||
return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}
|
||||
|
||||
def ad_ksample(self, samples, midrank = True):
|
||||
|
||||
results = scipy.stats.anderson_ksamp(samples, midrank = midrank)
|
||||
return {"d-value": results[0], "critical-values": results[1], "significance-value": results[2]}
|
||||
|
||||
def binomial(self, x, n = None, p = 0.5, alternative = 'two-sided'):
    """Exact binomial test; returns {"p-value": p}.

    x is either the number of successes (then n, the number of trials, is
    required) or a two-element sequence (successes, failures), in which case
    n is ignored. p is the hypothesised success probability and alternative
    is forwarded to scipy.

    Uses scipy.stats.binom_test when the installed SciPy still provides it
    and falls back to scipy.stats.binomtest otherwise, so the wrapper keeps
    working on SciPy releases that dropped the legacy function.
    """
    legacy_test = getattr(scipy.stats, "binom_test", None)

    if legacy_test is not None:
        results = legacy_test(x, n = n, p = p, alternative = alternative)
    else:
        # Reproduce binom_test's handling of the two accepted shapes of x.
        counts = np.atleast_1d(x)
        if counts.size == 2:
            successes = int(counts[0])
            trials = int(counts[0]) + int(counts[1])
        elif counts.size == 1:
            successes = int(counts[0])
            if n is None or n < successes:
                raise ValueError("n must be >= x")
            trials = int(n)
        else:
            raise ValueError("Incorrect length for x.")
        results = scipy.stats.binomtest(successes, n = trials, p = p, alternative = alternative).pvalue

    return {"p-value": results}
|
||||
|
||||
def fk_variance(self, *args, center = 'median', proportiontocut = 0.05):
|
||||
|
||||
results = scipy.stats.fligner(*args, center = center, proportiontocut = proportiontocut)
|
||||
return {"h-value": results[0], "p-value": results[1]} # unknown if the statistic is an h value
|
||||
|
||||
def mood_mediantest(self, *args, ties = 'below', correction = True, lambda_ = 1, nan_policy = 'propagate'):
|
||||
|
||||
results = scipy.stats.median_test(*args, ties = ties, correction = correction, lambda_ = lambda_, nan_policy = nan_policy)
|
||||
return {"chisquared-value": results[0], "p-value": results[1], "m-value": results[2], "table": results[3]}
|
||||
|
||||
def mood_equalscale(self, x, y, axis = 0):
|
||||
|
||||
results = scipy.stats.mood(x, y, axis = axis)
|
||||
return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def skewtest(self, a, axis = 0, nan_policy = 'propagate'):
    """Test whether the skewness of `a` differs from that of a normal distribution.

    Thin wrapper over scipy.stats.skewtest; a, axis and nan_policy are
    forwarded unchanged. The default nan_policy is 'propagate' (the previous
    default was misspelled, which scipy rejects with a ValueError).

    Returns {"z-score": statistic, "p-value": p}.
    """
    results = scipy.stats.skewtest(a, axis = axis, nan_policy = nan_policy)
    return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def kurtosistest(self, a, axis = 0, nan_policy = 'propagate'):
    """Test whether the kurtosis of `a` matches that of a normal distribution.

    Thin wrapper over scipy.stats.kurtosistest; a, axis and nan_policy are
    forwarded unchanged. The default nan_policy is 'propagate' (the previous
    default was misspelled, which scipy rejects with a ValueError).

    Returns {"z-score": statistic, "p-value": p}.
    """
    results = scipy.stats.kurtosistest(a, axis = axis, nan_policy = nan_policy)
    return {"z-score": results[0], "p-value": results[1]}
|
||||
|
||||
def normaltest(self, a, axis = 0, nan_policy = 'propagate'):
    """Test whether `a` departs from a normal distribution.

    Thin wrapper over scipy.stats.normaltest; a, axis and nan_policy are
    forwarded unchanged. The default nan_policy is 'propagate' (the previous
    default was misspelled, which scipy rejects with a ValueError).

    Returns {"z-score": statistic, "p-value": p}; the "z-score" key holds
    whatever statistic scipy returns first.
    """
    results = scipy.stats.normaltest(a, axis = axis, nan_policy = nan_policy)
    return {"z-score": results[0], "p-value": results[1]}
|
99
analysis-master/analysis-amd64/build/lib/analysis/glicko2.py
Normal file
99
analysis-master/analysis-amd64/build/lib/analysis/glicko2.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import math
|
||||
|
||||
class Glicko2:
    """Glicko-2 rating state (rating, rating deviation, volatility) of one player.

    `rating` and `rd` are exposed on the public Glicko scale (default 1500 /
    350) and stored internally on the Glicko-2 scale, i.e. shifted by 1500
    and divided by 173.7178.
    """

    # System constant limiting how fast the volatility may change.
    _tau = 0.5
    # Safety limits for the Newton iteration in _newVol.
    _MAX_VOL_ITERATIONS = 100
    _VOL_TOLERANCE = 1e-12

    def getRating(self):
        """Return the rating on the public scale."""
        return (self.__rating * 173.7178) + 1500

    def setRating(self, rating):
        """Store a public-scale rating on the internal scale."""
        self.__rating = (rating - 1500) / 173.7178

    rating = property(getRating, setRating)

    def getRd(self):
        """Return the rating deviation on the public scale."""
        return self.__rd * 173.7178

    def setRd(self, rd):
        """Store a public-scale rating deviation on the internal scale."""
        self.__rd = rd / 173.7178

    rd = property(getRd, setRd)

    def __init__(self, rating = 1500, rd = 350, vol = 0.06):
        self.setRating(rating)
        self.setRd(rd)
        self.vol = vol

    def _preRatingRD(self):
        """Inflate the internal deviation by the current volatility."""
        self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))

    def update_player(self, rating_list, RD_list, outcome_list):
        """Update rating, deviation and volatility from one rating period.

        rating_list and RD_list hold the opponents' public-scale ratings and
        deviations; outcome_list holds the matching results from this
        player's point of view. With no opponents the player is treated as
        not having competed (only the deviation grows) instead of failing
        with a division by zero.
        """
        rating_list = [(x - 1500) / 173.7178 for x in rating_list]
        RD_list = [x / 173.7178 for x in RD_list]

        if not rating_list:
            self.did_not_compete()
            return

        v = self._v(rating_list, RD_list)
        self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
        self._preRatingRD()

        self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))

        tempSum = 0
        for i in range(len(rating_list)):
            tempSum += self._g(RD_list[i]) * \
                (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
        self.__rating += math.pow(self.__rd, 2) * tempSum

    def _newVol(self, rating_list, RD_list, outcome_list, v):
        """Return the new volatility via Newton iteration on x = ln(vol**2).

        The equations use the squared rating deviation (phi**2), as the
        Glicko-2 description specifies; the earlier code substituted the
        rating there. The loop stops once successive iterates agree to
        _VOL_TOLERANCE or after _MAX_VOL_ITERATIONS steps, so it can no
        longer spin forever when the iterates never become bit-identical.
        """
        delta = self._delta(rating_list, RD_list, outcome_list, v)
        a = math.log(math.pow(self.vol, 2))
        tau_sq = math.pow(self._tau, 2)
        phi_sq = math.pow(self.__rd, 2)

        x = 0.0 # same starting point the original iteration effectively used
        for _ in range(self._MAX_VOL_ITERATIONS):
            ex = math.exp(x)
            d = phi_sq + v + ex
            h1 = -(x - a) / tau_sq - 0.5 * ex / d \
                + 0.5 * ex * math.pow(delta / d, 2)
            h2 = -1 / tau_sq - 0.5 * ex * (phi_sq + v) / math.pow(d, 2) \
                + 0.5 * math.pow(delta, 2) * ex * (phi_sq + v - ex) / math.pow(d, 3)
            x_next = x - (h1 / h2)
            converged = abs(x_next - x) <= self._VOL_TOLERANCE
            x = x_next
            if converged:
                break

        return math.exp(x / 2)

    def _delta(self, rating_list, RD_list, outcome_list, v):
        """Return v times the g-weighted sum of (outcome - expected score)."""
        tempSum = 0
        for i in range(len(rating_list)):
            tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
        return v * tempSum

    def _v(self, rating_list, RD_list):
        """Return the estimated variance of the rating from the game outcomes."""
        tempSum = 0
        for i in range(len(rating_list)):
            tempE = self._E(rating_list[i], RD_list[i])
            tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
        return 1 / tempSum

    def _E(self, p2rating, p2RD):
        """Return the expected score against an opponent (internal scale)."""
        return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
            (self.__rating - p2rating)))

    def _g(self, RD):
        """Return the Glicko-2 g() weighting for an internal-scale deviation."""
        return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))

    def did_not_compete(self):
        """Apply the no-games update: only the deviation is inflated."""
        self._preRatingRD()
|
@@ -0,0 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
def calculate(starting_score, opposing_score, observed, N, K):
    """Return the Elo score after a set of games.

    The expected result against each opponent is 1 / (1 + 10 ** (gap / N)),
    where gap is the opponent's score minus starting_score; the returned
    score is starting_score plus K times (sum of observed results minus sum
    of expected results).
    """
    rating_gap = np.array(opposing_score) - starting_score
    expected = 1/(1+10**(rating_gap/N))
    surprise = np.sum(observed) - np.sum(expected)

    return starting_score + K*surprise
|
@@ -0,0 +1,99 @@
|
||||
import math
|
||||
|
||||
class Glicko2:
    """Glicko-2 rating state (rating, rating deviation, volatility) of one player.

    `rating` and `rd` are exposed on the public Glicko scale (default 1500 /
    350) and stored internally on the Glicko-2 scale, i.e. shifted by 1500
    and divided by 173.7178.
    """

    # System constant limiting how fast the volatility may change.
    _tau = 0.5
    # Safety limits for the Newton iteration in _newVol.
    _MAX_VOL_ITERATIONS = 100
    _VOL_TOLERANCE = 1e-12

    def getRating(self):
        """Return the rating on the public scale."""
        return (self.__rating * 173.7178) + 1500

    def setRating(self, rating):
        """Store a public-scale rating on the internal scale."""
        self.__rating = (rating - 1500) / 173.7178

    rating = property(getRating, setRating)

    def getRd(self):
        """Return the rating deviation on the public scale."""
        return self.__rd * 173.7178

    def setRd(self, rd):
        """Store a public-scale rating deviation on the internal scale."""
        self.__rd = rd / 173.7178

    rd = property(getRd, setRd)

    def __init__(self, rating = 1500, rd = 350, vol = 0.06):
        self.setRating(rating)
        self.setRd(rd)
        self.vol = vol

    def _preRatingRD(self):
        """Inflate the internal deviation by the current volatility."""
        self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))

    def update_player(self, rating_list, RD_list, outcome_list):
        """Update rating, deviation and volatility from one rating period.

        rating_list and RD_list hold the opponents' public-scale ratings and
        deviations; outcome_list holds the matching results from this
        player's point of view. With no opponents the player is treated as
        not having competed (only the deviation grows) instead of failing
        with a division by zero.
        """
        rating_list = [(x - 1500) / 173.7178 for x in rating_list]
        RD_list = [x / 173.7178 for x in RD_list]

        if not rating_list:
            self.did_not_compete()
            return

        v = self._v(rating_list, RD_list)
        self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
        self._preRatingRD()

        self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))

        tempSum = 0
        for i in range(len(rating_list)):
            tempSum += self._g(RD_list[i]) * \
                (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
        self.__rating += math.pow(self.__rd, 2) * tempSum

    def _newVol(self, rating_list, RD_list, outcome_list, v):
        """Return the new volatility via Newton iteration on x = ln(vol**2).

        The equations use the squared rating deviation (phi**2), as the
        Glicko-2 description specifies; the earlier code substituted the
        rating there. The loop stops once successive iterates agree to
        _VOL_TOLERANCE or after _MAX_VOL_ITERATIONS steps, so it can no
        longer spin forever when the iterates never become bit-identical.
        """
        delta = self._delta(rating_list, RD_list, outcome_list, v)
        a = math.log(math.pow(self.vol, 2))
        tau_sq = math.pow(self._tau, 2)
        phi_sq = math.pow(self.__rd, 2)

        x = 0.0 # same starting point the original iteration effectively used
        for _ in range(self._MAX_VOL_ITERATIONS):
            ex = math.exp(x)
            d = phi_sq + v + ex
            h1 = -(x - a) / tau_sq - 0.5 * ex / d \
                + 0.5 * ex * math.pow(delta / d, 2)
            h2 = -1 / tau_sq - 0.5 * ex * (phi_sq + v) / math.pow(d, 2) \
                + 0.5 * math.pow(delta, 2) * ex * (phi_sq + v - ex) / math.pow(d, 3)
            x_next = x - (h1 / h2)
            converged = abs(x_next - x) <= self._VOL_TOLERANCE
            x = x_next
            if converged:
                break

        return math.exp(x / 2)

    def _delta(self, rating_list, RD_list, outcome_list, v):
        """Return v times the g-weighted sum of (outcome - expected score)."""
        tempSum = 0
        for i in range(len(rating_list)):
            tempSum += self._g(RD_list[i]) * (outcome_list[i] - self._E(rating_list[i], RD_list[i]))
        return v * tempSum

    def _v(self, rating_list, RD_list):
        """Return the estimated variance of the rating from the game outcomes."""
        tempSum = 0
        for i in range(len(rating_list)):
            tempE = self._E(rating_list[i], RD_list[i])
            tempSum += math.pow(self._g(RD_list[i]), 2) * tempE * (1 - tempE)
        return 1 / tempSum

    def _E(self, p2rating, p2RD):
        """Return the expected score against an opponent (internal scale)."""
        return 1 / (1 + math.exp(-1 * self._g(p2RD) * \
            (self.__rating - p2rating)))

    def _g(self, RD):
        """Return the Glicko-2 g() weighting for an internal-scale deviation."""
        return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))

    def did_not_compete(self):
        """Apply the no-games update: only the deviation is inflated."""
        self._preRatingRD()
|
907
analysis-master/analysis-amd64/build/lib/analysis/trueskill.py
Normal file
907
analysis-master/analysis-amd64/build/lib/analysis/trueskill.py
Normal file
@@ -0,0 +1,907 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from itertools import chain
|
||||
import math
|
||||
|
||||
from six import iteritems
|
||||
from six.moves import map, range, zip
|
||||
from six import iterkeys
|
||||
|
||||
import copy
|
||||
try:
|
||||
from numbers import Number
|
||||
except ImportError:
|
||||
Number = (int, long, float, complex)
|
||||
|
||||
inf = float('inf')
|
||||
|
||||
class Gaussian(object):
    """A normal distribution stored as precision (pi) and precision-adjusted mean (tau)."""

    #: Precision, the inverse of the variance.
    pi = 0
    #: Precision adjusted mean, the precision multiplied by the mean.
    tau = 0

    def __init__(self, mu=None, sigma=None, pi=0, tau=0):
        # When mu is supplied, (mu, sigma) take precedence over (pi, tau).
        if mu is not None:
            if sigma is None:
                raise TypeError('sigma argument is needed')
            if sigma == 0:
                raise ValueError('sigma**2 should be greater than 0')
            pi = sigma ** -2
            tau = pi * mu
        self.pi, self.tau = pi, tau

    @property
    def mu(self):
        """Mean; a zero precision is returned as-is instead of dividing by it."""
        precision = self.pi
        if not precision:
            return precision
        return self.tau / precision

    @property
    def sigma(self):
        """Standard deviation; infinite when the precision is zero."""
        if self.pi:
            return math.sqrt(1 / self.pi)
        return float('inf')

    def __mul__(self, other):
        # Multiplying two Gaussians adds their natural parameters.
        return Gaussian(pi=self.pi + other.pi, tau=self.tau + other.tau)

    def __truediv__(self, other):
        # Dividing two Gaussians subtracts their natural parameters.
        return Gaussian(pi=self.pi - other.pi, tau=self.tau - other.tau)

    __div__ = __truediv__ # for Python 2

    def __eq__(self, other):
        return (self.pi, self.tau) == (other.pi, other.tau)

    # Ordering comparisons look at the mean only.
    def __lt__(self, other):
        return self.mu < other.mu

    def __le__(self, other):
        return self.mu <= other.mu

    def __gt__(self, other):
        return self.mu > other.mu

    def __ge__(self, other):
        return self.mu >= other.mu

    def __repr__(self):
        mean, deviation = self.mu, self.sigma
        return 'N(mu={:.3f}, sigma={:.3f})'.format(mean, deviation)

    def _repr_latex_(self):
        body = r'\mathcal{{ N }}( {:.3f}, {:.3f}^2 )'.format(self.mu, self.sigma)
        return '$%s$' % body
|
||||
|
||||
class Matrix(list):
|
||||
def __init__(self, src, height=None, width=None):
|
||||
if callable(src):
|
||||
f, src = src, {}
|
||||
size = [height, width]
|
||||
if not height:
|
||||
def set_height(height):
|
||||
size[0] = height
|
||||
size[0] = set_height
|
||||
if not width:
|
||||
def set_width(width):
|
||||
size[1] = width
|
||||
size[1] = set_width
|
||||
try:
|
||||
for (r, c), val in f(*size):
|
||||
src[r, c] = val
|
||||
except TypeError:
|
||||
raise TypeError('A callable src must return an interable '
|
||||
'which generates a tuple containing '
|
||||
'coordinate and value')
|
||||
height, width = tuple(size)
|
||||
if height is None or width is None:
|
||||
raise TypeError('A callable src must call set_height and '
|
||||
'set_width if the size is non-deterministic')
|
||||
if isinstance(src, list):
|
||||
is_number = lambda x: isinstance(x, Number)
|
||||
unique_col_sizes = set(map(len, src))
|
||||
everything_are_number = filter(is_number, sum(src, []))
|
||||
if len(unique_col_sizes) != 1 or not everything_are_number:
|
||||
raise ValueError('src must be a rectangular array of numbers')
|
||||
two_dimensional_array = src
|
||||
elif isinstance(src, dict):
|
||||
if not height or not width:
|
||||
w = h = 0
|
||||
for r, c in iterkeys(src):
|
||||
if not height:
|
||||
h = max(h, r + 1)
|
||||
if not width:
|
||||
w = max(w, c + 1)
|
||||
if not height:
|
||||
height = h
|
||||
if not width:
|
||||
width = w
|
||||
two_dimensional_array = []
|
||||
for r in range(height):
|
||||
row = []
|
||||
two_dimensional_array.append(row)
|
||||
for c in range(width):
|
||||
row.append(src.get((r, c), 0))
|
||||
else:
|
||||
raise TypeError('src must be a list or dict or callable')
|
||||
super(Matrix, self).__init__(two_dimensional_array)
|
||||
|
||||
@property
|
||||
def height(self):
|
||||
return len(self)
|
||||
|
||||
@property
|
||||
def width(self):
|
||||
return len(self[0])
|
||||
|
||||
def transpose(self):
|
||||
height, width = self.height, self.width
|
||||
src = {}
|
||||
for c in range(width):
|
||||
for r in range(height):
|
||||
src[c, r] = self[r][c]
|
||||
return type(self)(src, height=width, width=height)
|
||||
|
||||
def minor(self, row_n, col_n):
|
||||
height, width = self.height, self.width
|
||||
if not (0 <= row_n < height):
|
||||
raise ValueError('row_n should be between 0 and %d' % height)
|
||||
elif not (0 <= col_n < width):
|
||||
raise ValueError('col_n should be between 0 and %d' % width)
|
||||
two_dimensional_array = []
|
||||
for r in range(height):
|
||||
if r == row_n:
|
||||
continue
|
||||
row = []
|
||||
two_dimensional_array.append(row)
|
||||
for c in range(width):
|
||||
if c == col_n:
|
||||
continue
|
||||
row.append(self[r][c])
|
||||
return type(self)(two_dimensional_array)
|
||||
|
||||
def determinant(self):
|
||||
height, width = self.height, self.width
|
||||
if height != width:
|
||||
raise ValueError('Only square matrix can calculate a determinant')
|
||||
tmp, rv = copy.deepcopy(self), 1.
|
||||
for c in range(width - 1, 0, -1):
|
||||
pivot, r = max((abs(tmp[r][c]), r) for r in range(c + 1))
|
||||
pivot = tmp[r][c]
|
||||
if not pivot:
|
||||
return 0.
|
||||
tmp[r], tmp[c] = tmp[c], tmp[r]
|
||||
if r != c:
|
||||
rv = -rv
|
||||
rv *= pivot
|
||||
fact = -1. / pivot
|
||||
for r in range(c):
|
||||
f = fact * tmp[r][c]
|
||||
for x in range(c):
|
||||
tmp[r][x] += f * tmp[c][x]
|
||||
return rv * tmp[0][0]
|
||||
|
||||
def adjugate(self):
|
||||
height, width = self.height, self.width
|
||||
if height != width:
|
||||
raise ValueError('Only square matrix can be adjugated')
|
||||
if height == 2:
|
||||
a, b = self[0][0], self[0][1]
|
||||
c, d = self[1][0], self[1][1]
|
||||
return type(self)([[d, -b], [-c, a]])
|
||||
src = {}
|
||||
for r in range(height):
|
||||
for c in range(width):
|
||||
sign = -1 if (r + c) % 2 else 1
|
||||
src[r, c] = self.minor(r, c).determinant() * sign
|
||||
return type(self)(src, height, width)
|
||||
|
||||
def inverse(self):
|
||||
if self.height == self.width == 1:
|
||||
return type(self)([[1. / self[0][0]]])
|
||||
return (1. / self.determinant()) * self.adjugate()
|
||||
|
||||
def __add__(self, other):
|
||||
height, width = self.height, self.width
|
||||
if (height, width) != (other.height, other.width):
|
||||
raise ValueError('Must be same size')
|
||||
src = {}
|
||||
for r in range(height):
|
||||
for c in range(width):
|
||||
src[r, c] = self[r][c] + other[r][c]
|
||||
return type(self)(src, height, width)
|
||||
|
||||
def __mul__(self, other):
|
||||
if self.width != other.height:
|
||||
raise ValueError('Bad size')
|
||||
height, width = self.height, other.width
|
||||
src = {}
|
||||
for r in range(height):
|
||||
for c in range(width):
|
||||
src[r, c] = sum(self[r][x] * other[x][c]
|
||||
for x in range(self.width))
|
||||
return type(self)(src, height, width)
|
||||
|
||||
def __rmul__(self, other):
|
||||
if not isinstance(other, Number):
|
||||
raise TypeError('The operand should be a number')
|
||||
height, width = self.height, self.width
|
||||
src = {}
|
||||
for r in range(height):
|
||||
for c in range(width):
|
||||
src[r, c] = other * self[r][c]
|
||||
return type(self)(src, height, width)
|
||||
|
||||
def __repr__(self):
    """Return ``ClassName(<parent-class repr of the rows>)``."""
    class_name = type(self).__name__
    rows_repr = super(Matrix, self).__repr__()
    return '{}({})'.format(class_name, rows_repr)
|
||||
|
||||
def _repr_latex_(self):
|
||||
rows = [' && '.join(['%.3f' % cell for cell in row]) for row in self]
|
||||
latex = r'\begin{matrix} %s \end{matrix}' % r'\\'.join(rows)
|
||||
return '$%s$' % latex
|
||||
|
||||
def _gen_erfcinv(erfc, math=math):
|
||||
def erfcinv(y):
|
||||
"""The inverse function of erfc."""
|
||||
if y >= 2:
|
||||
return -100.
|
||||
elif y <= 0:
|
||||
return 100.
|
||||
zero_point = y < 1
|
||||
if not zero_point:
|
||||
y = 2 - y
|
||||
t = math.sqrt(-2 * math.log(y / 2.))
|
||||
x = -0.70711 * \
|
||||
((2.30753 + t * 0.27061) / (1. + t * (0.99229 + t * 0.04481)) - t)
|
||||
for i in range(2):
|
||||
err = erfc(x) - y
|
||||
x += err / (1.12837916709551257 * math.exp(-(x ** 2)) - x * err)
|
||||
return x if zero_point else -x
|
||||
return erfcinv
|
||||
|
||||
def _gen_ppf(erfc, math=math):
    """Build a normal-distribution ``ppf`` (inverse CDF) from ``erfc``.

    The returned function computes
    ``mu - sigma * sqrt(2) * erfcinv(2 * x)``.
    """
    inverse_erfc = _gen_erfcinv(erfc, math)

    def ppf(x, mu=0, sigma=1):
        offset = sigma * math.sqrt(2) * inverse_erfc(2 * x)
        return mu - offset

    return ppf
|
||||
|
||||
def erfc(x):
    """Polynomial approximation of the complementary error function.

    The approximation is evaluated on ``abs(x)`` and reflected as
    ``2 - r`` for negative ``x``.
    """
    z = abs(x)
    t = 1. / (1. + z / 2.)
    # Horner evaluation, innermost coefficient first; the operation order is
    # the same as the fully nested expression.
    acc = 0.17087277
    for coefficient in (-0.82215223, 1.48851587, -1.13520398, 0.27886807,
                        -0.18628806, 0.09678418, 0.37409196, 1.00002368):
        acc = coefficient + t * acc
    r = t * math.exp(-z * z - 1.26551223 + t * acc)
    if x < 0:
        return 2. - r
    return r
|
||||
|
||||
def cdf(x, mu=0, sigma=1):
    """Cumulative distribution function of a normal distribution via ``erfc``."""
    scaled = -(x - mu) / (sigma * math.sqrt(2))
    return 0.5 * erfc(scaled)
|
||||
|
||||
|
||||
def pdf(x, mu=0, sigma=1):
    """Probability density function of a normal distribution.

    Args:
        x: point at which the density is evaluated.
        mu: mean of the distribution.
        sigma: standard deviation; its absolute value is used.

    Returns:
        ``exp(-((x - mu) / |sigma|) ** 2 / 2) / (sqrt(2 * pi) * |sigma|)``.
    """
    spread = abs(sigma)
    z = (x - mu) / spread
    # The normalising constant is 1 / (sqrt(2*pi) * sigma).  Without the
    # parentheses `1 / a * b` evaluates as `b / a`, which multiplied the
    # density by sigma instead of dividing by it.
    return math.exp(-(z ** 2 / 2)) / (math.sqrt(2 * math.pi) * spread)
|
||||
|
||||
# Fallback inverse CDF, generated from this module's own ``erfc``.
ppf = _gen_ppf(erfc)
|
||||
|
||||
def choose_backend(backend):
    """Return the ``(cdf, pdf, ppf)`` triple for the named backend.

    ``None`` selects this module's own implementations; ``'mpmath'`` and
    ``'scipy'`` select the respective library.

    Raises:
        ImportError: if the requested library is not installed.
        ValueError: if the backend name is not recognised.
    """
    if backend is None:  # fallback
        return cdf, pdf, ppf
    if backend == 'mpmath':
        try:
            import mpmath
        except ImportError:
            raise ImportError('Install "mpmath" to use this backend')
        mp_ppf = _gen_ppf(mpmath.erfc, math=mpmath)
        return mpmath.ncdf, mpmath.npdf, mp_ppf
    if backend == 'scipy':
        try:
            from scipy.stats import norm
        except ImportError:
            raise ImportError('Install "scipy" to use this backend')
        return norm.cdf, norm.pdf, norm.ppf
    raise ValueError('%r backend is not defined' % backend)
|
||||
|
||||
def available_backends():
    """List the usable backends: ``None`` plus every importable library."""
    found = [None]
    for candidate in ['mpmath', 'scipy']:
        try:
            __import__(candidate)
        except ImportError:
            pass
        else:
            found.append(candidate)
    return found
|
||||
|
||||
class Node(object):
    """Common base class of the factor-graph variables and factors."""
|
||||
|
||||
class Variable(Node, Gaussian):
    """A Gaussian-valued graph node that stores one message per factor."""

    def __init__(self):
        # factor -> last message exchanged with that factor
        self.messages = {}
        super(Variable, self).__init__()

    def set(self, val):
        """Adopt ``val``'s ``pi``/``tau`` and return how far the value moved."""
        change = self.delta(val)
        self.pi, self.tau = val.pi, val.tau
        return change

    def delta(self, other):
        """Return the distance to ``other`` (0 when the ``pi`` gap is infinite)."""
        pi_gap = abs(self.pi - other.pi)
        if pi_gap == inf:
            return 0.
        tau_gap = abs(self.tau - other.tau)
        return max(tau_gap, math.sqrt(pi_gap))

    def update_message(self, factor, pi=0, tau=0, message=None):
        """Replace the message of ``factor`` and update the value accordingly."""
        if not message:
            message = Gaussian(pi=pi, tau=tau)
        previous = self[factor]
        self[factor] = message
        return self.set(self / previous * message)

    def update_value(self, factor, pi=0, tau=0, value=None):
        """Set a new value and derive the matching message for ``factor``."""
        if not value:
            value = Gaussian(pi=pi, tau=tau)
        previous = self[factor]
        self[factor] = value * previous / self
        return self.set(value)

    def __getitem__(self, factor):
        return self.messages[factor]

    def __setitem__(self, factor, message):
        self.messages[factor] = message

    def __repr__(self):
        count = len(self.messages)
        plural = '' if len(self.messages) == 1 else 's'
        args = (type(self).__name__, super(Variable, self).__repr__(),
                count, plural)
        return '<%s %s with %d connection%s>' % args
|
||||
|
||||
|
||||
class Factor(Node):
    """Base factor: registers an initial message on each connected variable."""

    def __init__(self, variables):
        self.vars = variables
        for variable in variables:
            variable[self] = Gaussian()

    def down(self):
        """Default downward pass: nothing to do, no change reported."""
        return 0

    def up(self):
        """Default upward pass: nothing to do, no change reported."""
        return 0

    @property
    def var(self):
        """The only connected variable (asserts that there is exactly one)."""
        assert len(self.vars) == 1
        return self.vars[0]

    def __repr__(self):
        count = len(self.vars)
        plural = '' if count == 1 else 's'
        return '<%s with %d connection%s>' % (type(self).__name__, count, plural)
|
||||
|
||||
|
||||
class PriorFactor(Factor):
    """Factor that pushes a prior value, widened by ``dynamic``, into a variable."""

    def __init__(self, var, val, dynamic=0):
        super(PriorFactor, self).__init__([var])
        self.val = val
        self.dynamic = dynamic

    def down(self):
        """Set the variable to the prior with ``sqrt(sigma^2 + dynamic^2)``."""
        widened_sigma = math.sqrt(self.val.sigma ** 2 + self.dynamic ** 2)
        prior = Gaussian(self.val.mu, widened_sigma)
        return self.var.update_value(self, value=prior)
|
||||
|
||||
|
||||
class LikelihoodFactor(Factor):
    """Factor linking a mean variable to a value variable with a fixed variance."""

    def __init__(self, mean_var, value_var, variance):
        super(LikelihoodFactor, self).__init__([mean_var, value_var])
        self.mean = mean_var
        self.value = value_var
        self.variance = variance

    def calc_a(self, var):
        """Return the scaling ``1 / (1 + variance * var.pi)``."""
        return 1. / (1. + self.variance * var.pi)

    def down(self):
        """Send the scaled message from the mean variable to the value variable."""
        # update value.
        incoming = self.mean / self.mean[self]
        scale = self.calc_a(incoming)
        return self.value.update_message(self, scale * incoming.pi,
                                         scale * incoming.tau)

    def up(self):
        """Send the scaled message from the value variable to the mean variable."""
        # update mean.
        incoming = self.value / self.value[self]
        scale = self.calc_a(incoming)
        return self.mean.update_message(self, scale * incoming.pi,
                                        scale * incoming.tau)
|
||||
|
||||
|
||||
class SumFactor(Factor):
    """Factor tying ``sum_var`` to a weighted sum of ``term_vars``."""

    def __init__(self, sum_var, term_vars, coeffs):
        super(SumFactor, self).__init__([sum_var] + term_vars)
        self.sum = sum_var
        self.terms = term_vars
        self.coeffs = coeffs

    def down(self):
        """Update the sum variable from the term variables."""
        terms = self.terms
        messages = [term[self] for term in terms]
        return self.update(self.sum, terms, messages, self.coeffs)

    def up(self, index=0):
        """Update term ``index`` by solving the sum equation for that term."""
        pivot = self.coeffs[index]
        solved_coeffs = []
        for position, coeff in enumerate(self.coeffs):
            numerator = 1. if position == index else -coeff
            try:
                solved_coeffs.append(numerator / pivot)
            except ZeroDivisionError:
                solved_coeffs.append(0.)
        # the sum variable takes the place of the term being solved for
        sources = self.terms[:]
        sources[index] = self.sum
        messages = [source[self] for source in sources]
        return self.update(self.terms[index], sources, messages, solved_coeffs)

    def update(self, var, vals, msgs, coeffs):
        """Combine ``vals`` (minus ``msgs``) with ``coeffs`` and message ``var``."""
        inverse_pi = 0
        weighted_mu = 0
        for val, msg, coeff in zip(vals, msgs, coeffs):
            div = val / msg
            weighted_mu += coeff * div.mu
            if inverse_pi != inf:
                try:
                    # numpy.float64 handles floating-point error by different
                    # way.  For example, it can just warn RuntimeWarning on
                    # n/0 problem instead of throwing ZeroDivisionError.  So
                    # div.pi, the denominator has to be a built-in float.
                    inverse_pi += coeff ** 2 / float(div.pi)
                except ZeroDivisionError:
                    inverse_pi = inf
        pi = 1. / inverse_pi
        tau = pi * weighted_mu
        return var.update_message(self, pi, tau)
|
||||
|
||||
|
||||
class TruncateFactor(Factor):
    """Factor applying the ``v``/``w`` correction functions to one variable."""

    def __init__(self, var, v_func, w_func, draw_margin):
        super(TruncateFactor, self).__init__([var])
        self.v_func = v_func
        self.w_func = w_func
        self.draw_margin = draw_margin

    def up(self):
        """Recompute the variable's value from ``v_func`` and ``w_func``."""
        target = self.var
        div = target / target[self]
        root_pi = math.sqrt(div.pi)
        # both correction functions receive the same normalised arguments
        scaled_args = (div.tau / root_pi, self.draw_margin * root_pi)
        v = self.v_func(*scaled_args)
        w = self.w_func(*scaled_args)
        shrink = (1. - w)
        new_pi = div.pi / shrink
        new_tau = (div.tau + root_pi * v) / shrink
        return target.update_value(self, new_pi, new_tau)
|
||||
|
||||
#: Default initial mean of ratings.
MU = 25.
#: Default initial standard deviation of ratings (one third of MU).
SIGMA = MU / 3
#: Default distance that guarantees about 76% chance of winning
#: (half of SIGMA).
BETA = SIGMA / 2
#: Default dynamic factor (one hundredth of SIGMA).
TAU = SIGMA / 100
#: Default draw probability of the game (10%).
DRAW_PROBABILITY = .10
#: A basis to check reliability of the result; used as the default
#: ``min_delta`` threshold at which the rating schedule stops iterating.
DELTA = 0.0001
|
||||
|
||||
|
||||
def calc_draw_probability(draw_margin, size, env=None):
    """Return the draw probability implied by ``draw_margin``.

    ``size`` is the number of players involved; ``env`` supplies ``cdf`` and
    ``beta`` and defaults to the global environment.
    """
    environment = global_env() if env is None else env
    scaled_margin = draw_margin / (math.sqrt(size) * environment.beta)
    return 2 * environment.cdf(scaled_margin) - 1
|
||||
|
||||
|
||||
def calc_draw_margin(draw_probability, size, env=None):
    """Return the draw margin matching ``draw_probability``.

    ``size`` is the number of players involved; ``env`` supplies ``ppf`` and
    ``beta`` and defaults to the global environment.
    """
    environment = global_env() if env is None else env
    quantile = environment.ppf((draw_probability + 1) / 2.)
    return quantile * math.sqrt(size) * environment.beta
|
||||
|
||||
|
||||
def _team_sizes(rating_groups):
|
||||
team_sizes = [0]
|
||||
for group in rating_groups:
|
||||
team_sizes.append(len(group) + team_sizes[-1])
|
||||
del team_sizes[0]
|
||||
return team_sizes
|
||||
|
||||
|
||||
def _floating_point_error(env):
|
||||
if env.backend == 'mpmath':
|
||||
msg = 'Set "mpmath.mp.dps" to higher'
|
||||
else:
|
||||
msg = 'Cannot calculate correctly, set backend to "mpmath"'
|
||||
return FloatingPointError(msg)
|
||||
|
||||
|
||||
class Rating(Gaussian):
    """A player's skill as a Gaussian with mean ``mu`` and deviation ``sigma``.

    ``mu`` may also be a ``(mu, sigma)`` tuple or another ``Gaussian``; values
    left as ``None`` are taken from the global environment.
    """

    def __init__(self, mu=None, sigma=None):
        if isinstance(mu, tuple):
            mu, sigma = mu
        elif isinstance(mu, Gaussian):
            mu, sigma = mu.mu, mu.sigma
        # fall back to the global environment's defaults
        if mu is None:
            mu = global_env().mu
        if sigma is None:
            sigma = global_env().sigma
        super(Rating, self).__init__(mu, sigma)

    def __int__(self):
        return int(self.mu)

    def __long__(self):
        return long(self.mu)

    def __float__(self):
        return float(self.mu)

    def __iter__(self):
        pair = (self.mu, self.sigma)
        return iter(pair)

    def __repr__(self):
        cls = type(self)
        qualified_name = '.'.join([cls.__module__, cls.__name__])
        return '%s(mu=%.3f, sigma=%.3f)' % (qualified_name, self.mu, self.sigma)
|
||||
|
||||
|
||||
class TrueSkill(object):
    """A rating environment: TrueSkill parameters plus the math backend.

    ``backend`` is either a name accepted by ``choose_backend`` or a ready
    ``(cdf, pdf, ppf)`` tuple.  ``draw_probability`` is either a number or a
    callable taking ``(rating1, rating2, env)``.
    """

    def __init__(self, mu=MU, sigma=SIGMA, beta=BETA, tau=TAU,
                 draw_probability=DRAW_PROBABILITY, backend=None):
        self.mu = mu
        self.sigma = sigma
        self.beta = beta
        self.tau = tau
        self.draw_probability = draw_probability
        self.backend = backend
        # a tuple backend already is the (cdf, pdf, ppf) triple
        if isinstance(backend, tuple):
            functions = backend
        else:
            functions = choose_backend(backend)
        self.cdf, self.pdf, self.ppf = functions

    def create_rating(self, mu=None, sigma=None):
        """Create a ``Rating``, defaulting to this environment's mu/sigma."""
        return Rating(self.mu if mu is None else mu,
                      self.sigma if sigma is None else sigma)

    def v_win(self, diff, draw_margin):
        """The ``v`` correction function for a win/loss outcome."""
        x = diff - draw_margin
        denom = self.cdf(x)
        if denom:
            return self.pdf(x) / denom
        return -x

    def v_draw(self, diff, draw_margin):
        """The ``v`` correction function for a drawn outcome."""
        magnitude = abs(diff)
        upper, lower = draw_margin - magnitude, -draw_margin - magnitude
        denom = self.cdf(upper) - self.cdf(lower)
        numer = self.pdf(lower) - self.pdf(upper)
        unsigned = (numer / denom) if denom else upper
        sign = -1 if diff < 0 else +1
        return unsigned * sign

    def w_win(self, diff, draw_margin):
        """The ``w`` correction function for a win/loss outcome.

        Raises a ``FloatingPointError`` when the result leaves ``(0, 1)``.
        """
        x = diff - draw_margin
        v = self.v_win(diff, draw_margin)
        w = v * (v + x)
        if not (0 < w < 1):
            raise _floating_point_error(self)
        return w

    def w_draw(self, diff, draw_margin):
        """The ``w`` correction function for a drawn outcome.

        Raises a ``FloatingPointError`` when the denominator is zero.
        """
        magnitude = abs(diff)
        upper, lower = draw_margin - magnitude, -draw_margin - magnitude
        denom = self.cdf(upper) - self.cdf(lower)
        if not denom:
            raise _floating_point_error(self)
        v = self.v_draw(magnitude, draw_margin)
        correction = (upper * self.pdf(upper) - lower * self.pdf(lower)) / denom
        return (v ** 2) + correction

    def validate_rating_groups(self, rating_groups):
        """Check the groups and normalise them to ``(groups, keys)``.

        Dict groups become tuples of ratings with ``keys`` holding the matching
        tuples of dictionary keys; otherwise ``keys`` is ``None``.
        """
        # check group sizes
        if len(rating_groups) < 2:
            raise ValueError('Need multiple rating groups')
        if not all(rating_groups):
            raise ValueError('Each group must contain multiple ratings')
        # check group types
        group_types = set(map(type, rating_groups))
        if len(group_types) != 1:
            raise TypeError('All groups should be same type')
        if group_types.pop() is Rating:
            raise TypeError('Rating cannot be a rating group')
        # normalize rating_groups
        if not isinstance(rating_groups[0], dict):
            return list(rating_groups), None
        normalized, keys = [], []
        for mapping in rating_groups:
            items = list(iteritems(mapping))
            normalized.append(tuple(rating for key, rating in items))
            keys.append(tuple(key for key, rating in items))
        return normalized, keys

    def validate_weights(self, weights, rating_groups, keys=None):
        """Expand ``weights`` (``None``, dict or sequence) to per-group weights.

        Dict weights are looked up by ``(group index, player index or key)``
        and default to 1 when missing.
        """
        if weights is None:
            return [(1,) * len(g) for g in rating_groups]
        if not isinstance(weights, dict):
            return weights
        expanded = []
        for x, group in enumerate(rating_groups):
            row = []
            expanded.append(row)
            for y, rating in enumerate(group):
                lookup = y if keys is None else keys[x][y]
                row.append(weights.get((x, lookup), 1))
        return expanded

    def factor_graph_builders(self, rating_groups, ranks, weights):
        """Create the graph variables and return the five layer builders.

        Each builder is a generator function yielding the factors of one
        layer: rating, performance, team performance, team difference and
        truncation.
        """
        flatten_ratings = sum(map(tuple, rating_groups), ())
        flatten_weights = sum(map(tuple, weights), ())
        size = len(flatten_ratings)
        group_size = len(rating_groups)
        # create variables
        rating_vars = [Variable() for __ in range(size)]
        perf_vars = [Variable() for __ in range(size)]
        team_perf_vars = [Variable() for __ in range(group_size)]
        team_diff_vars = [Variable() for __ in range(group_size - 1)]
        team_sizes = _team_sizes(rating_groups)

        # layer builders
        def build_rating_layer():
            for rating_var, rating in zip(rating_vars, flatten_ratings):
                yield PriorFactor(rating_var, rating, self.tau)

        def build_perf_layer():
            variance = None
            for rating_var, perf_var in zip(rating_vars, perf_vars):
                variance = self.beta ** 2
                yield LikelihoodFactor(rating_var, perf_var, variance)

        def build_team_perf_layer():
            for team, team_perf_var in enumerate(team_perf_vars):
                start = team_sizes[team - 1] if team > 0 else 0
                end = team_sizes[team]
                members = perf_vars[start:end]
                member_weights = flatten_weights[start:end]
                yield SumFactor(team_perf_var, members, member_weights)

        def build_team_diff_layer():
            for team, team_diff_var in enumerate(team_diff_vars):
                neighbours = team_perf_vars[team:team + 2]
                yield SumFactor(team_diff_var, neighbours, [+1, -1])

        def build_trunc_layer():
            for x, team_diff_var in enumerate(team_diff_vars):
                if callable(self.draw_probability):
                    # dynamic draw probability
                    team_perf1, team_perf2 = team_perf_vars[x:x + 2]
                    args = (Rating(team_perf1), Rating(team_perf2), self)
                    draw_probability = self.draw_probability(*args)
                else:
                    # static draw probability
                    draw_probability = self.draw_probability
                pair_size = sum(map(len, rating_groups[x:x + 2]))
                draw_margin = calc_draw_margin(draw_probability, pair_size,
                                               self)
                tied = ranks[x] == ranks[x + 1]
                if tied:
                    v_func, w_func = self.v_draw, self.w_draw
                else:
                    v_func, w_func = self.v_win, self.w_win
                yield TruncateFactor(team_diff_var,
                                     v_func, w_func, draw_margin)

        # build layers
        return (build_rating_layer, build_perf_layer, build_team_perf_layer,
                build_team_diff_layer, build_trunc_layer)

    def run_schedule(self, build_rating_layer, build_perf_layer,
                     build_team_perf_layer, build_team_diff_layer,
                     build_trunc_layer, min_delta=DELTA):
        """Build every layer, pass messages until stable, return the layers.

        The middle loop runs at most 10 times and stops early once the largest
        reported change is no greater than ``min_delta``.

        Raises:
            ValueError: if ``min_delta`` is not greater than 0.
        """
        if min_delta <= 0:
            raise ValueError('min_delta must be greater than 0')
        layers = []

        def build(builders):
            built = [list(builder()) for builder in builders]
            layers.extend(built)
            return built

        # gray arrows
        upper_layers = build([build_rating_layer,
                              build_perf_layer,
                              build_team_perf_layer])
        rating_layer, perf_layer, team_perf_layer = upper_layers
        for factor in chain(*upper_layers):
            factor.down()
        # arrow #1, #2, #3
        team_diff_layer, trunc_layer = build([build_team_diff_layer,
                                              build_trunc_layer])
        team_diff_len = len(team_diff_layer)
        for __ in range(10):
            if team_diff_len == 1:
                # only two teams
                team_diff_layer[0].down()
                delta = trunc_layer[0].up()
            else:
                # multiple teams
                delta = 0
                for i in range(team_diff_len - 1):
                    team_diff_layer[i].down()
                    delta = max(delta, trunc_layer[i].up())
                    team_diff_layer[i].up(1)  # up to right variable
                for i in range(team_diff_len - 1, 0, -1):
                    team_diff_layer[i].down()
                    delta = max(delta, trunc_layer[i].up())
                    team_diff_layer[i].up(0)  # up to left variable
            # repeat until the update is small enough
            if delta <= min_delta:
                break
        # up both ends
        team_diff_layer[0].up(0)
        team_diff_layer[team_diff_len - 1].up(1)
        # up the remainder of the black arrows
        for factor in team_perf_layer:
            for i in range(len(factor.vars) - 1):
                factor.up(i)
        for factor in perf_layer:
            factor.up()
        return layers

    def rate(self, rating_groups, ranks=None, weights=None, min_delta=DELTA):
        """Return the updated ratings for the given groups.

        ``ranks`` holds one rank per group (lower sorts first, equal ranks are
        a tie) and defaults to the input order.  The result has the same
        grouping and order as the input; dict groups come back as dicts.

        Raises:
            ValueError: if ``ranks`` does not have one entry per group.
        """
        rating_groups, keys = self.validate_rating_groups(rating_groups)
        weights = self.validate_weights(weights, rating_groups, keys)
        group_size = len(rating_groups)
        if ranks is None:
            ranks = range(group_size)
        elif len(ranks) != group_size:
            raise ValueError('Wrong ranks')
        # sort rating groups by rank, remembering each original position
        indexed = enumerate(zip(rating_groups, ranks, weights))
        sorting = sorted(indexed, key=lambda item: item[1][1])
        sorted_rating_groups, sorted_ranks, sorted_weights = [], [], []
        for x, (g, r, w) in sorting:
            sorted_rating_groups.append(g)
            sorted_ranks.append(r)
            # make weights to be greater than 0
            sorted_weights.append(tuple(max(min_delta, w_) for w_ in w))
        # build factor graph
        builders = self.factor_graph_builders(sorted_rating_groups,
                                              sorted_ranks, sorted_weights)
        layers = self.run_schedule(*(builders + (min_delta,)))
        # make result
        rating_layer = layers[0]
        team_sizes = _team_sizes(sorted_rating_groups)
        transformed_groups = []
        for start, end in zip([0] + team_sizes[:-1], team_sizes):
            transformed_groups.append(tuple(
                Rating(float(f.var.mu), float(f.var.sigma))
                for f in rating_layer[start:end]))
        # put the groups back into their input order
        original_positions = (x for x, __ in sorting)
        unsorting = sorted(zip(original_positions, transformed_groups),
                           key=lambda pair: pair[0])
        if keys is None:
            return [g for x, g in unsorting]
        # restore the structure with input dictionary keys
        return [dict(zip(keys[x], g)) for x, g in unsorting]

    def quality(self, rating_groups, weights=None):
        """Return the match quality computed from the groups' ratings."""
        rating_groups, keys = self.validate_rating_groups(rating_groups)
        weights = self.validate_weights(weights, rating_groups, keys)
        flatten_ratings = sum(map(tuple, rating_groups), ())
        flatten_weights = sum(map(tuple, weights), ())
        length = len(flatten_ratings)
        # a vector of all of the skill means
        mean_matrix = Matrix([[r.mu] for r in flatten_ratings])

        # a matrix whose diagonal values are the variances (sigma ** 2) of each
        # of the players.
        def variance_cells(height, width):
            for x, rating in enumerate(flatten_ratings):
                yield (x, x), rating.sigma ** 2
        variance_matrix = Matrix(variance_cells, length, length)

        # the player-team assignment and comparison matrix
        def rotated_a_cells(set_height, set_width):
            t = 0
            neighbours = zip(rating_groups[:-1], rating_groups[1:])
            for r, (cur, _next) in enumerate(neighbours):
                for x in range(t, t + len(cur)):
                    yield (r, x), flatten_weights[x]
                    t += 1
                x += 1
                for x in range(x, x + len(_next)):
                    yield (r, x), -flatten_weights[x]
            set_height(r + 1)
            set_width(x + 1)
        rotated_a_matrix = Matrix(rotated_a_cells)
        a_matrix = rotated_a_matrix.transpose()
        # match quality further derivation
        _ata = (self.beta ** 2) * rotated_a_matrix * a_matrix
        _atsa = rotated_a_matrix * variance_matrix * a_matrix
        start = mean_matrix.transpose() * a_matrix
        middle = _ata + _atsa
        end = rotated_a_matrix * mean_matrix
        # make result
        e_arg = (-0.5 * start * middle.inverse() * end).determinant()
        s_arg = _ata.determinant() / middle.determinant()
        return math.exp(e_arg) * math.sqrt(s_arg)

    def expose(self, rating):
        """Return ``rating.mu - (self.mu / self.sigma) * rating.sigma``."""
        k = self.mu / self.sigma
        return rating.mu - k * rating.sigma

    def make_as_global(self):
        """Register this environment as the global one and return it."""
        return setup(env=self)

    def __repr__(self):
        cls = type(self)
        if callable(self.draw_probability):
            func = self.draw_probability
            draw_probability = '.'.join([func.__module__, func.__name__])
        else:
            draw_probability = '%.1f%%' % (self.draw_probability * 100)
        if self.backend is None:
            backend = ''
        elif isinstance(self.backend, tuple):
            backend = ', backend=...'
        else:
            backend = ', backend=%r' % self.backend
        qualified_name = '.'.join([cls.__module__, cls.__name__])
        args = (qualified_name, self.mu, self.sigma,
                self.beta, self.tau, draw_probability, backend)
        return ('%s(mu=%.3f, sigma=%.3f, beta=%.3f, tau=%.3f, '
                'draw_probability=%s%s)' % args)
|
||||
|
||||
|
||||
def rate_1vs1(rating1, rating2, drawn=False, min_delta=DELTA, env=None):
    """Rate a two-player match and return ``(new_rating1, new_rating2)``.

    ``rating1`` is ranked first unless ``drawn`` is true, in which case both
    players share the same rank.
    """
    environment = global_env() if env is None else env
    second_rank = 0 if drawn else 1
    groups = [(rating1,), (rating2,)]
    teams = environment.rate(groups, [0, second_rank], min_delta=min_delta)
    return teams[0][0], teams[1][0]
|
||||
|
||||
|
||||
def quality_1vs1(rating1, rating2, env=None):
    """Return the match quality of a two-player match."""
    environment = global_env() if env is None else env
    groups = [(rating1,), (rating2,)]
    return environment.quality(groups)
|
||||
|
||||
|
||||
def global_env():
    """Return the global environment, creating the default one on first use.

    The environment is stored as the ``__trueskill__`` attribute of this
    function.
    """
    if not hasattr(global_env, '__trueskill__'):
        # setup the default environment
        setup()
    return global_env.__trueskill__
|
||||
|
||||
|
||||
def setup(mu=MU, sigma=SIGMA, beta=BETA, tau=TAU,
          draw_probability=DRAW_PROBABILITY, backend=None, env=None):
    """Install and return the global environment.

    When ``env`` is given it is installed as-is; otherwise a new ``TrueSkill``
    is built from the remaining arguments.
    """
    installed = env
    if installed is None:
        installed = TrueSkill(mu, sigma, beta, tau, draw_probability, backend)
    global_env.__trueskill__ = installed
    return installed
|
||||
|
||||
|
||||
def rate(rating_groups, ranks=None, weights=None, min_delta=DELTA):
    """Proxy for ``rate`` on the global environment."""
    environment = global_env()
    return environment.rate(rating_groups, ranks, weights, min_delta)
|
||||
|
||||
|
||||
def quality(rating_groups, weights=None):
    """Proxy for ``quality`` on the global environment."""
    environment = global_env()
    return environment.quality(rating_groups, weights)
|
||||
|
||||
|
||||
def expose(rating):
    """Proxy for ``expose`` on the global environment."""
    environment = global_env()
    return environment.expose(rating)
|
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.10-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.10-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.10.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.10.tar.gz
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.11-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.11-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.11.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.11.tar.gz
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.12-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.12-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.12.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.12.tar.gz
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.9-py3-none-any.whl
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.9-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.9.tar.gz
vendored
Normal file
BIN
analysis-master/analysis-amd64/dist/analysis-1.0.0.9.tar.gz
vendored
Normal file
Binary file not shown.
5
analysis-master/analysis-amd64/docker/Dockerfile
Normal file
5
analysis-master/analysis-amd64/docker/Dockerfile
Normal file
@@ -0,0 +1,5 @@
|
||||
# Development image for the analysis package.
FROM python
# Docker does not expand "~" in WORKDIR (it would create a directory literally
# named "~"), so the root user's home directory is spelled out.
WORKDIR /root
# Install dependencies before copying the sources so that this layer stays
# cached until requirements.txt changes.
COPY requirements.txt ./
RUN pip install -r requirements.txt
COPY ./ ./
CMD ["bash"]
|
3
analysis-master/analysis-amd64/docker/start-docker.sh
Executable file
3
analysis-master/analysis-amd64/docker/start-docker.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
# Build the amd64 development image and open an interactive shell in it.
# Abort on the first failing command so a failed cd never builds from the
# wrong directory.
set -e
# Use the parent of this script's directory as the build context, regardless
# of where the script is invoked from.
cd "$(dirname "$0")/.."
docker build -t tra-analysis-amd64-dev -f docker/Dockerfile .
docker run -it tra-analysis-amd64-dev
|
6
analysis-master/analysis-amd64/requirements.txt
Normal file
6
analysis-master/analysis-amd64/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
numba
|
||||
numpy
|
||||
scipy
|
||||
scikit-learn
|
||||
six
|
||||
matplotlib
|
@@ -1,8 +1,14 @@
|
||||
import setuptools
|
||||
|
||||
requirements = []

# Read one requirement per line, dropping the trailing newline and skipping
# blank lines and comment lines so only clean specifiers reach setuptools.
with open("requirements.txt", 'r') as file:
    for line in file:
        line = line.strip()
        if line and not line.startswith("#"):
            requirements.append(line)
|
||||
|
||||
setuptools.setup(
|
||||
name="analysis", # Replace with your own username
|
||||
version="1.0.0.008",
|
||||
name="analysis",
|
||||
version="1.0.0.012",
|
||||
author="The Titan Scouting Team",
|
||||
author_email="titanscout2022@gmail.com",
|
||||
description="analysis package developed by Titan Scouting for The Red Alliance",
|
||||
@@ -10,14 +16,7 @@ setuptools.setup(
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/titanscout2022/tr2022-strategy",
|
||||
packages=setuptools.find_packages(),
|
||||
install_requires=[
|
||||
"numba",
|
||||
"numpy",
|
||||
"scipy",
|
||||
"scikit-learn",
|
||||
"six",
|
||||
"matplotlib"
|
||||
],
|
||||
install_requires=requirements,
|
||||
license = "GNU General Public License v3.0",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
3
analysis-master/analysis-arm64/docker/start-docker.sh
Executable file
3
analysis-master/analysis-arm64/docker/start-docker.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
# Build the arm64 development image and open an interactive shell in it.
# Abort on the first failing command so a failed cd never builds from the
# wrong directory.
set -e
# Use the parent of this script's directory as the build context, regardless
# of where the script is invoked from.
cd "$(dirname "$0")/.."
# Tag with the arm64 name; the previous amd64 tag was copied from the amd64
# script and would overwrite that image.
docker build -t tra-analysis-arm64-dev -f docker/Dockerfile .
docker run -it tra-analysis-arm64-dev
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
python3 setup.py sdist bdist_wheel
|
Binary file not shown.
Binary file not shown.
1
data analysis/config/competition.config
Normal file
1
data analysis/config/competition.config
Normal file
@@ -0,0 +1 @@
|
||||
2020ilch
|
@@ -1,4 +1,3 @@
|
||||
2020ilch
|
||||
balls-blocked,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-collected,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
||||
balls-lower-teleop,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
|
4
data analysis/requirements.txt
Normal file
4
data analysis/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
requests
|
||||
pymongo
|
||||
pandas
|
||||
dnspython
|
@@ -3,10 +3,17 @@
|
||||
# Notes:
|
||||
# setup:
|
||||
|
||||
__version__ = "0.0.4.002"
|
||||
__version__ = "0.0.5.002"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
0.0.5.002:
|
||||
- made changes due to refactoring of analysis
|
||||
0.0.5.001:
|
||||
- text fixes
|
||||
- removed matplotlib requirement
|
||||
0.0.5.000:
|
||||
- improved user interface
|
||||
0.0.4.002:
|
||||
- removed unessasary code
|
||||
0.0.4.001:
|
||||
@@ -82,7 +89,8 @@ __all__ = [
|
||||
from analysis import analysis as an
|
||||
import data as d
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from os import system, name
|
||||
from pathlib import Path
|
||||
import time
|
||||
import warnings
|
||||
|
||||
@@ -91,16 +99,16 @@ def main():
|
||||
while(True):
|
||||
|
||||
current_time = time.time()
|
||||
print("time: " + str(current_time))
|
||||
print("[OK] time: " + str(current_time))
|
||||
|
||||
print(" loading config")
|
||||
competition, config = load_config("config.csv")
|
||||
print(" config loaded")
|
||||
start = time.time()
|
||||
config = load_config(Path("config/stats.config"))
|
||||
competition = an.load_csv(Path("config/competition.config"))[0][0]
|
||||
print("[OK] configs loaded")
|
||||
|
||||
print(" loading database keys")
|
||||
apikey = an.load_csv("keys.txt")[0][0]
|
||||
tbakey = an.load_csv("keys.txt")[1][0]
|
||||
print(" loaded keys")
|
||||
apikey = an.load_csv(Path("config/keys.config"))[0][0]
|
||||
tbakey = an.load_csv(Path("config/keys.config"))[1][0]
|
||||
print("[OK] loaded keys")
|
||||
|
||||
previous_time = d.get_analysis_flags(apikey, "latest_update")
|
||||
|
||||
@@ -113,38 +121,55 @@ def main():
|
||||
|
||||
previous_time = previous_time["latest_update"]
|
||||
|
||||
print(" analysis backtimed to: " + str(previous_time))
|
||||
print("[OK] analysis backtimed to: " + str(previous_time))
|
||||
|
||||
print(" loading data")
|
||||
print("[OK] loading data")
|
||||
start = time.time()
|
||||
data = d.get_match_data_formatted(apikey, competition)
|
||||
pit_data = d.pit = d.get_pit_data_formatted(apikey, competition)
|
||||
print(" loaded data")
|
||||
print("[OK] loaded data in " + str(time.time() - start) + " seconds")
|
||||
|
||||
print(" running tests")
|
||||
print("[OK] running tests")
|
||||
start = time.time()
|
||||
results = simpleloop(data, config)
|
||||
print(" finished tests")
|
||||
print("[OK] finished tests in " + str(time.time() - start) + " seconds")
|
||||
|
||||
print(" running metrics")
|
||||
print("[OK] running metrics")
|
||||
start = time.time()
|
||||
metricsloop(tbakey, apikey, competition, previous_time)
|
||||
print(" finished metrics")
|
||||
print("[OK] finished metrics in " + str(time.time() - start) + " seconds")
|
||||
|
||||
print(" running pit analysis")
|
||||
print("[OK] running pit analysis")
|
||||
start = time.time()
|
||||
pit = pitloop(pit_data, config)
|
||||
print(" finished pit analysis")
|
||||
print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds")
|
||||
|
||||
d.set_analysis_flags(apikey, "latest_update", {"latest_update":current_time})
|
||||
|
||||
print(" pushing to database")
|
||||
print("[OK] pushing to database")
|
||||
start = time.time()
|
||||
push_to_database(apikey, competition, results, pit)
|
||||
print(" pushed to database")
|
||||
print("[OK] pushed to database in " + str(time.time() - start) + " seconds")
|
||||
|
||||
clear()
|
||||
|
||||
def clear():
    """Clear the terminal screen with the platform's shell command.

    Runs 'cls' when `name` equals 'nt' (Windows) and 'clear' otherwise.
    `name` and `system` are module-level names defined outside this block
    (presumably imported from `os` -- confirm against the file's imports).
    """
    command = 'cls' if name == 'nt' else 'clear'
    _ = system(command)
|
||||
|
||||
def load_config(file):
|
||||
config_vector = {}
|
||||
file = an.load_csv(file)
|
||||
for line in file[1:]:
|
||||
for line in file:
|
||||
config_vector[line[0]] = line[1:]
|
||||
|
||||
return (file[0][0], config_vector)
|
||||
return config_vector
|
||||
|
||||
def simpleloop(data, tests): # expects 3D array with [Team][Variable][Match]
|
||||
|
||||
@@ -263,11 +288,11 @@ def metricsloop(tbakey, apikey, competition, timestamp): # listener based metric
|
||||
|
||||
observations = {"red": 0.5, "blu": 0.5}
|
||||
|
||||
red_elo_delta = an.elo(red_elo["score"], blu_elo["score"], observations["red"], elo_N, elo_K) - red_elo["score"]
|
||||
blu_elo_delta = an.elo(blu_elo["score"], red_elo["score"], observations["blu"], elo_N, elo_K) - blu_elo["score"]
|
||||
red_elo_delta = an.Metrics.elo(red_elo["score"], blu_elo["score"], observations["red"], elo_N, elo_K) - red_elo["score"]
|
||||
blu_elo_delta = an.Metrics.elo(blu_elo["score"], red_elo["score"], observations["blu"], elo_N, elo_K) - blu_elo["score"]
|
||||
|
||||
new_red_gl2_score, new_red_gl2_rd, new_red_gl2_vol = an.glicko2(red_gl2["score"], red_gl2["rd"], red_gl2["vol"], [blu_gl2["score"]], [blu_gl2["rd"]], [observations["red"], observations["blu"]])
|
||||
new_blu_gl2_score, new_blu_gl2_rd, new_blu_gl2_vol = an.glicko2(blu_gl2["score"], blu_gl2["rd"], blu_gl2["vol"], [red_gl2["score"]], [red_gl2["rd"]], [observations["blu"], observations["red"]])
|
||||
new_red_gl2_score, new_red_gl2_rd, new_red_gl2_vol = an.Metrics.glicko2(red_gl2["score"], red_gl2["rd"], red_gl2["vol"], [blu_gl2["score"]], [blu_gl2["rd"]], [observations["red"], observations["blu"]])
|
||||
new_blu_gl2_score, new_blu_gl2_rd, new_blu_gl2_vol = an.Metrics.glicko2(blu_gl2["score"], blu_gl2["rd"], blu_gl2["vol"], [red_gl2["score"]], [red_gl2["rd"]], [observations["blu"], observations["red"]])
|
||||
|
||||
red_gl2_delta = {"score": new_red_gl2_score - red_gl2["score"], "rd": new_red_gl2_rd - red_gl2["rd"], "vol": new_red_gl2_vol - red_gl2["vol"]}
|
||||
blu_gl2_delta = {"score": new_blu_gl2_score - blu_gl2["score"], "rd": new_blu_gl2_rd - blu_gl2["rd"], "vol": new_blu_gl2_vol - blu_gl2["vol"]}
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,944 +0,0 @@
|
||||
# Titan Robotics Team 2022: Data Analysis Module
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# this should be imported as a python module using 'import analysis'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has not been optimized for multithreaded computing
|
||||
# number of easter eggs: 2
|
||||
# setup:
|
||||
|
||||
__version__ = "1.0.9.000"
|
||||
|
||||
# changelog should be viewed using print(analysis.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
1.0.9.000:
|
||||
- refactored
|
||||
- numpyed everything
|
||||
- removed stats in favor of numpy functions
|
||||
1.0.8.005:
|
||||
- minor fixes
|
||||
1.0.8.004:
|
||||
- removed a few unused dependencies
|
||||
1.0.8.003:
|
||||
- added p_value function
|
||||
1.0.8.002:
|
||||
- updated __all__ correctly to contain changes made in v 1.0.8.000 and v 1.0.8.001
|
||||
1.0.8.001:
|
||||
- refactors
|
||||
- bugfixes
|
||||
1.0.8.000:
|
||||
- depreciated histo_analysis_old
|
||||
- depreciated debug
|
||||
- altered basic_analysis to take array data instead of filepath
|
||||
- refactor
|
||||
- optimization
|
||||
1.0.7.002:
|
||||
- bug fixes
|
||||
1.0.7.001:
|
||||
- bug fixes
|
||||
1.0.7.000:
|
||||
- added tanh_regression (logistical regression)
|
||||
- bug fixes
|
||||
1.0.6.005:
|
||||
- added z_normalize function to normalize dataset
|
||||
- bug fixes
|
||||
1.0.6.004:
|
||||
- bug fixes
|
||||
1.0.6.003:
|
||||
- bug fixes
|
||||
1.0.6.002:
|
||||
- bug fixes
|
||||
1.0.6.001:
|
||||
- corrected __all__ to contain all of the functions
|
||||
1.0.6.000:
|
||||
- added calc_overfit, which calculates two measures of overfit, error and performance
|
||||
- added calculating overfit to optimize_regression
|
||||
1.0.5.000:
|
||||
- added optimize_regression function, which is a sample function to find the optimal regressions
|
||||
- optimize_regression function filters out some overfit funtions (functions with r^2 = 1)
|
||||
- planned addition: overfit detection in the optimize_regression function
|
||||
1.0.4.002:
|
||||
- added __changelog__
|
||||
- updated debug function with log and exponential regressions
|
||||
1.0.4.001:
|
||||
- added log regressions
|
||||
- added exponential regressions
|
||||
- added log_regression and exp_regression to __all__
|
||||
1.0.3.008:
|
||||
- added debug function to further consolidate functions
|
||||
1.0.3.007:
|
||||
- added builtin benchmark function
|
||||
- added builtin random (linear) data generation function
|
||||
- added device initialization (_init_device)
|
||||
1.0.3.006:
|
||||
- reorganized the imports list to be in alphabetical order
|
||||
- added search and regurgitate functions to c_entities, nc_entities, obstacles, objectives
|
||||
1.0.3.005:
|
||||
- major bug fixes
|
||||
- updated historical analysis
|
||||
- depreciated old historical analysis
|
||||
1.0.3.004:
|
||||
- added __version__, __author__, __all__
|
||||
- added polynomial regression
|
||||
- added root mean squared function
|
||||
- added r squared function
|
||||
1.0.3.003:
|
||||
- bug fixes
|
||||
- added c_entities
|
||||
1.0.3.002:
|
||||
- bug fixes
|
||||
- added nc_entities, obstacles, objectives
|
||||
- consolidated statistics.py to analysis.py
|
||||
1.0.3.001:
|
||||
- compiled 1d, column, and row basic stats into basic stats function
|
||||
1.0.3.000:
|
||||
- added historical analysis function
|
||||
1.0.2.xxx:
|
||||
- added z score test
|
||||
1.0.1.xxx:
|
||||
- major bug fixes
|
||||
1.0.0.xxx:
|
||||
- added loading csv
|
||||
- added 1d, column, row basic stats
|
||||
"""
|
||||
|
||||
__author__ = (
|
||||
"Arthur Lu <arthurlu@ttic.edu>, "
|
||||
"Jacob Levine <jlevine@ttic.edu>,"
|
||||
)
|
||||
|
||||
# Names exported by `from analysis import *`.
# Every entry must be defined in this module: a star import raises
# AttributeError for any listed name that does not exist. The entity classes
# (c_entities, nc_entities, obstacles, objectives) are no longer defined here
# and were removed from the list; tanh_regression and p_value were added.
__all__ = [
    '_init_device',
    'load_csv',
    'basic_stats',
    'z_score',
    'z_normalize',
    'stdev_z_split',
    'histo_analysis',
    'poly_regression',
    'log_regression',
    'exp_regression',
    'tanh_regression',
    'r_squared',
    'rms',
    'calc_overfit',
    'strip_data',
    'optimize_regression',
    'select_best_regression',
    'p_value',
    'basic_analysis',
    # all statistics functions left out due to integration in other functions
]
|
||||
|
||||
# now back to your regularly scheduled programming:
|
||||
|
||||
# imports (now in alphabetical order! v 1.0.3.006):
|
||||
|
||||
from bisect import bisect_left, bisect_right
|
||||
import collections
|
||||
import csv
|
||||
from decimal import Decimal
|
||||
import functools
|
||||
from fractions import Fraction
|
||||
from itertools import groupby
|
||||
import math
|
||||
import matplotlib
|
||||
import numbers
|
||||
import numpy as np
|
||||
import pandas
|
||||
import random
|
||||
import scipy
|
||||
from scipy.optimize import curve_fit
|
||||
from scipy import stats
|
||||
from sklearn import *
|
||||
# import statistics <-- statistics.py functions have been integrated into analysis.py as of v 1.0.3.002
|
||||
import time
|
||||
import torch
|
||||
|
||||
class error(ValueError):
    """Exception raised by this module for invalid arguments or failed setup."""
|
||||
|
||||
def _init_device(setting, arg): # initiates computation device for ANNs
|
||||
if setting == "cuda":
|
||||
try:
|
||||
return torch.device(setting + ":" + str(arg) if torch.cuda.is_available() else "cpu")
|
||||
except:
|
||||
raise error("could not assign cuda or cpu")
|
||||
elif setting == "cpu":
|
||||
try:
|
||||
return torch.device("cpu")
|
||||
except:
|
||||
raise error("could not assign cpu")
|
||||
else:
|
||||
raise error("specified device does not exist")
|
||||
|
||||
def load_csv(filepath):
    """Read the CSV file at `filepath` and return its rows as a numpy array.

    Each row produced by csv.reader becomes one entry of the array; cell
    values are not converted to numbers here.
    """
    with open(filepath, newline='') as handle:
        rows = [record for record in csv.reader(handle)]
    # the with-block has already closed the file at this point
    return np.array(rows)
|
||||
|
||||
# data=array, mode = ['1d':1d_basic_stats, 'column':c_basic_stats, 'row':r_basic_stats], arg for mode 1 or mode 2 for column or row
|
||||
def basic_stats(data, method, arg):
    """Compute mean, median, mode, standard deviation and variance of a data slice.

    Args:
        data: a 1d sequence (method "1d") or a sequence of rows (methods
            "column" and "row").
        method: "1d" or 0 to use `data` as is, "column" or 1 to use column
            `arg` of every row, "row" or 2 to use row `arg`. The value 'debug'
            returns a usage string instead of statistics.
        arg: column or row index; ignored for "1d".

    Returns:
        Tuple (mean, median, mode, stdev, variance). mode, stdev and variance
        are None when they cannot be computed.

    Raises:
        error: if `method` is not one of the values above. (Previously the
            column/row branches were disabled and such calls returned None.)
    """
    if method == 'debug':
        return "basic_stats requires 3 args: data, mode, arg; where data is data to be analyzed, mode is an int from 0 - 2 depending on type of analysis (by column or by row) and is only applicable to 2d arrays (for 1d arrays use mode 1), and arg is row/column number for mode 1 or mode 2; function returns: [mean, median, mode, stdev, variance]"

    if method == "1d" or method == 0:
        values = data
    elif method == "column" or method == 1:
        values = []
        for row in data:
            try:
                values.append(float(row[arg]))
            except (IndexError, TypeError, ValueError):
                # best effort: rows that are too short or hold a
                # non-numeric cell in this column are skipped
                continue
    elif method == "row" or method == 2:
        values = data[arg]
    else:
        raise error("method error")

    data_t = np.array(values).astype(float)

    def _optional(statistic):
        # These three statistics are allowed to fail; report None instead.
        try:
            return statistic(data_t)
        except Exception:
            return None

    _mean = mean(data_t)
    _median = median(data_t)
    _mode = _optional(mode)
    _stdev = _optional(stdev)
    _variance = _optional(variance)

    return _mean, _median, _mode, _stdev, _variance
|
||||
|
||||
|
||||
# returns z score with inputs of point, mean and standard deviation of spread
|
||||
def z_score(point, mean, stdev):
    """Return the distance of `point` from `mean`, in units of `stdev`."""
    deviation = point - mean
    return deviation / stdev
|
||||
|
||||
# mode is either 'x' or 'y' or 'both' depending on the variable(s) to be normalized
|
||||
def z_normalize(x, y, mode):
    """Z-normalize x, y, or both.

    Args:
        x, y: 1d numeric sequences.
        mode: 'x', 'y' or 'both', naming the variable(s) to normalize.

    Returns:
        Tuple (x, y). A normalized variable is returned as a float numpy
        array of (value - mean) / standard deviation; the other variable is
        returned unchanged.

    Raises:
        error: if `mode` is not 'x', 'y' or 'both' (the error used to be
            returned instead of raised).
    """
    def _standardize(values):
        # np.std defaults to the population standard deviation, which is what
        # this module's stdev() helper computes as well.
        arr = np.array(values).astype(float)
        return (arr - np.mean(arr)) / np.std(arr)

    if mode == 'x':
        return _standardize(x), y
    if mode == 'y':
        return x, _standardize(y)
    if mode == 'both':
        return _standardize(x), _standardize(y)

    raise error('method error')
|
||||
|
||||
|
||||
# returns the normal probability density sampled from low_bound to high_bound in steps of delta, given the mean and standard deviation
|
||||
def stdev_z_split(mean, stdev, delta, low_bound, high_bound):
    """Sample the normal probability density between two bounds.

    Args:
        mean, stdev: parameters of the normal distribution.
        delta: positive step between sample points.
        low_bound: first sample point.
        high_bound: sampling stops once the next point would exceed this.

    Returns:
        Float numpy array of density values at low_bound, low_bound + delta,
        ... The value at low_bound is always included.

    Raises:
        error: if `delta` is not positive (the loop would never terminate).
    """
    if delta <= 0:
        raise error("delta must be positive")

    scale = 1 / (stdev * math.sqrt(2 * math.pi))
    densities = []
    i = low_bound

    while True:
        densities.append(float(scale * math.exp(-0.5 * (((i - mean) / stdev) ** 2))))
        i = i + delta
        if i > high_bound:
            break

    return np.array(densities).astype(float)
|
||||
|
||||
|
||||
def histo_analysis(hist_data, delta, low_bound, high_bound):
    """Predict the next value of a series from its historical step changes.

    For every z in low_bound, low_bound + delta, ... (<= high_bound) the
    prediction is: last value + mean change + z * standard deviation of change.

    Args:
        hist_data: sequence of values in time order; entries that cannot be
            converted to float are left out of the change statistics.
            The string 'debug' returns a usage message instead.
        delta: positive step in z.
        low_bound, high_bound: z range, in standard deviations.

    Returns:
        List of float predictions.

    Raises:
        error: if `delta` is not positive.
        IndexError: if `hist_data` is empty.

    Changes are computed as current - previous over consecutive pairs. The
    earlier code used previous - current and also paired the first element
    with the last one, which skewed the statistics.
    """
    if isinstance(hist_data, str) and hist_data == 'debug':
        return ('returns list of predicted values based on historical data; input delta for delta step in z-score and lower and higher bounds in number of standard deviations')

    if delta <= 0:
        raise error("delta must be positive")

    changes = []
    for previous, current in zip(hist_data[:-1], hist_data[1:]):
        try:
            changes.append(float(current) - float(previous))
        except (TypeError, ValueError):
            continue  # best effort: ignore pairs with a non-numeric entry

    if changes:
        mean_derivative = float(np.mean(changes))
        stdev_derivative = float(np.std(changes))
    else:
        # a single data point carries no trend information
        mean_derivative = 0.0
        stdev_derivative = 0.0

    last_value = float(hist_data[-1])
    predictions = []
    i = low_bound

    while i <= high_bound:
        predictions.append(last_value + mean_derivative + i * stdev_derivative)
        i = i + delta

    return predictions
|
||||
|
||||
|
||||
def poly_regression(x, y, power):
    """Fit a polynomial of degree `power` to (x, y) by least squares.

    Args:
        x: numeric sequence, or the string "null" to use 1..len(y).
        y: numeric sequence of the same length.
        power: polynomial degree.

    Returns:
        List [eq_str, rms, r2]. eq_str is a Python expression in the variable
        z ("c*(z**n)+...+c*(z**0)"); rms and r2 are the root mean squared
        error and r^2 of the fit on the input points.
    """
    if isinstance(x, str) and x == "null":
        # if x is 'null', x is filled with the integers 1..len(y)
        x = list(range(1, len(y) + 1))

    # np.polyfit is the function the scipy.polyfit alias forwarded to
    reg_eq = np.polyfit(x, y, deg=power)
    degree = len(reg_eq) - 1

    eq_str = "+".join(
        str(coefficient) + "*(z**" + str(degree - position) + ")"
        for position, coefficient in enumerate(reg_eq)
    )

    # Evaluate the fitted polynomial directly instead of exec-ing eq_str, so a
    # failure is reported rather than silently dropping a point.
    vals = np.polyval(reg_eq, np.array(x).astype(float)).tolist()

    _rms = rms(vals, y)
    r2_d2 = r_squared(vals, y)

    return [eq_str, _rms, r2_d2]
|
||||
|
||||
|
||||
def log_regression(x, y, base):
    """Fit y = a * log_base(x) + b by least squares.

    Args:
        x: sequence of positive numbers.
        y: numeric sequence of the same length.
        base: logarithm base; positive and not 1.

    Returns:
        Tuple (eq_str, rms, r2). eq_str is a Python expression in z that uses
        np.log; rms and r2 describe the fit on the input points.

    Raises:
        error: if `base` is invalid or any x is not positive.
    """
    if base <= 0 or base == 1:
        raise error("log base must be positive and not 1")

    x_arr = np.array(x).astype(float)
    if np.any(x_arr <= 0):
        raise error("log regression requires positive x values")

    # change of base for logs
    x_fit = np.log(x_arr) / np.log(base)

    # y = reg_eq[0] * log(x, base) + reg_eq[1]
    reg_eq = np.polyfit(x_fit, y, 1)
    eq_str = str(reg_eq[0]) + "* (np.log(z) / np.log(" + \
        str(base) + "))+" + str(reg_eq[1])

    vals = (reg_eq[0] * x_fit + reg_eq[1]).tolist()

    _rms = rms(vals, y)
    r2_d2 = r_squared(vals, y)

    return eq_str, _rms, r2_d2
|
||||
|
||||
|
||||
def exp_regression(x, y, base):
    """Fit y = base**(a*x) * base**b by least squares on log_base(y).

    Args:
        x: numeric sequence.
        y: sequence of positive numbers of the same length.
        base: exponent base; positive and not 1.

    Returns:
        Tuple (eq_str, rms, r2). eq_str is a Python expression in z; rms and
        r2 describe the fit on the input points.

    Raises:
        error: if `base` is invalid or any y is not positive.
    """
    if base <= 0 or base == 1:
        raise error("exponent base must be positive and not 1")

    y_arr = np.array(y).astype(float)
    if np.any(y_arr <= 0):
        raise error("exp regression requires positive y values")

    # change of base for logs
    y_fit = np.log(y_arr) / np.log(base)

    # y = base ^ (reg_eq[0] * x) * base ^ (reg_eq[1])
    # Weight by sqrt(y) so large y values are not under-weighted by the log
    # transform. The earlier weights sqrt(log_base(y)) are NaN whenever
    # log_base(y) is negative.
    reg_eq = np.polyfit(x, y_fit, 1, w=np.sqrt(y_arr))
    eq_str = "(" + str(base) + "**(" + \
        str(reg_eq[0]) + "*z))*(" + str(base) + "**(" + str(reg_eq[1]) + "))"

    x_arr = np.array(x).astype(float)
    vals = ((base ** (reg_eq[0] * x_arr)) * (base ** reg_eq[1])).tolist()

    _rms = rms(vals, y)
    r2_d2 = r_squared(vals, y)

    return eq_str, _rms, r2_d2
|
||||
|
||||
|
||||
def tanh_regression(x, y):
    """Fit y = a * tanh(b * (x - c)) + d with scipy's curve_fit.

    Args:
        x, y: numeric sequences of equal length.

    Returns:
        Tuple (eq_str, rms, r2). eq_str is a Python expression in z that uses
        np.tanh; rms and r2 describe the fit on the input points.
    """
    def tanh(x, a, b, c, d):
        return a * np.tanh(b * (x - c)) + d

    reg_eq = np.float64(curve_fit(tanh, np.array(x), np.array(y))[0]).tolist()
    eq_str = str(reg_eq[0]) + " * np.tanh(" + str(reg_eq[1]) + \
        "*(z - " + str(reg_eq[2]) + ")) + " + str(reg_eq[3])

    # Evaluate the model function directly rather than exec-ing eq_str, so a
    # failing point raises instead of being silently dropped.
    vals = tanh(np.array(x).astype(float), *reg_eq).tolist()

    _rms = rms(vals, y)
    r2_d2 = r_squared(vals, y)

    return eq_str, _rms, r2_d2
|
||||
|
||||
|
||||
def r_squared(predictions, targets):  # assumes equal size inputs
    """Return the r^2 score of `predictions` measured against `targets`.

    `metrics` is a module-level name defined outside this block (presumably
    sklearn.metrics via the file's `from sklearn import *` -- confirm).
    """
    y_true = np.array(targets)
    y_pred = np.array(predictions)
    return metrics.r2_score(y_true, y_pred)
|
||||
|
||||
|
||||
def rms(predictions, targets):  # assumes equal size inputs
    """Return the root mean squared error between predictions and targets.

    Args:
        predictions: indexable numeric sequence, at least as long as targets.
        targets: indexable numeric sequence.

    Returns:
        float: sqrt(sum((target - prediction)**2) / len(targets)).

    Raises:
        ZeroDivisionError: if `targets` is empty.
    """
    count = len(targets)
    # accumulate every squared residual (the sum used to be overwritten on
    # each iteration, so only the last residual counted)
    squared_error = sum((targets[i] - predictions[i]) ** 2 for i in range(count))

    return float(math.sqrt(squared_error / count))
|
||||
|
||||
|
||||
def calc_overfit(equation, rms_train, r2_train, x_test, y_test):
    """Return the performance overfit of a fitted equation.

    performance overfit = performance(train) - performance(test), where
    performance is r^2.

    Args:
        equation: Python expression in the variable z, as produced by the
            *_regression functions of this module.
        rms_train: training rms; accepted for interface compatibility, unused.
        r2_train: training r^2.
        x_test, y_test: held-out data of equal length.

    Returns:
        r2_train minus the r^2 of the equation on the test data.
    """
    vals = []

    for z in x_test:
        # NOTE(security): `equation` is evaluated as Python source. Only pass
        # equations generated by this module, never untrusted text.
        vals.append(eval(equation, globals(), {"z": z}))

    r2_test = r_squared(vals, y_test)

    return r2_train - r2_test
|
||||
|
||||
|
||||
def strip_data(data, mode):
    """Placeholder for splitting `data` into x/y series; validates `mode` only.

    Args:
        data: currently unused.
        mode: "adam" (x is the row number, y are the data) or
            "eve" (x are the data, y is the column number).

    Returns:
        None; neither mode is implemented yet.

    Raises:
        error: if `mode` is neither "adam" nor "eve".
    """
    if mode == "adam":  # x is the row number, y are the data
        pass
    elif mode == "eve":  # x are the data, y is the column number
        pass
    else:
        # elif above matters: with two separate ifs, "adam" fell through to
        # this branch and was rejected
        raise error("mode error")
|
||||
|
||||
|
||||
# _range in poly regression is the range of powers tried, and in log/exp it is the inverse of the stepsize taken from -1000 to 1000
|
||||
def optimize_regression(x, y, _range, resolution):
    """Try many regressions on a random train/test split and report them all.

    For demonstration purposes only; it is slow.

    Args:
        x, y: numeric sequences of equal length. Neither is modified.
        _range: polynomial degrees 0.._range are tried.
        resolution: int; log/exp bases i / resolution are tried for
            i in 1..100 * resolution.

    Returns:
        Tuple (eqs, rmss, r2s, overfit) of parallel lists, one entry per
        regression that could be fitted and whose training r^2 is not 1.

    Raises:
        error: if `resolution` is not an int.
    """
    if type(resolution) != int:
        raise error("resolution must be int")

    x_all = list(x)
    y_all = [float(value) for value in y]

    # Hold out half of the points, chosen without replacement. Working on
    # copies keeps the caller's x intact and keeps every x paired with its y.
    test_count = math.floor(len(x_all) * 0.5)
    test_indices = set(random.sample(range(len(x_all)), test_count))

    x_train, y_train, x_test, y_test = [], [], [], []
    for index, (x_value, y_value) in enumerate(zip(x_all, y_all)):
        if index in test_indices:
            x_test.append(x_value)
            y_test.append(y_value)
        else:
            x_train.append(x_value)
            y_train.append(y_value)

    eqs = []
    rmss = []
    r2s = []

    def _try_fit(fit, *extra):
        # Best effort: a regression that cannot be fitted is skipped.
        try:
            equation, fit_rms, fit_r2 = fit(x_train, y_train, *extra)
        except Exception:
            return
        # equations where r2 = 1 almost always overfit the data; drop them
        if fit_r2 == 1:
            return
        eqs.append(equation)
        rmss.append(fit_rms)
        r2s.append(fit_r2)

    for degree in range(0, _range + 1, 1):
        _try_fit(poly_regression, degree)

    for i in range(1, 100 * resolution + 1):
        _try_fit(exp_regression, float(i / resolution))

    for i in range(1, 100 * resolution + 1):
        _try_fit(log_regression, float(i / resolution))

    _try_fit(tanh_regression)

    overfit = []

    for i in range(0, len(eqs), 1):
        overfit.append(calc_overfit(eqs[i], rmss[i], r2s[i], x_test, y_test))

    return eqs, rmss, r2s, overfit
|
||||
|
||||
|
||||
def select_best_regression(eqs, rmss, r2s, overfit, selector):
    """Pick one regression out of the parallel result lists.

    selector "min_overfit" chooses the entry with the smallest overfit,
    "max_r2s" the entry with the largest r^2. Any other selector yields the
    defaults ("", 0, 0, 0).

    Returns:
        Tuple (equation, rms, r2, overfit) of the chosen entry.
    """
    if selector == "max_r2s":
        best = np.argmax(r2s)
    elif selector == "min_overfit":
        best = np.argmin(overfit)
    else:
        return "", 0, 0, 0

    return eqs[best], rmss[best], r2s[best], overfit[best]
|
||||
|
||||
|
||||
def p_value(x, y):  # takes 2 1d arrays
    """Return the p-value reported by scipy.stats.ttest_ind(x, y) with default options."""
    result = stats.ttest_ind(x, y)
    return result[1]
|
||||
|
||||
|
||||
# assumes that rows are the independent variable and columns are the dependent variable. also assumes that time flows from lowest column to highest column.
|
||||
def basic_analysis(data):
    """Run basic statistics on every row and column, plus a per-row forecast.

    Args:
        data: sequence of rows; rows may differ in length.

    Returns:
        List [row_b_stats, column_b_stats, row_histo]: basic_stats per row,
        basic_stats per column (up to the longest row), and histo_analysis
        per row over z from -0.67449 to 0.67449 in steps of 0.67449.
    """
    row_count = len(data)
    widest = max([len(data[r]) for r in range(row_count)])

    row_b_stats = []
    row_histo = []
    for r in range(row_count):
        row_b_stats.append(basic_stats(data, "row", r))
        row_histo.append(histo_analysis(data[r], 0.67449, -0.67449, 0.67449))

    column_b_stats = [basic_stats(data, "column", c) for c in range(widest)]

    return [row_b_stats, column_b_stats, row_histo]
|
||||
|
||||
|
||||
def benchmark(x, y):
    """Time random data generation and basic analysis.

    Writes `y` rows of `x` random values in [-10, 10] to "data/data.csv",
    then loads that file and analyses it.

    Returns:
        List [generation_seconds, analysis_seconds]; the analysis time
        includes loading the CSV.
    """
    start_g = time.time()
    generate_data("data/data.csv", x, y, -10, 10)
    end_g = time.time()

    start_a = time.time()
    # basic_analysis takes array data, not a file path (changed in v1.0.8.000)
    basic_analysis(load_csv("data/data.csv"))
    end_a = time.time()

    return [(end_g - start_g), (end_a - start_a)]
|
||||
|
||||
|
||||
def generate_data(filename, x, y, low, high):
    """Write a CSV file of uniformly distributed random numbers.

    Args:
        filename: path of the file to create or overwrite.
        x: number of values per row; values below 1 still produce one value
            per row.
        y: number of rows.
        low, high: bounds passed to random.uniform.
    """
    values_per_row = max(x, 1)

    # the with-block guarantees the file is flushed and closed (the handle
    # used to be left open)
    with open(filename, "w") as file:
        for _ in range(0, y, 1):
            row = [str(random.uniform(low, high)) for _ in range(values_per_row)]
            file.write(",".join(row) + "\n")
|
||||
|
||||
def mean(data):
    """Return the arithmetic mean of `data`, computed by numpy.mean."""
    average = np.mean(data)
    return average
|
||||
|
||||
def median(data):
    """Return the median of `data`, computed by numpy.median."""
    middle = np.median(data)
    return middle
|
||||
|
||||
def mode(data):
    """Return the most frequent value in `data`.

    Works for any numeric data, including floats and negative numbers
    (np.bincount, used previously, accepts only non-negative integers). When
    several values are equally common the smallest one is returned, which is
    also what the bincount version returned for integer input.

    Raises:
        ValueError: if `data` is empty.
    """
    values, counts = np.unique(data, return_counts=True)
    # np.unique returns sorted values, so the first maximum is the smallest mode
    return values[np.argmax(counts)]
|
||||
|
||||
def stdev(data):
    """Return the standard deviation of `data`, computed by numpy.std with default arguments."""
    spread = np.std(data)
    return spread
|
||||
|
||||
def variance(data):
    """Return the variance of `data`, computed by numpy.var with default arguments."""
    result = np.var(data)
    return result
|
||||
|
||||
"""
|
||||
|
||||
class StatisticsError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def _sum(data, start=0):
|
||||
count = 0
|
||||
n, d = _exact_ratio(start)
|
||||
partials = {d: n}
|
||||
partials_get = partials.get
|
||||
T = _coerce(int, type(start))
|
||||
for typ, values in groupby(data, type):
|
||||
T = _coerce(T, typ) # or raise TypeError
|
||||
for n, d in map(_exact_ratio, values):
|
||||
count += 1
|
||||
partials[d] = partials_get(d, 0) + n
|
||||
if None in partials:
|
||||
|
||||
total = partials[None]
|
||||
assert not _isfinite(total)
|
||||
else:
|
||||
|
||||
total = sum(Fraction(n, d) for d, n in sorted(partials.items()))
|
||||
return (T, total, count)
|
||||
|
||||
|
||||
def _isfinite(x):
|
||||
try:
|
||||
return x.is_finite() # Likely a Decimal.
|
||||
except AttributeError:
|
||||
return math.isfinite(x) # Coerces to float first.
|
||||
|
||||
|
||||
def _coerce(T, S):
|
||||
|
||||
assert T is not bool, "initial type T is bool"
|
||||
|
||||
if T is S:
|
||||
return T
|
||||
|
||||
if S is int or S is bool:
|
||||
return T
|
||||
if T is int:
|
||||
return S
|
||||
|
||||
if issubclass(S, T):
|
||||
return S
|
||||
if issubclass(T, S):
|
||||
return T
|
||||
|
||||
if issubclass(T, int):
|
||||
return S
|
||||
if issubclass(S, int):
|
||||
return T
|
||||
|
||||
if issubclass(T, Fraction) and issubclass(S, float):
|
||||
return S
|
||||
if issubclass(T, float) and issubclass(S, Fraction):
|
||||
return T
|
||||
|
||||
msg = "don't know how to coerce %s and %s"
|
||||
raise TypeError(msg % (T.__name__, S.__name__))
|
||||
|
||||
|
||||
def _exact_ratio(x):
|
||||
|
||||
try:
|
||||
|
||||
if type(x) is float or type(x) is Decimal:
|
||||
return x.as_integer_ratio()
|
||||
try:
|
||||
|
||||
return (x.numerator, x.denominator)
|
||||
except AttributeError:
|
||||
try:
|
||||
|
||||
return x.as_integer_ratio()
|
||||
except AttributeError:
|
||||
|
||||
pass
|
||||
except (OverflowError, ValueError):
|
||||
|
||||
assert not _isfinite(x)
|
||||
return (x, None)
|
||||
msg = "can't convert type '{}' to numerator/denominator"
|
||||
raise TypeError(msg.format(type(x).__name__))
|
||||
|
||||
|
||||
def _convert(value, T):
|
||||
|
||||
if type(value) is T:
|
||||
|
||||
return value
|
||||
if issubclass(T, int) and value.denominator != 1:
|
||||
T = float
|
||||
try:
|
||||
|
||||
return T(value)
|
||||
except TypeError:
|
||||
if issubclass(T, Decimal):
|
||||
return T(value.numerator) / T(value.denominator)
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
def _counts(data):
|
||||
|
||||
table = collections.Counter(iter(data)).most_common()
|
||||
if not table:
|
||||
return table
|
||||
|
||||
maxfreq = table[0][1]
|
||||
for i in range(1, len(table)):
|
||||
if table[i][1] != maxfreq:
|
||||
table = table[:i]
|
||||
break
|
||||
return table
|
||||
|
||||
|
||||
def _find_lteq(a, x):
|
||||
|
||||
i = bisect_left(a, x)
|
||||
if i != len(a) and a[i] == x:
|
||||
return i
|
||||
raise ValueError
|
||||
|
||||
|
||||
def _find_rteq(a, l, x):
|
||||
|
||||
i = bisect_right(a, x, lo=l)
|
||||
if i != (len(a) + 1) and a[i - 1] == x:
|
||||
return i - 1
|
||||
raise ValueError
|
||||
|
||||
|
||||
def _fail_neg(values, errmsg='negative value'):
|
||||
|
||||
for x in values:
|
||||
if x < 0:
|
||||
raise StatisticsError(errmsg)
|
||||
yield x
|
||||
def mean(data):
|
||||
|
||||
if iter(data) is data:
|
||||
data = list(data)
|
||||
n = len(data)
|
||||
if n < 1:
|
||||
raise StatisticsError('mean requires at least one data point')
|
||||
T, total, count = _sum(data)
|
||||
assert count == n
|
||||
return _convert(total / n, T)
|
||||
|
||||
|
||||
def median(data):
|
||||
|
||||
data = sorted(data)
|
||||
n = len(data)
|
||||
if n == 0:
|
||||
raise StatisticsError("no median for empty data")
|
||||
if n % 2 == 1:
|
||||
return data[n // 2]
|
||||
else:
|
||||
i = n // 2
|
||||
return (data[i - 1] + data[i]) / 2
|
||||
|
||||
|
||||
def mode(data):
|
||||
|
||||
table = _counts(data)
|
||||
if len(table) == 1:
|
||||
return table[0][0]
|
||||
elif table:
|
||||
raise StatisticsError(
|
||||
'no unique mode; found %d equally common values' % len(table)
|
||||
)
|
||||
else:
|
||||
raise StatisticsError('no mode for empty data')
|
||||
|
||||
|
||||
def _ss(data, c=None):
|
||||
|
||||
if c is None:
|
||||
c = mean(data)
|
||||
T, total, count = _sum((x - c)**2 for x in data)
|
||||
|
||||
U, total2, count2 = _sum((x - c) for x in data)
|
||||
assert T == U and count == count2
|
||||
total -= total2**2 / len(data)
|
||||
assert not total < 0, 'negative sum of square deviations: %f' % total
|
||||
return (T, total)
|
||||
|
||||
|
||||
def variance(data, xbar=None):
|
||||
|
||||
if iter(data) is data:
|
||||
data = list(data)
|
||||
n = len(data)
|
||||
if n < 2:
|
||||
raise StatisticsError('variance requires at least two data points')
|
||||
T, ss = _ss(data, xbar)
|
||||
return _convert(ss / (n - 1), T)
|
||||
|
||||
|
||||
def stdev(data, xbar=None):
|
||||
|
||||
var = variance(data, xbar)
|
||||
try:
|
||||
return var.sqrt()
|
||||
except AttributeError:
|
||||
return math.sqrt(var)
|
||||
"""
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1,2 +0,0 @@
|
||||
python setup.py build_ext --inplace
|
||||
pause
|
@@ -1 +0,0 @@
|
||||
python setup.py build_ext --inplace
|
@@ -1,5 +0,0 @@
|
||||
from distutils.core import setup
from Cython.Build import cythonize

# Build script: passes analysis.py through cythonize and registers the result
# as the extension modules of the 'analysis' distribution.
extensions = cythonize("analysis.py")

setup(name='analysis', ext_modules=extensions)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
[{"outputType":{"type":"APK"},"apkInfo":{"type":"MAIN","splits":[],"versionCode":1,"versionName":"1.0","enabled":true,"outputFile":"app-debug.apk","fullName":"debug","baseName":"debug"},"path":"app-debug.apk","properties":{}}]
|
13
dep/2019/apps/android/source/.gitignore
vendored
13
dep/2019/apps/android/source/.gitignore
vendored
@@ -1,13 +0,0 @@
|
||||
*.iml
|
||||
.gradle
|
||||
/local.properties
|
||||
/.idea/caches
|
||||
/.idea/libraries
|
||||
/.idea/modules.xml
|
||||
/.idea/workspace.xml
|
||||
/.idea/navEditor.xml
|
||||
/.idea/assetWizardSettings.xml
|
||||
.DS_Store
|
||||
/build
|
||||
/captures
|
||||
.externalNativeBuild
|
@@ -1,29 +0,0 @@
|
||||
<component name="ProjectCodeStyleConfiguration">
|
||||
<code_scheme name="Project" version="173">
|
||||
<Objective-C-extensions>
|
||||
<file>
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Import" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Macro" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Typedef" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Enum" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Constant" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Global" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Struct" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="FunctionPredecl" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Function" />
|
||||
</file>
|
||||
<class>
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Property" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Synthesize" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="InitMethod" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="StaticMethod" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="InstanceMethod" />
|
||||
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="DeallocMethod" />
|
||||
</class>
|
||||
<extensions>
|
||||
<pair source="cpp" header="h" fileNamingConvention="NONE" />
|
||||
<pair source="c" header="h" fileNamingConvention="NONE" />
|
||||
</extensions>
|
||||
</Objective-C-extensions>
|
||||
</code_scheme>
|
||||
</component>
|
18
dep/2019/apps/android/source/.idea/gradle.xml
generated
18
dep/2019/apps/android/source/.idea/gradle.xml
generated
@@ -1,18 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="GradleSettings">
|
||||
<option name="linkedExternalProjectsSettings">
|
||||
<GradleProjectSettings>
|
||||
<option name="distributionType" value="DEFAULT_WRAPPED" />
|
||||
<option name="externalProjectPath" value="$PROJECT_DIR$" />
|
||||
<option name="modules">
|
||||
<set>
|
||||
<option value="$PROJECT_DIR$" />
|
||||
<option value="$PROJECT_DIR$/app" />
|
||||
</set>
|
||||
</option>
|
||||
<option name="resolveModulePerSourceSet" value="false" />
|
||||
</GradleProjectSettings>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
9
dep/2019/apps/android/source/.idea/misc.xml
generated
9
dep/2019/apps/android/source/.idea/misc.xml
generated
@@ -1,9 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="11" project-jdk-type="JavaSDK">
|
||||
<output url="file://$PROJECT_DIR$/build/classes" />
|
||||
</component>
|
||||
<component name="ProjectType">
|
||||
<option name="id" value="Android" />
|
||||
</component>
|
||||
</project>
|
@@ -1,12 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="RunConfigurationProducerService">
|
||||
<option name="ignoredProducers">
|
||||
<set>
|
||||
<option value="org.jetbrains.plugins.gradle.execution.test.runner.AllInPackageGradleConfigurationProducer" />
|
||||
<option value="org.jetbrains.plugins.gradle.execution.test.runner.TestClassGradleConfigurationProducer" />
|
||||
<option value="org.jetbrains.plugins.gradle.execution.test.runner.TestMethodGradleConfigurationProducer" />
|
||||
</set>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
6
dep/2019/apps/android/source/.idea/vcs.xml
generated
6
dep/2019/apps/android/source/.idea/vcs.xml
generated
@@ -1,6 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/../../.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user