2019-04-09 14:30:37 +00:00
# Titan Robotics Team 2022: Data Analysis Module
# Written by Arthur Lu & Jacob Levine
# Notes:
2019-10-29 14:41:49 +00:00
# this should be imported as a python module using 'import analysis'
# this should be included in the local directory or environment variable
# this module has been optimized for multithreaded computing
# current benchmark of optimization: 1.33 times faster
2019-04-09 14:30:37 +00:00
# setup:
2019-11-08 18:26:42 +00:00
# Module version, kept in sync with the newest entry in __changelog__.
__version__ = "1.1.9.002"
2019-04-09 14:30:37 +00:00
# changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """ changelog:
2019-11-08 18:26:42 +00:00
1.1 .9 .002 :
- kernelized PCA and KNN
2019-11-06 21:32:21 +00:00
1.1 .9 .001 :
- fixed bugs with SVM and NaiveBayes
2019-11-06 21:26:13 +00:00
1.1 .9 .000 :
- added SVM class , subclasses , and functions
- note : untested
2019-11-05 19:38:49 +00:00
1.1 .8 .000 :
- added NaiveBayes classification engine
- note : untested
2019-11-05 19:14:08 +00:00
1.1 .7 .000 :
- added knn ( )
- added confusion matrix to decisiontree ( )
2019-11-05 18:56:53 +00:00
1.1 .6 .002 :
- changed layout of __changelog to be vscode friendly
1.1 .6 .001 :
- added additional hyperparameters to decisiontree ( )
1.1 .6 .000 :
- fixed __version__
- fixed __all__ order
- added decisiontree ( )
1.1 .5 .003 :
- added pca
1.1 .5 .002 :
- reduced import list
- added kmeans clustering engine
1.1 .5 .001 :
- simplified regression by using . to ( device )
1.1 .5 .000 :
- added polynomial regression to regression ( ) ; untested
1.1 .4 .000 :
- added trueskill ( )
1.1 .3 .002 :
- renamed regression class to Regression , regression_engine ( ) to regression gliko2_engine class to Gliko2
1.1 .3 .001 :
- changed glicko2 ( ) to return tuple instead of array
1.1 .3 .000 :
- added glicko2_engine class and glicko ( )
- verified glicko2 ( ) accuracy
1.1 .2 .003 :
- fixed elo ( )
1.1 .2 .002 :
- added elo ( )
- elo ( ) has bugs to be fixed
1.1 .2 .001 :
- readded regrression import
1.1 .2 .000 :
- integrated regression . py as regression class
- removed regression import
- fixed metadata for regression class
- fixed metadata for analysis class
1.1 .1 .001 :
- regression_engine ( ) bug fixes , now actaully regresses
1.1 .1 .000 :
- added regression_engine ( )
- added all regressions except polynomial
1.1 .0 .007 :
- updated _init_device ( )
1.1 .0 .006 :
- removed useless try statements
1.1 .0 .005 :
- removed impossible outcomes
1.1 .0 .004 :
- added performance metrics ( r ^ 2 , mse , rms )
1.1 .0 .003 :
- resolved nopython mode for mean , median , stdev , variance
1.1 .0 .002 :
- snapped ( removed ) majority of uneeded imports
- forced object mode ( bad ) on all jit
- TODO : stop numba complaining about not being able to compile in nopython mode
1.1 .0 .001 :
- removed from sklearn import * to resolve uneeded wildcard imports
1.1 .0 .000 :
- removed c_entities , nc_entities , obstacles , objectives from __all__
- applied numba . jit to all functions
- depreciated and removed stdev_z_split
- cleaned up histo_analysis to include numpy and numba . jit optimizations
- depreciated and removed all regression functions in favor of future pytorch optimizer
- depreciated and removed all nonessential functions ( basic_analysis , benchmark , strip_data )
- optimized z_normalize using sklearn . preprocessing . normalize
- TODO : implement kernel / function based pytorch regression optimizer
1.0 .9 .000 :
- refactored
- numpyed everything
- removed stats in favor of numpy functions
1.0 .8 .005 :
- minor fixes
1.0 .8 .004 :
- removed a few unused dependencies
1.0 .8 .003 :
- added p_value function
1.0 .8 .002 :
- updated __all__ correctly to contain changes made in v 1.0 .8 .000 and v 1.0 .8 .001
1.0 .8 .001 :
- refactors
- bugfixes
1.0 .8 .000 :
- depreciated histo_analysis_old
- depreciated debug
- altered basic_analysis to take array data instead of filepath
- refactor
- optimization
1.0 .7 .002 :
- bug fixes
1.0 .7 .001 :
- bug fixes
1.0 .7 .000 :
- added tanh_regression ( logistical regression )
- bug fixes
1.0 .6 .005 :
- added z_normalize function to normalize dataset
- bug fixes
1.0 .6 .004 :
- bug fixes
1.0 .6 .003 :
- bug fixes
1.0 .6 .002 :
- bug fixes
1.0 .6 .001 :
- corrected __all__ to contain all of the functions
1.0 .6 .000 :
- added calc_overfit , which calculates two measures of overfit , error and performance
- added calculating overfit to optimize_regression
1.0 .5 .000 :
- added optimize_regression function , which is a sample function to find the optimal regressions
- optimize_regression function filters out some overfit funtions ( functions with r ^ 2 = 1 )
- planned addition : overfit detection in the optimize_regression function
1.0 .4 .002 :
- added __changelog__
- updated debug function with log and exponential regressions
1.0 .4 .001 :
- added log regressions
- added exponential regressions
- added log_regression and exp_regression to __all__
1.0 .3 .008 :
- added debug function to further consolidate functions
1.0 .3 .007 :
- added builtin benchmark function
- added builtin random ( linear ) data generation function
- added device initialization ( _init_device )
1.0 .3 .006 :
- reorganized the imports list to be in alphabetical order
- added search and regurgitate functions to c_entities , nc_entities , obstacles , objectives
1.0 .3 .005 :
- major bug fixes
- updated historical analysis
- depreciated old historical analysis
1.0 .3 .004 :
- added __version__ , __author__ , __all__
- added polynomial regression
- added root mean squared function
- added r squared function
1.0 .3 .003 :
- bug fixes
- added c_entities
1.0 .3 .002 :
- bug fixes
- added nc_entities , obstacles , objectives
- consolidated statistics . py to analysis . py
1.0 .3 .001 :
- compiled 1 d , column , and row basic stats into basic stats function
1.0 .3 .000 :
- added historical analysis function
1.0 .2 . xxx :
- added z score test
1.0 .1 . xxx :
- major bug fixes
1.0 .0 . xxx :
- added loading csv
- added 1 d , column , row basic stats
2019-04-09 14:30:37 +00:00
"""
# Module authors as "Name <email>" strings.
__author__ = (
    "Arthur Lu <learthurgo@gmail.com>",
    "Jacob Levine <jlevine@imsa.edu>",
)
# Public API exported by `from analysis import *`. Every entry must match a
# module-level name exactly (no surrounding whitespace), otherwise the
# wildcard import raises AttributeError.
__all__ = [
    '_init_device',
    'load_csv',
    'basic_stats',
    'z_score',
    'z_normalize',
    'histo_analysis',
    'regression',
    'elo',
    'gliko2',
    'trueskill',
    'r_squared',
    'mse',
    'rms',
    'kmeans',
    'pca',
    'decisiontree',
    'knn',
    'NaiveBayes',
    'SVM',
    'Regression',
    'Gliko2',
    # all statistics functions left out due to integration in other functions
]
# now back to your regularly scheduled programming:
# imports (now in alphabetical order! v 1.0.3.006):
import csv
2019-09-13 16:14:13 +00:00
import numba
from numba import jit
2019-04-09 14:30:37 +00:00
import numpy as np
2019-10-08 14:30:07 +00:00
import math
2019-10-05 21:18:49 +00:00
try :
2019-10-05 21:53:03 +00:00
from analysis import trueskill as Trueskill
2019-10-05 21:18:49 +00:00
except :
2019-10-05 21:53:03 +00:00
import trueskill as Trueskill
2019-10-25 14:50:02 +00:00
import sklearn
from sklearn import *
2019-09-22 02:10:22 +00:00
import torch
2019-04-09 14:30:37 +00:00
class error(ValueError):
    """Module-specific exception type; a plain subclass of ValueError."""
2019-09-25 19:11:20 +00:00
def _init_device ( ) : # initiates computation device for ANNs
device = ' cuda:0 ' if torch . cuda . is_available ( ) else ' cpu '
return device
2019-04-09 14:30:37 +00:00
2019-09-13 18:59:13 +00:00
@jit(forceobj=True)
def load_csv(filepath):
    """Read a CSV file into a numpy array.

    Args:
        filepath: path of the CSV file to read.

    Returns:
        np.array built from the list of rows yielded by csv.reader.
    """
    # newline='' is the value the csv module documents for files handed to
    # csv.reader; the `with` block closes the file, so no explicit close().
    with open(filepath, newline='') as csvfile:
        file_array = np.array(list(csv.reader(csvfile)))
    return file_array
2019-09-13 19:29:22 +00:00
# expects 1d array
2019-09-13 18:59:13 +00:00
@jit(forceobj=True)
def basic_stats(data):
    """Summarise a dataset with the module's mean/median/stdev/variance helpers.

    Args:
        data: array-like that can be converted to a float numpy array.

    Returns:
        Tuple (mean, median, standard deviation, variance).
    """
    values = np.array(data).astype(float)
    return mean(values), median(values), stdev(values), variance(values)
2019-04-09 14:30:37 +00:00
# returns z score with inputs of point, mean and standard deviation of spread
2019-09-13 18:59:13 +00:00
@jit(forceobj=True)
def z_score(point, mean, stdev):
    """Return (point - mean) / stdev, i.e. the z score of `point`.

    Args:
        point: observed value.
        mean: mean of the distribution.
        stdev: standard deviation of the distribution.
    """
    return (point - mean) / stdev
2019-09-13 16:14:13 +00:00
# expects 2d array, normalizes across all axes
2019-09-13 18:59:13 +00:00
@jit(forceobj=True)
def z_normalize(array, *args):
    """Normalize an array along each requested axis in turn.

    Args:
        array: array-like; converted with np.array before processing.
        *args: axis values, each forwarded as `axis=` to
            sklearn.preprocessing.normalize, applied in the order given.

    Returns:
        The array after all normalization passes (just the converted array
        when no axes are given).
    """
    normalized = np.array(array)
    for axis in args:
        normalized = sklearn.preprocessing.normalize(normalized, axis=axis)
    return normalized
2019-04-09 14:30:37 +00:00
2019-09-13 18:59:13 +00:00
@jit(forceobj=True)
# expects 2d array of [x,y]
def histo_analysis(hist_data):
    """Summarise the discrete derivative of a historical series.

    Args:
        hist_data: 2d array-like of [x, y]; np.diff is taken along the last
            axis, then row 1 of the differences is divided by row 0.

    Returns:
        Tuple (mean, variance) of the resulting derivative values, taken from
        positions 0 and 3 of basic_stats.
    """
    hist_data = np.array(hist_data)
    deltas = np.diff(hist_data)
    derivative = deltas[1] / deltas[0]

    # The previous version pre-allocated `derivative` (immediately overwritten)
    # and called np.sort without using its result; neither affected the output.
    # basic_stats is now evaluated once instead of twice.
    stats = basic_stats(derivative)
    return stats[0], stats[3]
2019-09-30 15:11:53 +00:00
@jit(forceobj=True)
def regression(device, inputs, outputs, args, loss=torch.nn.MSELoss(), _iterations=10000, lr=0.01, _iterations_ply=10000, lr_ply=0.01, power_limit=None):  # inputs, outputs expects N-D array
    """Train the requested regression kernels with SGD and collect the results.

    Args:
        device: torch device; handed to Regression.set_device and used for the
            input/output tensors.
        inputs: array-like of input variables; len(inputs) is used as the
            number of variables of every kernel.
        outputs: array-like of target values.
        args: container of kernel keys to train: 'lin', 'log', 'exp', 'ply',
            'sig'.
        loss: loss callable forwarded to Regression.SGDTrain.
        _iterations, lr: iteration count and learning rate for the
            non-polynomial kernels.
        _iterations_ply, lr_ply: base iteration count and learning rate for
            polynomial kernels; for power i they are scaled by 10**i and
            10**-i respectively.
        power_limit: highest polynomial power to try (inclusive). When None,
            powers 2 .. len(outputs) - 1 are tried.

    Returns:
        List with one entry per requested kernel, in the order lin, log, exp,
        ply, sig. Each entry is (kernel.parameters, final_loss); the 'ply'
        entry is a list of such tuples, one per power.
    """
    if power_limit is None:
        power_limit = len(outputs)
    else:
        power_limit += 1  # make the caller's limit inclusive for range()

    regressions = []
    Regression.set_device(device)

    # Convert once instead of once per kernel.
    num_vars = len(inputs)
    data = torch.tensor(inputs).to(torch.float).to(device)
    ground = torch.tensor(outputs).to(torch.float).to(device)

    if 'lin' in args:
        # NOTE(review): only the linear kernel wraps `outputs` in an extra
        # dimension; kept as in the original - confirm whether the other
        # kernels should do the same.
        ground_lin = torch.tensor([outputs]).to(torch.float).to(device)
        model = Regression.SGDTrain(Regression.LinearRegKernel(num_vars), data, ground_lin, loss=loss, iterations=_iterations, learning_rate=lr, return_losses=True)
        regressions.append((model[0].parameters, model[1][-1]))

    if 'log' in args:
        model = Regression.SGDTrain(Regression.LogRegKernel(num_vars), data, ground, loss=loss, iterations=_iterations, learning_rate=lr, return_losses=True)
        regressions.append((model[0].parameters, model[1][-1]))

    if 'exp' in args:
        model = Regression.SGDTrain(Regression.ExpRegKernel(num_vars), data, ground, loss=loss, iterations=_iterations, learning_rate=lr, return_losses=True)
        regressions.append((model[0].parameters, model[1][-1]))

    if 'ply' in args:
        plys = []
        for i in range(2, power_limit):
            model = Regression.SGDTrain(Regression.PolyRegKernel(num_vars, i), data, ground, loss=loss, iterations=_iterations_ply * 10 ** i, learning_rate=lr_ply * 10 ** -i, return_losses=True)
            plys.append((model[0].parameters, model[1][-1]))
        regressions.append(plys)

    if 'sig' in args:
        model = Regression.SGDTrain(Regression.SigmoidalRegKernelArthur(num_vars), data, ground, loss=loss, iterations=_iterations, learning_rate=lr, return_losses=True)
        regressions.append((model[0].parameters, model[1][-1]))

    return regressions
2019-09-16 16:11:27 +00:00
2019-10-03 15:42:05 +00:00
@jit(nopython=True)
def elo(starting_score, opposing_scores, observed, N, K):
    """Return an updated Elo rating.

    The expected score against each opponent is
    1 / (1 + 10 ** ((opponent - starting_score) / N)); the result is
    starting_score + K * (sum(observed) - sum(expected)).

    Args:
        starting_score: rating before the matches.
        opposing_scores: ratings of the opponents.
        observed: observed match results.
        N: divisor applied to the rating difference in the exponent.
        K: multiplier applied to the observed-minus-expected total.
    """
    rating_gap = np.array(opposing_scores) - starting_score
    expected = 1 / (1 + 10 ** (rating_gap / N))
    surprise = np.sum(observed) - np.sum(expected)
    return starting_score + K * surprise
2019-10-03 01:56:06 +00:00
2019-10-04 14:28:25 +00:00
@jit(forceobj=True)
def gliko2(starting_score, starting_rd, starting_vol, opposing_scores, opposing_rd, observations):
    """Update one player's rating through the Gliko2 class.

    Args:
        starting_score, starting_rd, starting_vol: initial rating, rd and vol
            passed to the Gliko2 constructor.
        opposing_scores, opposing_rd: iterables of opponent ratings and rds;
            copied into lists before being passed on.
        observations: match outcomes forwarded to Gliko2.update_player.

    Returns:
        Tuple (rating, rd, vol) read from the player after the update.
    """
    player = Gliko2(rating=starting_score, rd=starting_rd, vol=starting_vol)
    opponent_ratings = list(opposing_scores)
    opponent_rds = list(opposing_rd)
    player.update_player(opponent_ratings, opponent_rds, observations)
    return (player.rating, player.rd, player.vol)
2019-10-04 05:26:21 +00:00
2019-10-05 21:18:49 +00:00
@jit(forceobj=True)
def trueskill(teams_data, observations):  # teams_data is array of array of tuples ie. [[(mu, sigma), (mu, sigma), (mu, sigma)], [(mu, sigma), (mu, sigma), (mu, sigma)]]
    """Rate teams with TrueSkill.

    Args:
        teams_data: list of teams, each a list of players. A player is either
            a (mu, sigma) pair or None; None gets a default Trueskill.Rating().
        observations: forwarded as the second positional argument of
            Trueskill.rate.

    Returns:
        Whatever Trueskill.rate returns for the constructed rating groups.
    """
    team_ratings = []
    for team in teams_data:
        team_temp = []
        for player in team:
            if player is not None:
                team_temp.append(Trueskill.Rating(player[0], player[1]))
            else:
                team_temp.append(Trueskill.Rating())
        team_ratings.append(team_temp)

    # Rate the Rating objects built above; the previous code passed the raw
    # `teams_data` tuples and discarded `team_ratings`.
    return Trueskill.rate(team_ratings, observations)
2019-10-05 21:18:49 +00:00
2019-09-16 16:11:27 +00:00
@jit(forceobj=True)
def r_squared(predictions, targets):  # assumes equal size inputs
    """Return sklearn's r2_score of `predictions` against `targets`."""
    y_true = np.array(targets)
    y_pred = np.array(predictions)
    return sklearn.metrics.r2_score(y_true, y_pred)
2019-09-16 16:11:27 +00:00
@jit(forceobj=True)
def mse(predictions, targets):
    """Return sklearn's mean_squared_error of `predictions` against `targets`."""
    y_true = np.array(targets)
    y_pred = np.array(predictions)
    return sklearn.metrics.mean_squared_error(y_true, y_pred)
2019-09-16 16:11:27 +00:00
@jit(forceobj=True)
def rms(predictions, targets):
    """Return the square root of sklearn's mean_squared_error for the inputs."""
    y_true = np.array(targets)
    y_pred = np.array(predictions)
    mean_sq_err = sklearn.metrics.mean_squared_error(y_true, y_pred)
    return math.sqrt(mean_sq_err)
2019-04-09 14:30:37 +00:00
2019-09-13 19:38:24 +00:00
@jit(nopython=True)
def mean(data):
    """Return np.mean(data); compiled by numba in nopython mode."""
    average = np.mean(data)
    return average
2019-09-13 19:38:24 +00:00
@jit(nopython=True)
def median(data):
    """Return np.median(data); compiled by numba in nopython mode."""
    middle = np.median(data)
    return middle
2019-09-13 19:38:24 +00:00
@jit(nopython=True)
def stdev(data):
    """Return np.std(data); compiled by numba in nopython mode."""
    spread = np.std(data)
    return spread
2019-09-13 19:38:24 +00:00
@jit(nopython=True)
def variance(data):
    """Return np.var(data); compiled by numba in nopython mode."""
    var = np.var(data)
    return var
2019-11-08 18:26:42 +00:00
def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
    """Cluster `data` with sklearn.cluster.KMeans.

    Args:
        data: samples to cluster; passed to KMeans.fit and KMeans.predict.
        Remaining keyword arguments are forwarded unchanged to the KMeans
        constructor.

    Returns:
        Tuple (cluster centers, predicted cluster index for each sample).
    """
    # NOTE(review): `precompute_distances` and `n_jobs` are only accepted by
    # older scikit-learn releases - confirm the pinned version.
    kernel = sklearn.cluster.KMeans(n_clusters=n_clusters, init=init, n_init=n_init, max_iter=max_iter, tol=tol, precompute_distances=precompute_distances, verbose=verbose, random_state=random_state, copy_x=copy_x, n_jobs=n_jobs, algorithm=algorithm)
    kernel.fit(data)
    predictions = kernel.predict(data)
    centers = kernel.cluster_centers_
    return centers, predictions
2019-11-08 18:26:42 +00:00
def pca(data, n_components=None, copy=True, whiten=False, svd_solver="auto", tol=0.0, iterated_power="auto", random_state=None):
    """Project `data` with sklearn.decomposition.PCA.

    Args:
        data: samples to decompose; passed to PCA.fit_transform.
        Remaining keyword arguments are forwarded unchanged to the PCA
        constructor.

    Returns:
        The result of PCA.fit_transform(data).
    """
    kernel = sklearn.decomposition.PCA(n_components=n_components, copy=copy, whiten=whiten, svd_solver=svd_solver, tol=tol, iterated_power=iterated_power, random_state=random_state)
    return kernel.fit_transform(data)
2019-11-05 19:14:08 +00:00
def decisiontree(data, labels, test_size=0.3, criterion="gini", splitter="best", max_depth=None):  # expects *2d data and 1d labels
    """Train and evaluate a DecisionTreeClassifier on a held-out split.

    Args:
        data: 2d samples.
        labels: 1d labels.
        test_size: forwarded to train_test_split (split uses random_state=1).
        criterion, splitter, max_depth: forwarded to DecisionTreeClassifier.
            `splitter` now defaults to "best"; the previous default "default"
            is not a splitter scikit-learn accepts, so the function could not
            run with its own defaults.

    Returns:
        Tuple (fitted model, confusion matrix, classification report), both
        metrics computed on the test split.
    """
    data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
    model = sklearn.tree.DecisionTreeClassifier(criterion=criterion, splitter=splitter, max_depth=max_depth)
    model = model.fit(data_train, labels_train)
    predictions = model.predict(data_test)
    cm = sklearn.metrics.confusion_matrix(labels_test, predictions)
    cr = sklearn.metrics.classification_report(labels_test, predictions)

    return model, cm, cr
2019-11-05 19:14:08 +00:00
def knn(data, labels, test_size=0.3, algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=None, n_neighbors=5, p=2, weights='uniform'):  # expects *2d data and 1d labels post-scaling
    """Train and evaluate a KNeighborsClassifier on a held-out split.

    Args:
        data: 2d samples (already scaled).
        labels: 1d labels.
        test_size: forwarded to train_test_split (split uses random_state=1).
        algorithm, leaf_size, metric, metric_params, n_jobs, n_neighbors, p,
        weights: forwarded to KNeighborsClassifier. Previously these were
            accepted but silently ignored.

    Returns:
        Tuple (fitted model, confusion matrix, classification report), both
        metrics computed on the test split.
    """
    data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
    model = sklearn.neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm, leaf_size=leaf_size, p=p, metric=metric, metric_params=metric_params, n_jobs=n_jobs)
    model.fit(data_train, labels_train)
    predictions = model.predict(data_test)
    cm = sklearn.metrics.confusion_matrix(labels_test, predictions)
    cr = sklearn.metrics.classification_report(labels_test, predictions)

    return model, cm, cr
2019-11-05 18:47:04 +00:00
2019-11-05 19:38:49 +00:00
class NaiveBayes:
    """Naive Bayes classifiers from sklearn.naive_bayes, each trained and
    evaluated on a held-out split.

    Every public method returns (fitted model, confusion matrix,
    classification report), with both metrics computed on the test split
    produced by train_test_split(..., random_state=1).
    """

    @staticmethod
    def _fit_and_report(model, data, labels, test_size):
        # Shared split / fit / evaluate pipeline used by every classifier below.
        data_train, data_test, labels_train, labels_test = sklearn.model_selection.train_test_split(data, labels, test_size=test_size, random_state=1)
        model.fit(data_train, labels_train)
        predictions = model.predict(data_test)
        cm = sklearn.metrics.confusion_matrix(labels_test, predictions)
        cr = sklearn.metrics.classification_report(labels_test, predictions)
        return model, cm, cr

    def guassian(self, data, labels, test_size=0.3, priors=None, var_smoothing=1e-09):
        """Fit and evaluate GaussianNB (historical spelling kept for callers)."""
        model = sklearn.naive_bayes.GaussianNB(priors=priors, var_smoothing=var_smoothing)
        return NaiveBayes._fit_and_report(model, data, labels, test_size)

    # Correctly spelled alias; `guassian` stays for backward compatibility.
    gaussian = guassian

    def multinomial(self, data, labels, test_size=0.3, alpha=1.0, fit_prior=True, class_prior=None):
        """Fit and evaluate MultinomialNB."""
        model = sklearn.naive_bayes.MultinomialNB(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
        return NaiveBayes._fit_and_report(model, data, labels, test_size)

    def bernoulli(self, data, labels, test_size=0.3, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
        """Fit and evaluate BernoulliNB."""
        model = sklearn.naive_bayes.BernoulliNB(alpha=alpha, binarize=binarize, fit_prior=fit_prior, class_prior=class_prior)
        return NaiveBayes._fit_and_report(model, data, labels, test_size)

    def complement(self, data, labels, test_size=0.3, alpha=1.0, fit_prior=True, class_prior=None, norm=False):
        """Fit and evaluate ComplementNB."""
        model = sklearn.naive_bayes.ComplementNB(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior, norm=norm)
        return NaiveBayes._fit_and_report(model, data, labels, test_size)
2019-11-06 21:26:13 +00:00
class SVM:
    """Factories for sklearn.svm.SVC estimators plus fit/evaluate helpers.

    The nested *Kernel classes are factories: their __new__ returns a
    configured sklearn.svm.SVC instance rather than an instance of the class.
    """

    class CustomKernel:
        """Build an SVC with every hyperparameter given explicitly."""

        # `degre` keeps its historical (misspelled) name so keyword callers
        # still work; it is now actually forwarded as SVC's `degree`.
        def __new__(cls, C, kernel, degre, gamma, coef0, shrinking, probability, tol, cache_size, class_weight, verbose, max_iter, decision_function_shape, random_state):
            return sklearn.svm.SVC(C=C, kernel=kernel, degree=degre, gamma=gamma, coef0=coef0, shrinking=shrinking, probability=probability, tol=tol, cache_size=cache_size, class_weight=class_weight, verbose=verbose, max_iter=max_iter, decision_function_shape=decision_function_shape, random_state=random_state)

    class StandardKernel:
        """Build an SVC for a named kernel, with defaults for everything else."""

        # NOTE(review): gamma='auto_deprecated' is only accepted by older
        # scikit-learn releases - confirm the pinned version.
        def __new__(cls, kernel, C=1.0, degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', random_state=None):
            return sklearn.svm.SVC(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, shrinking=shrinking, probability=probability, tol=tol, cache_size=cache_size, class_weight=class_weight, verbose=verbose, max_iter=max_iter, decision_function_shape=decision_function_shape, random_state=random_state)

    class PrebuiltKernel:
        """Shortcuts for the kernels built into SVC."""

        class Linear:
            def __new__(cls):
                return sklearn.svm.SVC(kernel='linear')

        class Polynomial:
            def __new__(cls, power, r_bias):
                # scikit-learn names the polynomial kernel 'poly'.
                return sklearn.svm.SVC(kernel='poly', degree=power, coef0=r_bias)

        class RBF:
            def __new__(cls, gamma):
                return sklearn.svm.SVC(kernel='rbf', gamma=gamma)

        class Sigmoid:
            def __new__(cls, r_bias):
                return sklearn.svm.SVC(kernel='sigmoid', coef0=r_bias)

    def fit(self, kernel, train_data, train_outputs):  # expects *2d data, 1d labels or outputs
        """Fit `kernel` on the training data and return the result of kernel.fit."""
        return kernel.fit(train_data, train_outputs)

    def eval_classification(self, kernel, test_data, test_outputs):
        """Return (confusion matrix, classification report) for `kernel`.

        The metrics compare the true `test_outputs` with the kernel's
        predictions on `test_data`. The previous code compared the predictions
        with themselves, which always reported a perfect score.
        """
        predictions = kernel.predict(test_data)
        cm = sklearn.metrics.confusion_matrix(test_outputs, predictions)
        cr = sklearn.metrics.classification_report(test_outputs, predictions)

        return cm, cr

    def eval_regression(self, kernel, test_data, test_outputs):
        """Return (r_squared, mse, rms) of the kernel's predictions on
        `test_data` against `test_outputs`, using the module-level metrics."""
        predictions = kernel.predict(test_data)
        r_2 = r_squared(predictions, test_outputs)
        _mse = mse(predictions, test_outputs)
        _rms = rms(predictions, test_outputs)

        return r_2, _mse, _rms
2019-11-06 21:26:13 +00:00
2019-10-04 15:34:31 +00:00
class Regression :
2019-09-30 21:02:32 +00:00
# Titan Robotics Team 2022: CUDA-based Regressions Module
# Written by Arthur Lu & Jacob Levine
# Notes:
# this module has been automatically inegrated into analysis.py, and should be callable as a class from the package
# this module is cuda-optimized and vectorized (except for one small part)
# setup:
__version__ = " 1.0.0.002 "
# changelog should be viewed using print(analysis.regression.__changelog__)
__changelog__ = """
1.0 .0 .002 :
- Added more parameters to log , exponential , polynomial
- Added SigmoidalRegKernelArthur , because Arthur apparently needs
to train the scaling and shifting of sigmoids
1.0 .0 .001 :
- initial release , with linear , log , exponential , polynomial , and sigmoid kernels
- already vectorized ( except for polynomial generation ) and CUDA - optimized
"""
__author__ = (
" Jacob Levine <jlevine@imsa.edu> " ,
" Arthur Lu <learthurgo@gmail.com> "
)
__all__ = [
' factorial ' ,
' take_all_pwrs ' ,
' num_poly_terms ' ,
' set_device ' ,
' LinearRegKernel ' ,
' SigmoidalRegKernel ' ,
' LogRegKernel ' ,
' PolyRegKernel ' ,
' ExpRegKernel ' ,
' SigmoidalRegKernelArthur ' ,
' SGDTrain ' ,
' CustomTrain '
]
device = " cuda:0 " if torch . torch . cuda . is_available ( ) else " cpu "
#todo: document completely
def set_device ( new_device ) :
global device
device = new_device
class LinearRegKernel ( ) :
parameters = [ ]
weights = None
bias = None
def __init__ ( self , num_vars ) :
self . weights = torch . rand ( num_vars , requires_grad = True , device = device )
self . bias = torch . rand ( 1 , requires_grad = True , device = device )
self . parameters = [ self . weights , self . bias ]
def forward ( self , mtx ) :
long_bias = self . bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
return torch . matmul ( self . weights , mtx ) + long_bias
class SigmoidalRegKernel ( ) :
parameters = [ ]
weights = None
bias = None
sigmoid = torch . nn . Sigmoid ( )
def __init__ ( self , num_vars ) :
self . weights = torch . rand ( num_vars , requires_grad = True , device = device )
self . bias = torch . rand ( 1 , requires_grad = True , device = device )
self . parameters = [ self . weights , self . bias ]
def forward ( self , mtx ) :
long_bias = self . bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
return self . sigmoid ( torch . matmul ( self . weights , mtx ) + long_bias )
class SigmoidalRegKernelArthur ( ) :
parameters = [ ]
weights = None
in_bias = None
scal_mult = None
out_bias = None
sigmoid = torch . nn . Sigmoid ( )
def __init__ ( self , num_vars ) :
self . weights = torch . rand ( num_vars , requires_grad = True , device = device )
self . in_bias = torch . rand ( 1 , requires_grad = True , device = device )
self . scal_mult = torch . rand ( 1 , requires_grad = True , device = device )
self . out_bias = torch . rand ( 1 , requires_grad = True , device = device )
self . parameters = [ self . weights , self . in_bias , self . scal_mult , self . out_bias ]
def forward ( self , mtx ) :
long_in_bias = self . in_bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
long_out_bias = self . out_bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
return ( self . scal_mult * self . sigmoid ( torch . matmul ( self . weights , mtx ) + long_in_bias ) ) + long_out_bias
class LogRegKernel ( ) :
parameters = [ ]
weights = None
in_bias = None
scal_mult = None
out_bias = None
def __init__ ( self , num_vars ) :
self . weights = torch . rand ( num_vars , requires_grad = True , device = device )
self . in_bias = torch . rand ( 1 , requires_grad = True , device = device )
self . scal_mult = torch . rand ( 1 , requires_grad = True , device = device )
self . out_bias = torch . rand ( 1 , requires_grad = True , device = device )
self . parameters = [ self . weights , self . in_bias , self . scal_mult , self . out_bias ]
def forward ( self , mtx ) :
long_in_bias = self . in_bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
long_out_bias = self . out_bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
return ( self . scal_mult * torch . log ( torch . matmul ( self . weights , mtx ) + long_in_bias ) ) + long_out_bias
class ExpRegKernel ( ) :
parameters = [ ]
weights = None
in_bias = None
scal_mult = None
out_bias = None
def __init__ ( self , num_vars ) :
self . weights = torch . rand ( num_vars , requires_grad = True , device = device )
self . in_bias = torch . rand ( 1 , requires_grad = True , device = device )
self . scal_mult = torch . rand ( 1 , requires_grad = True , device = device )
self . out_bias = torch . rand ( 1 , requires_grad = True , device = device )
self . parameters = [ self . weights , self . in_bias , self . scal_mult , self . out_bias ]
def forward ( self , mtx ) :
long_in_bias = self . in_bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
long_out_bias = self . out_bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
return ( self . scal_mult * torch . exp ( torch . matmul ( self . weights , mtx ) + long_in_bias ) ) + long_out_bias
class PolyRegKernel ( ) :
parameters = [ ]
weights = None
bias = None
power = None
def __init__ ( self , num_vars , power ) :
self . power = power
2019-10-08 18:49:19 +00:00
num_terms = self . num_poly_terms ( num_vars , power )
2019-09-30 21:02:32 +00:00
self . weights = torch . rand ( num_terms , requires_grad = True , device = device )
self . bias = torch . rand ( 1 , requires_grad = True , device = device )
self . parameters = [ self . weights , self . bias ]
2019-10-08 18:49:19 +00:00
def num_poly_terms ( self , num_vars , power ) :
if power == 0 :
return 0
return int ( self . factorial ( num_vars + power - 1 ) / self . factorial ( power ) / self . factorial ( num_vars - 1 ) ) + self . num_poly_terms ( num_vars , power - 1 )
def factorial ( self , n ) :
if n == 0 :
return 1
else :
return n * self . factorial ( n - 1 )
2019-10-10 03:23:56 +00:00
def take_all_pwrs ( self , vec , pwr ) :
2019-10-08 18:49:19 +00:00
#todo: vectorize (kinda)
combins = torch . combinations ( vec , r = pwr , with_replacement = True )
2019-10-10 03:23:56 +00:00
out = torch . ones ( combins . size ( ) [ 0 ] ) . to ( device ) . to ( torch . float )
for i in torch . t ( combins ) . to ( device ) . to ( torch . float ) :
2019-10-08 18:49:19 +00:00
out * = i
2019-10-10 03:23:56 +00:00
if pwr == 1 :
return out
else :
return torch . cat ( ( out , self . take_all_pwrs ( vec , pwr - 1 ) ) )
2019-09-30 21:02:32 +00:00
def forward ( self , mtx ) :
#TODO: Vectorize the last part
cols = [ ]
for i in torch . t ( mtx ) :
2019-10-08 18:49:19 +00:00
cols . append ( self . take_all_pwrs ( i , self . power ) )
2019-09-30 21:02:32 +00:00
new_mtx = torch . t ( torch . stack ( cols ) )
long_bias = self . bias . repeat ( [ 1 , mtx . size ( ) [ 1 ] ] )
return torch . matmul ( self . weights , new_mtx ) + long_bias
def SGDTrain(kernel, data, ground, loss=torch.nn.MSELoss(), iterations=1000, learning_rate=.1, return_losses=False):
    """Fit ``kernel`` with plain stochastic gradient descent.

    Args:
        kernel: object exposing ``parameters`` (tensors to optimise) and ``forward(data)``.
        data: input tensor; moved to the module-level ``device`` before training.
        ground: target tensor; moved to ``device`` as well.
        loss: callable ``loss(pred, target)`` returning a scalar tensor
            (the default instance is created once, at definition time).
        iterations: number of optimisation steps.
        learning_rate: step size handed to ``torch.optim.SGD``.
        return_losses: when true, also return the loss recorded at every step.

    Returns:
        ``kernel`` alone, or ``[kernel, losses]`` when ``return_losses`` is set.
    """
    optimizer = torch.optim.SGD(kernel.parameters, lr=learning_rate)
    inputs = data.to(device)
    targets = ground.to(device)
    history = []
    for _ in range(iterations):
        with torch.set_grad_enabled(True):
            optimizer.zero_grad()
            step_loss = loss(kernel.forward(inputs), targets)
            if return_losses:
                # Record the loss before this step's parameter update.
                history.append(step_loss.item())
            step_loss.backward()
            optimizer.step()
    if return_losses:
        return [kernel, history]
    return kernel
def CustomTrain(kernel, optim, data, ground, loss=torch.nn.MSELoss(), iterations=1000, return_losses=False):
    """Fit ``kernel`` using a caller-supplied optimizer.

    Args:
        kernel: object exposing ``forward(data)``.
        optim: optimizer providing ``zero_grad()`` and ``step()``
            (presumably built over ``kernel.parameters`` - confirm at call site).
        data: input tensor; moved to the module-level ``device`` before training.
        ground: target tensor; moved to ``device`` as well.
        loss: callable ``loss(pred, target)`` returning a scalar tensor
            (the default instance is created once, at definition time).
        iterations: number of optimisation steps.
        return_losses: when true, also return the loss recorded at every step.

    Returns:
        ``kernel`` alone, or ``[kernel, losses]`` when ``return_losses`` is set.
    """
    data_cuda = data.to(device)
    ground_cuda = ground.to(device)
    losses = []
    for _ in range(iterations):
        with torch.set_grad_enabled(True):
            optim.zero_grad()
            # Always train on the device-resident copies; the loss-recording
            # path previously used the unmoved `data`/`ground` tensors.
            pred = kernel.forward(data_cuda)
            ls = loss(pred, ground_cuda)
            if return_losses:
                losses.append(ls.item())
            ls.backward()
            optim.step()
    if return_losses:
        return [kernel, losses]
    return kernel
2019-10-04 15:34:31 +00:00
class Gliko2:
    """Glicko-2 rating state (rating, rating deviation, volatility) for one player.

    ``rating`` and ``rd`` are exposed on the public Glicko scale and stored
    internally on the Glicko-2 scale (centred on 1500, divided by 173.7178).
    """

    # System constant constraining how quickly volatility may change.
    _tau = 0.5
    # Convergence tolerance for the volatility iteration in _newVol.
    _epsilon = 0.000001

    def getRating(self):
        """Return the rating converted back to the public scale."""
        return (self.__rating * 173.7178) + 1500

    def setRating(self, rating):
        """Store a public-scale rating on the internal scale."""
        self.__rating = (rating - 1500) / 173.7178

    rating = property(getRating, setRating)

    def getRd(self):
        """Return the rating deviation converted back to the public scale."""
        return self.__rd * 173.7178

    def setRd(self, rd):
        """Store a public-scale rating deviation on the internal scale."""
        self.__rd = rd / 173.7178

    rd = property(getRd, setRd)

    def __init__(self, rating=1500, rd=350, vol=0.06):
        """Initialise the player from public-scale rating, deviation and volatility."""
        self.setRating(rating)
        self.setRd(rd)
        self.vol = vol

    def _preRatingRD(self):
        """Inflate the internal deviation by the current volatility."""
        self.__rd = math.sqrt(math.pow(self.__rd, 2) + math.pow(self.vol, 2))

    def update_player(self, rating_list, RD_list, outcome_list):
        """Update rating, deviation and volatility from one rating period.

        Args:
            rating_list: opponents' ratings on the public scale.
            RD_list: opponents' rating deviations on the public scale.
            outcome_list: score against each opponent, on the same 0..1 scale
                as the expected score (in Glicko-2: 1 win, 0.5 draw, 0 loss).

        Raises:
            ValueError: if the three lists differ in length.

        With no opponents the variance term is undefined, so the call is
        treated exactly like ``did_not_compete()``.
        """
        if not (len(rating_list) == len(RD_list) == len(outcome_list)):
            raise ValueError("rating_list, RD_list and outcome_list must have the same length")
        if len(rating_list) == 0:
            self.did_not_compete()
            return
        # Convert the opponents onto the internal scale.
        rating_list = [(x - 1500) / 173.7178 for x in rating_list]
        RD_list = [x / 173.7178 for x in RD_list]
        v = self._v(rating_list, RD_list)
        # The volatility must be updated before the deviation is inflated by it.
        self.vol = self._newVol(rating_list, RD_list, outcome_list, v)
        self._preRatingRD()
        self.__rd = 1 / math.sqrt((1 / math.pow(self.__rd, 2)) + (1 / v))
        tempSum = sum(
            self._g(opp_rd) * (outcome - self._E(opp_rating, opp_rd))
            for opp_rating, opp_rd, outcome in zip(rating_list, RD_list, outcome_list)
        )
        self.__rating += math.pow(self.__rd, 2) * tempSum

    def _f(self, x, delta, v, a):
        """Evaluate the function whose root in ``x`` is ln(new volatility squared)."""
        ex = math.exp(x)
        phi_sq = math.pow(self.__rd, 2)
        numerator = ex * (math.pow(delta, 2) - phi_sq - v - ex)
        denominator = 2 * math.pow(phi_sq + v + ex, 2)
        return numerator / denominator - (x - a) / math.pow(self._tau, 2)

    def _newVol(self, rating_list, RD_list, outcome_list, v):
        """Return the updated volatility for this rating period.

        Solves ``_f(x) = 0`` with the Illinois variant of regula falsi described
        in Glickman's Glicko-2 paper. The search is bracketed and stops once the
        bracket is narrower than ``_epsilon``, so it cannot spin on float
        round-off. All inputs are on the internal scale.
        """
        delta = self._delta(rating_list, RD_list, outcome_list, v)
        a = math.log(math.pow(self.vol, 2))
        tau = self._tau
        phi_sq = math.pow(self.__rd, 2)

        # Choose the two starting points that bracket the root.
        x_a = a
        if math.pow(delta, 2) > phi_sq + v:
            x_b = math.log(math.pow(delta, 2) - phi_sq - v)
        else:
            k = 1
            while self._f(a - k * tau, delta, v, a) < 0:
                k += 1
            x_b = a - k * tau

        f_a = self._f(x_a, delta, v, a)
        f_b = self._f(x_b, delta, v, a)
        while abs(x_b - x_a) > self._epsilon:
            x_c = x_a + (x_a - x_b) * f_a / (f_b - f_a)
            f_c = self._f(x_c, delta, v, a)
            if f_c * f_b <= 0:
                x_a, f_a = x_b, f_b
            else:
                # Illinois step: halve the retained endpoint's value to avoid stalling.
                f_a = f_a / 2
            x_b, f_b = x_c, f_c
        return math.exp(x_a / 2)

    def _delta(self, rating_list, RD_list, outcome_list, v):
        """Return the estimated rating improvement from the period's outcomes."""
        tempSum = sum(
            self._g(opp_rd) * (outcome - self._E(opp_rating, opp_rd))
            for opp_rating, opp_rd, outcome in zip(rating_list, RD_list, outcome_list)
        )
        return v * tempSum

    def _v(self, rating_list, RD_list):
        """Return the estimated variance of the rating based on the opponents faced."""
        tempSum = 0
        for opp_rating, opp_rd in zip(rating_list, RD_list):
            tempE = self._E(opp_rating, opp_rd)
            tempSum += math.pow(self._g(opp_rd), 2) * tempE * (1 - tempE)
        return 1 / tempSum

    def _E(self, p2rating, p2RD):
        """Return the expected score against an opponent (internal-scale inputs)."""
        return 1 / (1 + math.exp(-1 * self._g(p2RD) * (self.__rating - p2rating)))

    def _g(self, RD):
        """Return the weighting factor that discounts opponents with large deviation."""
        return 1 / math.sqrt(1 + 3 * math.pow(RD, 2) / math.pow(math.pi, 2))

    def did_not_compete(self):
        """Apply the no-games update: only the deviation grows, via the volatility."""
        self._preRatingRD()