tra-analysis/data analysis/superscript.py

381 lines
11 KiB
Python
Raw Normal View History

2020-02-18 17:31:20 +00:00
# Titan Robotics Team 2022: Superscript Script
# Written by Arthur Lu & Jacob Levine
# Notes:
# setup:
2020-03-06 20:44:13 +00:00
__version__ = "0.0.4.001"
2020-02-18 17:31:20 +00:00
# changelog should be viewed using print(superscript.__changelog__)
__changelog__ = """changelog:
2020-03-06 20:44:13 +00:00
0.0.4.001:
- fixed bug where X range for regression was determined before sanitization
- better sanitized data
2020-03-06 17:50:07 +00:00
0.0.4.000:
- fixed spelling issue in __changelog__
- addressed nan bug in regression
- fixed errors on line 335 with metrics calling incorrect key "glicko2"
- fixed errors in metrics computing
0.0.3.000:
2020-03-06 04:52:02 +00:00
- added analysis to pit data
2020-03-05 05:59:50 +00:00
0.0.2.001:
- minor stability patches
- implemented db syncing for timestamps
- fixed bugs
0.0.2.000:
- finalized testing and small fixes
2020-03-05 02:12:09 +00:00
0.0.1.004:
- finished metrics implement, trueskill is bugged
2020-03-04 22:53:25 +00:00
0.0.1.003:
- working
2020-03-04 21:57:20 +00:00
0.0.1.002:
- started implement of metrics
2020-03-04 02:10:29 +00:00
0.0.1.001:
- cleaned up imports
2020-03-04 01:39:58 +00:00
0.0.1.000:
- tested working, can push to database
0.0.0.009:
- tested working
- prints out stats for the time being, will push to database later
0.0.0.008:
- added data import
- removed tba import
- finished main method
2020-03-03 22:01:07 +00:00
0.0.0.007:
- added load_config
- optimized simpleloop for readability
- added __all__ entries
- added simplestats engine
- pending testing
2020-03-03 21:42:37 +00:00
0.0.0.006:
- fixes
2020-02-20 01:51:45 +00:00
0.0.0.005:
- imported pickle
- created custom database object
2020-02-20 01:21:48 +00:00
0.0.0.004:
- fixed simpleloop to actually return a vector
0.0.0.003:
- added metricsloop which is unfinished
2020-02-19 01:54:09 +00:00
0.0.0.002:
- added simpleloop which is untested until data is provided
2020-02-18 17:31:20 +00:00
0.0.0.001:
- created script
- added analysis, numba, numpy imports
"""
__author__ = (
"Arthur Lu <learthurgo@gmail.com>",
"Jacob Levine <jlevine@imsa.edu>",
)
# Names exported by a star-import of this module. Every module-level
# function defined in this file is listed, including the database push,
# metric loading and pit analysis helpers that were added after the
# original list was written.
__all__ = [
    "main",
    "load_config",
    "simpleloop",
    "simplestats",
    "push_to_database",
    "metricsloop",
    "load_metrics",
    "pitloop",
]
# imports:
from analysis import analysis as an
import data as d
2020-03-06 20:44:13 +00:00
import numpy as np
2020-03-06 04:52:02 +00:00
import matplotlib.pyplot as plt
2020-03-04 19:42:54 +00:00
import time
import warnings
2020-02-18 21:25:23 +00:00
def main():
    """Run the analysis pipeline in an endless loop.

    Each pass:

    1. reloads ``config.csv`` and ``keys.txt``;
    2. reads the ``latest_update`` flag from the database (creating it with
       a value of 0 when it does not exist yet);
    3. pulls the formatted match and pit data for the configured competition;
    4. runs the configured tests (``simpleloop``), the rating metrics update
       (``metricsloop``) and the pit analysis (``pitloop``);
    5. stores the time at which the pass started as the new
       ``latest_update`` flag and pushes the test and pit results.

    The loop has no exit condition, so this function does not return
    normally.
    """
    # All warnings are silenced for the lifetime of the process.
    warnings.filterwarnings("ignore")

    # NOTE(review): there is no delay between passes, so the database and
    # the input files are re-read back to back - confirm this is intended.
    while True:

        current_time = time.time()
        print("time: " + str(current_time))

        print(" loading config")
        competition, config = load_config("config.csv")
        print(" config loaded")

        print(" loading database keys")
        # Parse the key file once; row 0 holds the database key and row 1
        # the key handed to metricsloop as ``tbakey``.
        keys = an.load_csv("keys.txt")
        apikey = keys[0][0]
        tbakey = keys[1][0]
        print(" loaded keys")

        previous_time = d.get_analysis_flags(apikey, "latest_update")
        if previous_time is None:
            # First run: create the flag in the same {"latest_update": value}
            # shape that is read back in the else-branch and written at the
            # end of every pass.
            d.set_analysis_flags(apikey, "latest_update", {"latest_update": 0})
            previous_time = 0
        else:
            previous_time = previous_time["latest_update"]
        print(" analysis backtimed to: " + str(previous_time))

        print(" loading data")
        data = d.get_match_data_formatted(apikey, competition)
        pit_data = d.get_pit_data_formatted(apikey, competition)
        print(" loaded data")

        print(" running tests")
        results = simpleloop(data, config)
        print(" finished tests")

        print(" running metrics")
        metricsloop(tbakey, apikey, competition, previous_time)
        print(" finished metrics")

        print(" running pit analysis")
        pit = pitloop(pit_data, config)
        print(" finished pit analysis")

        # NOTE(review): the timestamp is stored before the results are
        # pushed; a failing push therefore does not roll it back - confirm.
        d.set_analysis_flags(apikey, "latest_update", {"latest_update": current_time})

        print(" pushing to database")
        push_to_database(apikey, competition, results, pit)
        print(" pushed to database")
2020-03-03 22:01:07 +00:00
def load_config(file):
    """Load the analysis configuration from a CSV file.

    Args:
        file: path handed to ``an.load_csv``.

    Returns:
        A ``(first_cell, mapping)`` tuple. ``first_cell`` is the first cell
        of the first row (``main`` uses it as the competition). ``mapping``
        maps the first cell of every following row to the list of the
        remaining cells in that row (``main`` passes it on as the tests to
        run per variable).
    """
    rows = an.load_csv(file)
    # Everything after the first row is one "key, value, value, ..." entry.
    entries = {row[0]: row[1:] for row in rows[1:]}
    first_cell = rows[0][0]
    return (first_cell, entries)
2020-02-18 21:25:23 +00:00
2020-02-19 01:54:09 +00:00
def simpleloop(data, tests):
    """Run every configured test on every variable of every team.

    Args:
        data: mapping ``team -> variable -> values`` (the values are handed
            to ``simplestats`` unchanged).
        tests: mapping ``variable -> iterable of test names``.

    Returns:
        A mapping ``team -> variable -> test name -> result``. Variables
        that have no entry in ``tests`` are still present, with an empty
        result mapping.
    """
    results_by_team = {}

    for team, variables in data.items():
        results_by_variable = {}

        for variable, samples in variables.items():
            outcomes = {}
            # Variables without configured tests simply get no outcomes.
            requested = tests[variable] if variable in tests else ()
            for test in requested:
                # Progress output: which team/variable is being processed.
                print(team)
                print(variable)
                outcomes[test] = simplestats(samples, test)
            results_by_variable[variable] = outcomes

        results_by_team[team] = results_by_variable

    return results_by_team
2020-02-19 01:54:09 +00:00
2020-03-03 21:42:37 +00:00
def simplestats(data, test):
    """Run a single named test on one series of values.

    The input is converted to a NumPy array and every non-finite entry is
    dropped before the test runs. The x values used for the historical
    analysis and the regressions are the indices ``0 .. len - 1`` of the
    sanitized series.

    Args:
        data: values for one variable of one team.
        test: name of the test to run.

    Returns:
        Whatever the matching ``an`` routine returns, or ``None`` when
        ``test`` is not one of the recognised names.
    """
    samples = np.array(data)
    samples = samples[np.isfinite(samples)]
    x_values = list(range(len(samples)))

    if test == "basic_stats":
        return an.basic_stats(samples)

    if test == "historical_analysis":
        return an.histo_analysis([x_values, samples])

    # Test name -> regression kind code passed to an.regression.
    regression_kinds = (
        ("regression_linear", 'lin'),
        ("regression_logarithmic", 'log'),
        ("regression_exponential", 'exp'),
        ("regression_polynomial", 'ply'),
        ("regression_sigmoidal", 'sig'),
    )
    for name, kind in regression_kinds:
        if test == name:
            return an.regression(x_values, samples, [kind])

    return None
2020-02-20 01:21:48 +00:00
2020-03-06 17:27:32 +00:00
def push_to_database(apikey, competition, results, pit):
    """Push the test results and the pit analysis to the database.

    Args:
        apikey: database key forwarded to the ``d`` push helpers.
        competition: competition identifier forwarded to the push helpers.
        results: mapping ``team -> test results``; one
            ``d.push_team_tests_data`` call is made per team.
        pit: mapping ``variable -> pit values``; one
            ``d.push_team_pit_data`` call is made per variable.
    """
    for team, team_results in results.items():
        d.push_team_tests_data(apikey, competition, team, team_results)

    for variable, pit_values in pit.items():
        d.push_team_pit_data(apikey, competition, variable, pit_values)
2020-03-04 22:53:25 +00:00
def _alliance_averages(alliance):
    """Return the mean Elo and Glicko-2 values over all teams of an alliance."""
    size = len(alliance)
    elo = {"score": sum(team["elo"]["score"] for team in alliance.values()) / size}
    gl2 = {
        key: sum(team["gl2"][key] for team in alliance.values()) / size
        for key in ("score", "rd", "vol")
    }
    return elo, gl2


def _shift_alliance(alliance, elo_delta, gl2_delta):
    """Add the alliance-level rating change to every team of it, in place."""
    for team_metrics in alliance.values():
        team_metrics["elo"]["score"] = team_metrics["elo"]["score"] + elo_delta
        for key in ("score", "rd", "vol"):
            team_metrics["gl2"][key] = team_metrics["gl2"][key] + gl2_delta[key]


def metricsloop(tbakey, apikey, competition, timestamp): # listener based metrics update
    """Update the Elo and Glicko-2 metrics for every new match.

    For each match returned by ``d.pull_new_tba_matches`` the stored metrics
    of both alliances are loaded, averaged per alliance, and the two
    averages are rated against each other. The difference between the new
    and the old alliance average is then added to every team of that
    alliance, and each team's metrics are pushed back to the database.

    Args:
        tbakey: key forwarded to ``d.pull_new_tba_matches``.
        apikey: database key used to load and push team metrics.
        competition: competition identifier.
        timestamp: forwarded to ``d.pull_new_tba_matches`` to select the
            matches to process.

    Returns:
        None. The results are written to the database.
    """
    elo_N = 400
    elo_K = 24

    matches = d.pull_new_tba_matches(tbakey, competition, timestamp)

    for match in matches:

        red = load_metrics(apikey, competition, match, "red")
        blu = load_metrics(apikey, competition, match, "blue")

        if not red or not blu:
            # An alliance without teams has no average rating; averaging
            # would divide by zero, so leave this match unrated.
            print(" skipping match with an empty alliance")
            continue

        red_elo, red_gl2 = _alliance_averages(red)
        blu_elo, blu_gl2 = _alliance_averages(blu)

        # Outcome from each alliance's point of view; anything that is not
        # a red or blue win is scored as a draw.
        if match["winner"] == "red":
            observations = {"red": 1, "blu": 0}
        elif match["winner"] == "blue":
            observations = {"red": 0, "blu": 1}
        else:
            observations = {"red": 0.5, "blu": 0.5}

        red_elo_delta = an.elo(red_elo["score"], blu_elo["score"], observations["red"], elo_N, elo_K) - red_elo["score"]
        blu_elo_delta = an.elo(blu_elo["score"], red_elo["score"], observations["blu"], elo_N, elo_K) - blu_elo["score"]

        # NOTE(review): two observations are passed although only one
        # opposing score/rd is given - confirm an.glicko2 ignores the extra.
        new_red_gl2_score, new_red_gl2_rd, new_red_gl2_vol = an.glicko2(red_gl2["score"], red_gl2["rd"], red_gl2["vol"], [blu_gl2["score"]], [blu_gl2["rd"]], [observations["red"], observations["blu"]])
        new_blu_gl2_score, new_blu_gl2_rd, new_blu_gl2_vol = an.glicko2(blu_gl2["score"], blu_gl2["rd"], blu_gl2["vol"], [red_gl2["score"]], [red_gl2["rd"]], [observations["blu"], observations["red"]])

        red_gl2_delta = {"score": new_red_gl2_score - red_gl2["score"], "rd": new_red_gl2_rd - red_gl2["rd"], "vol": new_red_gl2_vol - red_gl2["vol"]}
        blu_gl2_delta = {"score": new_blu_gl2_score - blu_gl2["score"], "rd": new_blu_gl2_rd - blu_gl2["rd"], "vol": new_blu_gl2_vol - blu_gl2["vol"]}

        _shift_alliance(red, red_elo_delta, red_gl2_delta)
        _shift_alliance(blu, blu_elo_delta, blu_gl2_delta)

        temp_vector = {}
        temp_vector.update(red)
        temp_vector.update(blu)

        for team in temp_vector:
            d.push_team_metrics_data(apikey, competition, team, temp_vector[team])

        # TODO: TrueSkill update is not functional yet. The disabled draft
        # collected a (mu, sigma) tuple per team from the "ts" entry of each
        # alliance and called
        # an.trueskill([red_trueskill, blu_trueskill], [observations["red"], observations["blu"]]).
def load_metrics(apikey, competition, match, group_name):
    """Load the stored rating metrics for every team of one alliance.

    Args:
        apikey: database key forwarded to ``d.get_team_metrics_data``.
        competition: competition identifier.
        match: mapping that holds the team list of each alliance.
        group_name: key of the alliance inside ``match``.

    Returns:
        A mapping ``team -> {"elo": ..., "gl2": ..., "ts": ...}``. Teams
        without a stored record get the default starting values; nothing is
        written to the database here.
    """
    group = {}

    for team in match[group_name]:

        # Query the database once per team and reuse the result.
        db_data = d.get_team_metrics_data(apikey, competition, team)

        if db_data is None:
            # No record yet: start from the default ratings.
            # NOTE(review): the "Metrics Defaults" note at the end of this
            # file lists a gl2 starting rd of 350, the value used here is
            # 250 - confirm which one is intended.
            elo = {"score": 1500}
            gl2 = {"score": 1500, "rd": 250, "vol": 0.06}
            ts = {"mu": 25, "sigma": 25/3}
        else:
            metrics = db_data["metrics"]
            elo = metrics["elo"]
            gl2 = metrics["gl2"]
            ts = metrics["ts"]

        group[team] = {"elo": elo, "gl2": gl2, "ts": ts}

    return group
2020-03-06 04:52:02 +00:00
def pitloop(pit, tests):
    """Collect the pit values of every configured variable across teams.

    Args:
        pit: mapping ``team -> variable -> value``.
        tests: container of the variable names to keep (``main`` passes the
            loaded config mapping; only membership is checked).

    Returns:
        A mapping ``variable -> list of values``, one list entry per team
        that reported the variable, in iteration order of ``pit``.
    """
    collected = {}
    for answers in pit.values():
        for variable, value in answers.items():
            if variable not in tests:
                continue
            collected.setdefault(variable, []).append(value)
    return collected
2020-03-05 02:12:09 +00:00
# Start the endless analysis loop only when this file is executed as a
# script, so that importing the module (for example to reuse the functions
# listed in __all__) does not block forever.
if __name__ == "__main__":
    main()
2020-03-04 22:53:25 +00:00
"""
Metrics Defaults:
2020-02-20 01:51:45 +00:00
2020-03-04 22:53:25 +00:00
elo starting score = 1500
2020-03-04 23:54:30 +00:00
elo N = 400
elo K = 24
2020-02-20 01:51:45 +00:00
2020-03-04 22:53:25 +00:00
gl2 starting score = 1500
gl2 starting rd = 350
gl2 starting vol = 0.06
"""