changed data analysis folder to data-analysis

added egg-info and build folders to git ignore
deleted egg-info and build folders
This commit is contained in:
ltcptgeneral 2020-05-12 20:54:19 -05:00
commit 48c69a48e0
10 changed files with 619 additions and 0 deletions

Binary file not shown.

View File

@ -0,0 +1 @@
2020ilch

0
config/database.config Normal file
View File

2
config/keys.config Normal file
View File

@ -0,0 +1,2 @@
mongodb+srv://api-user-new:titanscout2022@2022-scouting-4vfuu.mongodb.net/test?authSource=admin&replicaSet=2022-scouting-shard-0&readPreference=primary&appname=MongoDB%20Compass&ssl=true
UDvKmPjPRfwwUdDX1JxbmkyecYBJhCtXeyVk9vmO2i7K0Zn4wqQPMfzuEINXJ7e5

14
config/stats.config Normal file
View File

@ -0,0 +1,14 @@
balls-blocked,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
balls-collected,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
balls-lower-teleop,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
balls-lower-auto,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
balls-started,basic_stats,historical_analyss,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
balls-upper-teleop,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
balls-upper-auto,basic_stats,historical_analysis,regression_linear,regression_logarithmic,regression_exponential,regression_polynomial,regression_sigmoidal
wheel-mechanism
low-balls
high-balls
wheel-success
strategic-focus
climb-mechanism
attitude

102
data.py Normal file
View File

@ -0,0 +1,102 @@
import requests
import pymongo
import pandas as pd
import time
def pull_new_tba_matches(apikey, competition, cutoff):
api_key= apikey
x=requests.get("https://www.thebluealliance.com/api/v3/event/"+competition+"/matches/simple", headers={"X-TBA-Auth_Key":api_key})
out = []
for i in x.json():
if (i["actual_time"] != None and i["actual_time"]-cutoff >= 0 and i["comp_level"] == "qm"):
out.append({"match" : i['match_number'], "blue" : list(map(lambda x: int(x[3:]), i['alliances']['blue']['team_keys'])), "red" : list(map(lambda x: int(x[3:]), i['alliances']['red']['team_keys'])), "winner": i["winning_alliance"]})
return out
def get_team_match_data(apikey, competition, team_num):
client = pymongo.MongoClient(apikey)
db = client.data_scouting
mdata = db.matchdata
out = {}
for i in mdata.find({"competition" : competition, "team_scouted": team_num}):
out[i['match']] = i['data']
return pd.DataFrame(out)
def get_team_pit_data(apikey, competition, team_num):
client = pymongo.MongoClient(apikey)
db = client.data_scouting
mdata = db.pitdata
out = {}
return mdata.find_one({"competition" : competition, "team_scouted": team_num})["data"]
def get_team_metrics_data(apikey, competition, team_num):
client = pymongo.MongoClient(apikey)
db = client.data_processing
mdata = db.team_metrics
return mdata.find_one({"competition" : competition, "team": team_num})
def unkeyify_2l(layered_dict):
out = {}
for i in layered_dict.keys():
add = []
sortkey = []
for j in layered_dict[i].keys():
add.append([j,layered_dict[i][j]])
add.sort(key = lambda x: x[0])
out[i] = list(map(lambda x: x[1], add))
return out
def get_match_data_formatted(apikey, competition):
client = pymongo.MongoClient(apikey)
db = client.data_scouting
mdata = db.teamlist
x=mdata.find_one({"competition":competition})
out = {}
for i in x:
try:
out[int(i)] = unkeyify_2l(get_team_match_data(apikey, competition, int(i)).transpose().to_dict())
except:
pass
return out
def get_pit_data_formatted(apikey, competition):
client = pymongo.MongoClient(apikey)
db = client.data_scouting
mdata = db.teamlist
x=mdata.find_one({"competition":competition})
out = {}
for i in x:
try:
out[int(i)] = get_team_pit_data(apikey, competition, int(i))
except:
pass
return out
def push_team_tests_data(apikey, competition, team_num, data, dbname = "data_processing", colname = "team_tests"):
client = pymongo.MongoClient(apikey)
db = client[dbname]
mdata = db[colname]
mdata.replace_one({"competition" : competition, "team": team_num}, {"_id": competition+str(team_num)+"am", "competition" : competition, "team" : team_num, "data" : data}, True)
def push_team_metrics_data(apikey, competition, team_num, data, dbname = "data_processing", colname = "team_metrics"):
client = pymongo.MongoClient(apikey)
db = client[dbname]
mdata = db[colname]
mdata.replace_one({"competition" : competition, "team": team_num}, {"_id": competition+str(team_num)+"am", "competition" : competition, "team" : team_num, "metrics" : data}, True)
def push_team_pit_data(apikey, competition, variable, data, dbname = "data_processing", colname = "team_pit"):
client = pymongo.MongoClient(apikey)
db = client[dbname]
mdata = db[colname]
mdata.replace_one({"competition" : competition, "variable": variable}, {"competition" : competition, "variable" : variable, "data" : data}, True)
def get_analysis_flags(apikey, flag):
client = pymongo.MongoClient(apikey)
db = client.data_processing
mdata = db.flags
return mdata.find_one({flag:{"$exists":True}})
def set_analysis_flags(apikey, flag, data):
client = pymongo.MongoClient(apikey)
db = client.data_processing
mdata = db.flags
return mdata.replace_one({flag:{"$exists":True}}, data, True)

59
get_team_rankings.py Normal file
View File

@ -0,0 +1,59 @@
import data as d
from analysis import analysis as an
import pymongo
import operator
def load_config(file):
config_vector = {}
file = an.load_csv(file)
for line in file[1:]:
config_vector[line[0]] = line[1:]
return (file[0][0], config_vector)
def get_metrics_processed_formatted(apikey, competition):
client = pymongo.MongoClient(apikey)
db = client.data_scouting
mdata = db.teamlist
x=mdata.find_one({"competition":competition})
out = {}
for i in x:
try:
out[int(i)] = d.get_team_metrics_data(apikey, competition, int(i))
except:
pass
return out
def main():
apikey = an.load_csv("keys.txt")[0][0]
tbakey = an.load_csv("keys.txt")[1][0]
competition, config = load_config("config.csv")
metrics = get_metrics_processed_formatted(apikey, competition)
elo = {}
gl2 = {}
for team in metrics:
elo[team] = metrics[team]["metrics"]["elo"]["score"]
gl2[team] = metrics[team]["metrics"]["gl2"]["score"]
elo = {k: v for k, v in sorted(elo.items(), key=lambda item: item[1])}
gl2 = {k: v for k, v in sorted(gl2.items(), key=lambda item: item[1])}
for team in elo:
print("teams sorted by elo:")
print("" + str(team) + " | " + str(elo[team]))
print("*"*25)
for team in gl2:
print("teams sorted by glicko2:")
print("" + str(team) + " | " + str(gl2[team]))
main()

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
requests
pymongo
pandas
dnspython

378
superscript.py Normal file
View File

@ -0,0 +1,378 @@
# Titan Robotics Team 2022: Superscript Script
# Written by Arthur Lu & Jacob Levine
# Notes:
# setup:
__version__ = "0.0.5.002"
# changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog:
0.0.5.002:
- made changes due to refactoring of analysis
0.0.5.001:
- text fixes
- removed matplotlib requirement
0.0.5.000:
- improved user interface
0.0.4.002:
- removed unessasary code
0.0.4.001:
- fixed bug where X range for regression was determined before sanitization
- better sanitized data
0.0.4.000:
- fixed spelling issue in __changelog__
- addressed nan bug in regression
- fixed errors on line 335 with metrics calling incorrect key "glicko2"
- fixed errors in metrics computing
0.0.3.000:
- added analysis to pit data
0.0.2.001:
- minor stability patches
- implemented db syncing for timestamps
- fixed bugs
0.0.2.000:
- finalized testing and small fixes
0.0.1.004:
- finished metrics implement, trueskill is bugged
0.0.1.003:
- working
0.0.1.002:
- started implement of metrics
0.0.1.001:
- cleaned up imports
0.0.1.000:
- tested working, can push to database
0.0.0.009:
- tested working
- prints out stats for the time being, will push to database later
0.0.0.008:
- added data import
- removed tba import
- finished main method
0.0.0.007:
- added load_config
- optimized simpleloop for readibility
- added __all__ entries
- added simplestats engine
- pending testing
0.0.0.006:
- fixes
0.0.0.005:
- imported pickle
- created custom database object
0.0.0.004:
- fixed simpleloop to actually return a vector
0.0.0.003:
- added metricsloop which is unfinished
0.0.0.002:
- added simpleloop which is untested until data is provided
0.0.0.001:
- created script
- added analysis, numba, numpy imports
"""
__author__ = (
"Arthur Lu <learthurgo@gmail.com>",
"Jacob Levine <jlevine@imsa.edu>",
)
__all__ = [
"main",
"load_config",
"simpleloop",
"simplestats",
"metricsloop"
]
# imports:
from analysis import analysis as an
import data as d
import numpy as np
from os import system, name
from pathlib import Path
import time
import warnings
def main():
warnings.filterwarnings("ignore")
while(True):
current_time = time.time()
print("[OK] time: " + str(current_time))
start = time.time()
config = load_config(Path("config/stats.config"))
competition = an.load_csv(Path("config/competition.config"))[0][0]
print("[OK] configs loaded")
apikey = an.load_csv(Path("config/keys.config"))[0][0]
tbakey = an.load_csv(Path("config/keys.config"))[1][0]
print("[OK] loaded keys")
previous_time = d.get_analysis_flags(apikey, "latest_update")
if(previous_time == None):
d.set_analysis_flags(apikey, "latest_update", 0)
previous_time = 0
else:
previous_time = previous_time["latest_update"]
print("[OK] analysis backtimed to: " + str(previous_time))
print("[OK] loading data")
start = time.time()
data = d.get_match_data_formatted(apikey, competition)
pit_data = d.pit = d.get_pit_data_formatted(apikey, competition)
print("[OK] loaded data in " + str(time.time() - start) + " seconds")
print("[OK] running tests")
start = time.time()
results = simpleloop(data, config)
print("[OK] finished tests in " + str(time.time() - start) + " seconds")
print("[OK] running metrics")
start = time.time()
metricsloop(tbakey, apikey, competition, previous_time)
print("[OK] finished metrics in " + str(time.time() - start) + " seconds")
print("[OK] running pit analysis")
start = time.time()
pit = pitloop(pit_data, config)
print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds")
d.set_analysis_flags(apikey, "latest_update", {"latest_update":current_time})
print("[OK] pushing to database")
start = time.time()
push_to_database(apikey, competition, results, pit)
print("[OK] pushed to database in " + str(time.time() - start) + " seconds")
clear()
def clear():
# for windows
if name == 'nt':
_ = system('cls')
# for mac and linux(here, os.name is 'posix')
else:
_ = system('clear')
def load_config(file):
config_vector = {}
file = an.load_csv(file)
for line in file:
config_vector[line[0]] = line[1:]
return config_vector
def simpleloop(data, tests): # expects 3D array with [Team][Variable][Match]
return_vector = {}
for team in data:
variable_vector = {}
for variable in data[team]:
test_vector = {}
variable_data = data[team][variable]
if(variable in tests):
for test in tests[variable]:
test_vector[test] = simplestats(variable_data, test)
else:
pass
variable_vector[variable] = test_vector
return_vector[team] = variable_vector
return return_vector
def simplestats(data, test):
data = np.array(data)
data = data[np.isfinite(data)]
ranges = list(range(len(data)))
if(test == "basic_stats"):
return an.basic_stats(data)
if(test == "historical_analysis"):
return an.histo_analysis([ranges, data])
if(test == "regression_linear"):
return an.regression(ranges, data, ['lin'])
if(test == "regression_logarithmic"):
return an.regression(ranges, data, ['log'])
if(test == "regression_exponential"):
return an.regression(ranges, data, ['exp'])
if(test == "regression_polynomial"):
return an.regression(ranges, data, ['ply'])
if(test == "regression_sigmoidal"):
return an.regression(ranges, data, ['sig'])
def push_to_database(apikey, competition, results, pit):
for team in results:
d.push_team_tests_data(apikey, competition, team, results[team])
for variable in pit:
d.push_team_pit_data(apikey, competition, variable, pit[variable])
def metricsloop(tbakey, apikey, competition, timestamp): # listener based metrics update
elo_N = 400
elo_K = 24
matches = d.pull_new_tba_matches(tbakey, competition, timestamp)
red = {}
blu = {}
for match in matches:
red = load_metrics(apikey, competition, match, "red")
blu = load_metrics(apikey, competition, match, "blue")
elo_red_total = 0
elo_blu_total = 0
gl2_red_score_total = 0
gl2_blu_score_total = 0
gl2_red_rd_total = 0
gl2_blu_rd_total = 0
gl2_red_vol_total = 0
gl2_blu_vol_total = 0
for team in red:
elo_red_total += red[team]["elo"]["score"]
gl2_red_score_total += red[team]["gl2"]["score"]
gl2_red_rd_total += red[team]["gl2"]["rd"]
gl2_red_vol_total += red[team]["gl2"]["vol"]
for team in blu:
elo_blu_total += blu[team]["elo"]["score"]
gl2_blu_score_total += blu[team]["gl2"]["score"]
gl2_blu_rd_total += blu[team]["gl2"]["rd"]
gl2_blu_vol_total += blu[team]["gl2"]["vol"]
red_elo = {"score": elo_red_total / len(red)}
blu_elo = {"score": elo_blu_total / len(blu)}
red_gl2 = {"score": gl2_red_score_total / len(red), "rd": gl2_red_rd_total / len(red), "vol": gl2_red_vol_total / len(red)}
blu_gl2 = {"score": gl2_blu_score_total / len(blu), "rd": gl2_blu_rd_total / len(blu), "vol": gl2_blu_vol_total / len(blu)}
if(match["winner"] == "red"):
observations = {"red": 1, "blu": 0}
elif(match["winner"] == "blue"):
observations = {"red": 0, "blu": 1}
else:
observations = {"red": 0.5, "blu": 0.5}
red_elo_delta = an.Metrics.elo(red_elo["score"], blu_elo["score"], observations["red"], elo_N, elo_K) - red_elo["score"]
blu_elo_delta = an.Metrics.elo(blu_elo["score"], red_elo["score"], observations["blu"], elo_N, elo_K) - blu_elo["score"]
new_red_gl2_score, new_red_gl2_rd, new_red_gl2_vol = an.Metrics.glicko2(red_gl2["score"], red_gl2["rd"], red_gl2["vol"], [blu_gl2["score"]], [blu_gl2["rd"]], [observations["red"], observations["blu"]])
new_blu_gl2_score, new_blu_gl2_rd, new_blu_gl2_vol = an.Metrics.glicko2(blu_gl2["score"], blu_gl2["rd"], blu_gl2["vol"], [red_gl2["score"]], [red_gl2["rd"]], [observations["blu"], observations["red"]])
red_gl2_delta = {"score": new_red_gl2_score - red_gl2["score"], "rd": new_red_gl2_rd - red_gl2["rd"], "vol": new_red_gl2_vol - red_gl2["vol"]}
blu_gl2_delta = {"score": new_blu_gl2_score - blu_gl2["score"], "rd": new_blu_gl2_rd - blu_gl2["rd"], "vol": new_blu_gl2_vol - blu_gl2["vol"]}
for team in red:
red[team]["elo"]["score"] = red[team]["elo"]["score"] + red_elo_delta
red[team]["gl2"]["score"] = red[team]["gl2"]["score"] + red_gl2_delta["score"]
red[team]["gl2"]["rd"] = red[team]["gl2"]["rd"] + red_gl2_delta["rd"]
red[team]["gl2"]["vol"] = red[team]["gl2"]["vol"] + red_gl2_delta["vol"]
for team in blu:
blu[team]["elo"]["score"] = blu[team]["elo"]["score"] + blu_elo_delta
blu[team]["gl2"]["score"] = blu[team]["gl2"]["score"] + blu_gl2_delta["score"]
blu[team]["gl2"]["rd"] = blu[team]["gl2"]["rd"] + blu_gl2_delta["rd"]
blu[team]["gl2"]["vol"] = blu[team]["gl2"]["vol"] + blu_gl2_delta["vol"]
temp_vector = {}
temp_vector.update(red)
temp_vector.update(blu)
for team in temp_vector:
d.push_team_metrics_data(apikey, competition, team, temp_vector[team])
def load_metrics(apikey, competition, match, group_name):
group = {}
for team in match[group_name]:
db_data = d.get_team_metrics_data(apikey, competition, team)
if d.get_team_metrics_data(apikey, competition, team) == None:
elo = {"score": 1500}
gl2 = {"score": 1500, "rd": 250, "vol": 0.06}
ts = {"mu": 25, "sigma": 25/3}
#d.push_team_metrics_data(apikey, competition, team, {"elo":elo, "gl2":gl2,"trueskill":ts})
group[team] = {"elo": elo, "gl2": gl2, "ts": ts}
else:
metrics = db_data["metrics"]
elo = metrics["elo"]
gl2 = metrics["gl2"]
ts = metrics["ts"]
group[team] = {"elo": elo, "gl2": gl2, "ts": ts}
return group
def pitloop(pit, tests):
return_vector = {}
for team in pit:
for variable in pit[team]:
if(variable in tests):
if(not variable in return_vector):
return_vector[variable] = []
return_vector[variable].append(pit[team][variable])
return return_vector
main()
"""
Metrics Defaults:
elo starting score = 1500
elo N = 400
elo K = 24
gl2 starting score = 1500
gl2 starting rd = 350
gl2 starting vol = 0.06
"""

59
visualize_pit.py Normal file
View File

@ -0,0 +1,59 @@
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
import matplotlib.pyplot as plt
import data as d
import pymongo
# %%
def get_pit_variable_data(apikey, competition):
client = pymongo.MongoClient(apikey)
db = client.data_processing
mdata = db.team_pit
out = {}
return mdata.find()
# %%
def get_pit_variable_formatted(apikey, competition):
temp = get_pit_variable_data(apikey, competition)
out = {}
for i in temp:
out[i["variable"]] = i["data"]
return out
# %%
pit = get_pit_variable_formatted("mongodb+srv://api-user-new:titanscout2022@2022-scouting-4vfuu.mongodb.net/test?authSource=admin&replicaSet=2022-scouting-shard-0&readPreference=primary&appname=MongoDB%20Compass&ssl=true", "2020ilch")
# %%
import matplotlib.pyplot as plt
import numpy as np
# %%
fig, ax = plt.subplots(1, len(pit), sharey=True, figsize=(80,15))
i = 0
for variable in pit:
ax[i].hist(pit[variable])
ax[i].invert_xaxis()
ax[i].set_xlabel('')
ax[i].set_ylabel('Frequency')
ax[i].set_title(variable)
plt.yticks(np.arange(len(pit[variable])))
i+=1
plt.show()
# %%