tra-analysis/data analysis/dep/2019/superscripts/superscript.py

398 lines
11 KiB
Python
Raw Normal View History

2019-04-03 18:34:31 +00:00
# Titan Robotics Team 2022: Super Script
# Written by Arthur Lu & Jacob Levine
# Notes:
# setup:
# Module metadata. The changelog is kept newest-first, one entry per version.
__version__ = "1.0.6.001"
__changelog__ = """changelog:
1.0.6.001:
- fixed multiple bugs
- works now
1.0.6.000:
- added pulldata function
- service now pulls in, computes data, and outputs data as planned
1.0.5.003:
- hotfix: actually pushes data correctly now
1.0.5.002:
- more information given
- performance improvements
1.0.5.001:
- grammar
1.0.5.000:
- service now iterates forever
- ready for production other than pulling json data
1.0.4.001:
- grammar fixes
1.0.4.000:
- actually pushes to firebase
1.0.3.001:
- processes data more efficiently
1.0.3.000:
- actually processes data
1.0.2.000:
- added data reading from folder
- nearly crashed computer reading from 20 GiB of data
1.0.1.000:
- added data reading from file
- added superstructure to code
1.0.0.000:
- added import statements (revolutionary)
"""
# Adjacent string literals are concatenated, so this is a single str (not a
# tuple); the parentheses only allow the value to span several lines.
__author__ = (
    "Arthur Lu <arthurlu@ttic.edu>, "
    "Jacob Levine <jlevine@ttic.edu>,"
)
2019-03-10 16:42:43 +00:00
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import analysis
2019-03-20 19:10:47 +00:00
#import titanlearn
import visualization
import os
import sys
import warnings
import glob
import numpy as np
import time
import tbarequest as tba
import csv
2019-04-03 18:34:31 +00:00
def _fit_best_curves(line, verbose=False):
    """Fit regressions to one data series and pick two candidate curves.

    The series is regressed against its own index (0, 1, 2, ...) with
    ``analysis.optimize_regression``; ``analysis.select_best_regression`` is
    then asked twice, once for "min_overfit" and once for "max_r2s".

    Args:
        line: one row of a loaded CSV (a sequence of observations).
        verbose: when true, print the raw regression output and both
            selections, as the per-measure pass has always done.

    Returns:
        ``(ofbest_curve, r2best_curve)``; each is the four-element list
        ``[equation, rms, r2, overfit]`` returned for that selection mode.
    """
    x = list(range(len(line)))
    eqs, rmss, r2s, overfit = analysis.optimize_regression(x, line, 10, 1)

    beqs, brmss, br2s, boverfit = analysis.select_best_regression(
        eqs, rmss, r2s, overfit, "min_overfit")
    ofbest_curve = [beqs, brmss, br2s, boverfit]
    if verbose:
        print(eqs, rmss, r2s, overfit)
        print(ofbest_curve)

    beqs, brmss, br2s, boverfit = analysis.select_best_regression(
        eqs, rmss, r2s, overfit, "max_r2s")
    r2best_curve = [beqs, brmss, br2s, boverfit]
    if verbose:
        print(r2best_curve)

    return ofbest_curve, r2best_curve


def titanservice():
    """Compute per-team statistics from ``data/*.csv`` and push them to Firestore.

    Steps:
      1. Load every ``data/*.csv`` whose basename does not start with one of
         the excluded prefixes, then ``data/teams.csv`` and ``data/scores.csv``.
      2. For every loaded measure file, build one stats row per line: the
         matching ``teams`` row followed by the results of
         ``analysis.basic_stats`` and ``analysis.histo_analysis``.
      3. For every row of ``scores`` with at least four entries, evaluate the
         "min_overfit" and "max_r2s" equations and store both values plus
         their mean after the team's row; shorter rows become ``'no_data'``.
      4. Write each stats row to
         ``stats/stats-noNN/<team>/<csv basename>`` as ``{'stats': row}`` and
         each score projection to ``.../<team>/nishant_number`` as
         ``{'nishant': value}``.

    Row ``i`` of every file is treated as belonging to ``teams[i]``; the team
    number is expected in the first column of ``teams.csv`` and the team order
    is expected to be the same across all files.

    Uses the module-level ``db`` Firestore client and the ``analysis`` module.
    Returns nothing; any exception from loading, analysis or Firestore
    propagates to the caller.
    """
    print("[OK] loading data")
    start = time.time()

    # These files are either handled separately below (scores, teams) or are
    # not numeric measures.
    skipped_prefixes = ('scores', 'teams', 'match', 'notes',
                        'observationType', 'teamDBRef')
    files = [fn for fn in glob.glob('data/*.csv')
             if not os.path.basename(fn).startswith(skipped_prefixes)]
    # data[j] always corresponds to files[j]; the upload loop relies on that.
    data = [analysis.load_csv(fn) for fn in files]

    teams = analysis.load_csv("data/teams.csv")
    scores = analysis.load_csv("data/scores.csv")

    end = time.time()
    print("[OK] loaded data in " + str(end - start) + " seconds")

    stats = []
    for measure in data:
        measure_stats = []
        for i, line in enumerate(measure):
            # The fitted curves are only printed here; they do not feed into
            # the uploaded stats row.
            _fit_best_curves(line, verbose=True)
            measure_stats.append(
                teams[i]
                + list(analysis.basic_stats(line, 0, 0))
                + list(analysis.histo_analysis(line, 1, -3, 3)))
        stats.append(measure_stats)

    nishant = []
    for i, line in enumerate(scores):
        if len(line) < 4:
            # Too few matches to fit anything.
            nishant.append('no_data')
            continue

        ofbest_curve, r2best_curve = _fit_best_curves(line)

        # NOTE(review): eval() executes the equation text produced by the
        # analysis module. Those strings presumably reference the local name
        # `z` (and possibly module globals such as `np`) - confirm against
        # the analysis module before renaming `z`, moving these calls into a
        # nested scope, or sandboxing eval.
        # NOTE(review): z is derived from the first team's row, not from
        # `line` - confirm that is intended.
        z = len(scores[0]) + 1
        of_value = eval(str(ofbest_curve[0]))
        r2_value = eval(str(r2best_curve[0]))
        nishant.append(teams[i] + [of_value, r2_value, (of_value + r2_value) / 2])

    # Built in full before any write so that a teams/scores length mismatch
    # fails before Firestore is touched.
    score_out = {str(team[0]): nishant[i] for i, team in enumerate(teams)}

    location = db.collection(u'stats').document(u'stats-noNN')
    for i, team in enumerate(teams):
        team_collection = location.collection(team[0])
        for j, path in enumerate(files):
            team_collection.document(os.path.basename(path)).set(
                {'stats': stats[j][i]})

    for team in teams:
        location.collection(team[0]).document(u'nishant_number').set(
            {'nishant': score_out[str(team[0])]})
2019-02-28 15:04:37 +00:00
def pulldata():
    """Refresh the CSV files in ``data/`` from the match API and Firestore.

    Part 1 - scores: for every row of ``data/teams.csv`` the team's 2019
    matches are requested through ``tba.req_team_matches``. Matches whose
    ``winning_alliance`` is an empty string are dropped, the rest are ordered
    by ``actual_time``, and the score of the alliance containing the team is
    collected. The result (one row per team) overwrites ``data/scores.csv``.

    Part 2 - scouting data: every document under
    ``data/team-2022/Central 2019/<team>/matches`` is read. Each entry whose
    key contains "Quantitative" and whose ``teamDBRef`` (minus its first five
    characters) is a known team id is kept. For every field of those entries
    a file ``data/<field>.csv`` is written with one row per team (in
    ``teams.csv`` order) holding that team's observed values.

    All kept entries are assumed to carry the same set of fields; columns are
    matched by position in sorted-key order. If no entry qualifies, only
    ``scores.csv`` is written.

    Uses the module-level ``db`` Firestore client and the ``analysis`` and
    ``tba`` modules. Returns nothing.
    """
    teams = analysis.load_csv('data/teams.csv')

    scores = []
    for team in teams:
        # NOTE(review): the third argument looks like an API credential
        # committed to source - consider loading it from configuration and
        # rotating the key.
        response = tba.req_team_matches(
            team[0], 2019, "UDvKmPjPRfwwUdDX1JxbmkyecYBJhCtXeyVk9vmO2i7K0Zn4wqQPMfzuEINXJ7e5")
        matches = [m for m in response.json()
                   if m.get('winning_alliance') != ""]
        # `or 0` also covers a key that is present but null, which a plain
        # .get(..., 0) default would let through as None and break the sort.
        matches.sort(key=lambda m: m.get('actual_time') or 0)

        team_key = "frc" + team[0]
        team_scores = []
        for m in matches:
            alliances = m.get('alliances')
            if team_key in alliances.get('blue').get('team_keys'):
                team_scores.append(alliances.get('blue').get('score'))
            elif team_key in alliances.get('red').get('team_keys'):
                team_scores.append(alliances.get('red').get('score'))
        scores.append(team_scores)

    with open("data/scores.csv", "w+", newline='') as file:
        csv.writer(file, delimiter=',').writerows(scores)

    team_ids = [team[0] for team in teams]

    event = db.collection('data').document('team-2022').collection("Central 2019")
    reports = []
    for team_doc in event.get():
        for report in event.document(team_doc.id).collection("matches").get():
            # The snapshot already holds the document data, so it is not
            # fetched a second time.
            reports.append(report.to_dict())

    out = []
    for report in reports:
        for key in report:
            if "Quantitative" not in key:
                continue
            entry = report.get(key)
            # NOTE(review): [5:] presumably strips a fixed "team-" style
            # prefix from the reference - confirm against the stored data.
            if entry.get('teamDBRef')[5:] in team_ids:
                out.append(dict(entry))

    if not out:
        # Nothing to pivot; previously this path crashed on undefined names.
        return

    # Field names come from the last kept entry; each row lists an entry's
    # values in its own sorted-key order.
    columns = sorted(out[-1])
    rows = [[entry[k] for k in sorted(entry)] for entry in out]
    ref_column = columns.index('teamDBRef')

    # big_out[field][team] -> list of observed values
    big_out = [[[] for _ in team_ids] for _ in columns]
    for row in rows:
        team_index = team_ids.index(row[ref_column][5:])
        for j, value in enumerate(row):
            big_out[j][team_index].append(value)

    for column, per_team in zip(columns, big_out):
        with open('data/' + column + '.csv', "w+", newline='') as file:
            csv.writer(file, delimiter=',').writerows(per_team)
2019-04-02 18:40:02 +00:00
2019-04-03 18:34:31 +00:00
def service():
    """Run the pull -> compute -> upload cycle repeatedly.

    Each cycle calls ``pulldata()`` once and then tries ``titanservice()`` up
    to five times. If one attempt succeeds the loop sleeps for the remainder
    of a 300-second window (measured from the start of the calculations, so
    the time spent in ``pulldata()`` is not counted) and starts over. If all
    five attempts fail the loop ends and the function returns.

    Exceptions raised by ``pulldata()`` are not caught here.
    """
    while True:
        pulldata()

        start = time.time()
        print("[OK] starting calculations")

        failed = False
        for attempt in range(5):
            try:
                titanservice()
                break
            except Exception:
                # Exception rather than a bare except, so Ctrl-C and
                # SystemExit stop the service and are not retried.
                if attempt != 4:
                    print("[WARNING] failed, trying " +
                          str(5 - attempt - 1) + " more times")
                else:
                    print("[ERROR] failed to compute data, skipping")
                    failed = True
        end = time.time()

        if failed:
            # Despite the "skipping" message, this leaves the service loop.
            break

        print("[OK] finished calculations")

        # Clamp at zero: time.sleep() raises ValueError for negative values,
        # which happened whenever a cycle took longer than five minutes.
        wait = max(0.0, 300 - (end - start))
        print("[OK] waiting: " + str(wait) + " seconds" + "\n")
        time.sleep(wait)  # executes once every 5 minutes
# Script start-up: silence warnings, connect to Firestore, run the service.
warnings.simplefilter("ignore")

# Use a service account. Fall back to the temporary key when the primary key
# cannot be loaded; only ordinary errors trigger the fallback, so an
# interrupt during start-up is not swallowed.
try:
    cred = credentials.Certificate('keys/firebasekey.json')
except Exception:
    cred = credentials.Certificate('keys/keytemp.json')

firebase_admin.initialize_app(cred)
# Module-level client used by titanservice() and pulldata().
db = firestore.client()

service()  # finally we write something that isn't a function definition
2019-04-03 19:39:22 +00:00
# titanservice()