mirror of
https://github.com/titanscouting/tra-analysis.git
synced 2025-09-07 07:27:20 +00:00
restructured file management part 2
This commit is contained in:
28
dep/2019/superscripts/repack_json.py
Normal file
28
dep/2019/superscripts/repack_json.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import os
|
||||
import json
|
||||
import ordereddict
|
||||
import collections
|
||||
import unicodecsv
|
||||
|
||||
# Flatten the nested realtime-database export into one CSV row per report.
# The loops below walk: root entry -> "teams" -> team -> match -> report uuid -> fields.
with open("data/realtimeDatabaseExport2018.json") as input_file:
    # The context manager closes the handle; the original open().read() leaked it.
    dict_content = json.loads(input_file.read())

list_of_new_data = []

# .items() works on both Python 2 and 3, unlike the Python-2-only .iteritems().
for datak, datav in dict_content.items():
    for teamk, teamv in datav["teams"].items():
        for matchk, matchv in teamv.items():
            for detailk, detailv in matchv.items():
                # Copy the report fields and tag the row with where it came from.
                new_data = collections.OrderedDict(detailv)
                new_data["uuid"] = detailk
                new_data["match"] = matchk
                new_data["team"] = teamk

                list_of_new_data.append(new_data)

# Union of every column name seen in any row. set.union replaces the builtin
# reduce(), which does not exist on Python 3; sorting makes the column order
# reproducible between runs instead of depending on set iteration order.
allkey = sorted(set().union(*(row.keys() for row in list_of_new_data)))

# unicodecsv emits encoded bytes, hence binary mode. The with-block guarantees
# the file is flushed and closed even if a row fails to serialise.
with open('realtimeDatabaseExport2018.csv', 'wb') as output_file:
    dict_writer = unicodecsv.DictWriter(csvfile=output_file, fieldnames=allkey)
    # Header row: every field name mapped to itself.
    dict_writer.writerow(dict((fn, fn) for fn in dict_writer.fieldnames))
    dict_writer.writerows(list_of_new_data)
|
61
dep/2019/superscripts/scoutflex2019.py
Normal file
61
dep/2019/superscripts/scoutflex2019.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Wed Mar 20 12:21:31 2019
|
||||
|
||||
@author: creek
|
||||
"""
|
||||
import firebase_admin
|
||||
from firebase_admin import credentials
|
||||
from firebase_admin import firestore
|
||||
import pprint
|
||||
from pylatex import Document, Section, Subsection, Command
|
||||
from pylatex.utils import italic, NoEscape
|
||||
import requests
|
||||
|
||||
def generate_team_report(team):
    """Generate a PDF scouting report for a single team.

    ``team`` is used like a Firestore document snapshot: the function reads
    ``team.id`` (report title and output file name) and
    ``team.reference.collection('matches')`` (the scouted matches).

    The report lists the scouted match ids, then the average cargo balls and
    hatch panels over all 'Quant*' analyses and the last 'Qual*' strategy
    fields seen. The PDF is written to a path named after ``team.id``.
    """
    doc = Document('basic')
    # Materialise the query result: it is walked twice below, and a generator
    # result would already be exhausted on the second pass.
    matches = list(team.reference.collection(u'matches').get())
    matchnums = [match.id for match in matches]

    with doc.create(Section('Qualification matches scouted')):
        for matchnum in matchnums:
            doc.append(str(matchnum))

    with doc.create(Section('Details')):
        # Totals start at zero; the original seeded `balls` with 42, which
        # inflated the reported average.
        balls = 0
        hatches = 0
        count = 0
        # Pre-bound so the report does not raise NameError when a team has no
        # 'Qual*' analysis.
        strategy = None
        strongObject = None
        for match in matches:
            # NOTE(review): iterating a match and calling analysis.key() is kept
            # exactly as written; confirm the objects returned by Firestore
            # actually support this access pattern.
            for analysis in match:
                if analysis.key().startswith('Quant'):
                    balls = balls + analysis['cargoBalls']
                    hatches = hatches + analysis['hatchPanels']
                    count = count + 1
                if analysis.key().startswith('Qual'):
                    strategy = analysis['StrategyType']
                    strongObject = analysis['TeleopStrongObject']
        if count > 0:
            doc.append("Average balls: " + str(float(balls)/count))
            doc.append("Average hatches: " + str(float(hatches)/count))
        # Only report qualitative fields that were actually found.
        if strategy is not None:
            doc.append("Strategy Type: " + str(strategy))
        if strongObject is not None:
            doc.append("Strongest object in teleop: " + str(strongObject))

    doc.preamble.append(Command('title', team.id))
    doc.preamble.append(Command('author', 'Generated by Team 2022'))
    doc.preamble.append(Command('date', NoEscape(r'\today')))
    # NOTE(review): \maketitle is appended after the sections, so the title
    # block is emitted at the end of the body; kept as in the original.
    doc.append(NoEscape(r'\maketitle'))

    doc.generate_pdf(filepath= str(team.id), clean_tex=False)
|
||||
|
||||
# Authenticate with the service-account key and initialise the default Firebase app.
credential = credentials.Certificate('keys/firebasekey.json')

firebase_admin.initialize_app(credential)

# firebase_admin.firestore exposes the lowercase client() factory, which returns a
# client bound to the app initialised above (the other scripts in this directory
# use the same call). The previous firestore.Client() is not an attribute of that module.
db = firestore.client()
teams_ref = db.collection(u'data').document(u'team-2022').collection(u'Central 2019')
teams = teams_ref.get()

# One PDF report per team document in the competition collection.
for team in teams:
    generate_team_report(team)
|
398
dep/2019/superscripts/superscript.py
Normal file
398
dep/2019/superscripts/superscript.py
Normal file
@@ -0,0 +1,398 @@
|
||||
# Titan Robotics Team 2022: Super Script
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# setup:
|
||||
|
||||
__version__ = "1.0.6.001"
|
||||
|
||||
__changelog__ = """changelog:
|
||||
1.0.6.001:
|
||||
- fixed multiple bugs
|
||||
- works now
|
||||
1.0.6.000:
|
||||
- added pulldata function
|
||||
- service now pulls in, computes data, and outputs data as planned
|
||||
1.0.5.003:
|
||||
- hotfix: actually pushes data correctly now
|
||||
1.0.5.002:
|
||||
- more information given
|
||||
- performance improvements
|
||||
1.0.5.001:
|
||||
- grammar
|
||||
1.0.5.000:
|
||||
- service now iterates forever
|
||||
- ready for production other than pulling json data
|
||||
1.0.4.001:
|
||||
- grammar fixes
|
||||
1.0.4.000:
|
||||
- actually pushes to firebase
|
||||
1.0.3.001:
|
||||
- processes data more efficiently
|
||||
1.0.3.000:
|
||||
- actually processes data
|
||||
1.0.2.000:
|
||||
- added data reading from folder
|
||||
- nearly crashed computer reading from 20 GiB of data
|
||||
1.0.1.000:
|
||||
- added data reading from file
|
||||
- added superstructure to code
|
||||
1.0.0.000:
|
||||
- added import statements (revolutionary)
|
||||
"""
|
||||
|
||||
__author__ = (
|
||||
"Arthur Lu <arthurlu@ttic.edu>, "
|
||||
"Jacob Levine <jlevine@ttic.edu>,"
|
||||
)
|
||||
|
||||
import firebase_admin
|
||||
from firebase_admin import credentials
|
||||
from firebase_admin import firestore
|
||||
import analysis
|
||||
#import titanlearn
|
||||
import visualization
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
import glob
|
||||
import numpy as np
|
||||
import time
|
||||
import tbarequest as tba
|
||||
import csv
|
||||
|
||||
|
||||
def titanservice():
    """Run one analysis pass over the CSV files in data/ and push the results to Firestore.

    Steps, in order:
      1. Load every data/*.csv measure file (minus the bookkeeping files
         filtered out below), plus data/teams.csv and data/scores.csv.
      2. For every row of every measure file, fit regressions and compute
         basic statistics and a histogram analysis.
      3. For every row of scores.csv with at least four entries, evaluate the
         two selected regression equations and store both values plus their
         mean (the "nishant" numbers); shorter rows get 'no_data'.
      4. Write the per-measure statistics and the nishant numbers under the
         stats/stats-noNN Firestore document.

    Uses the module-level `db` Firestore client. Returns nothing.
    """

    print("[OK] loading data")

    start = time.time()

    source_dir = 'data'
    # supposedly sorts by alphabetical order, skips reading teams.csv because of redundancy
    # NOTE(review): file_list is never read afterwards; `files` below is what gets loaded.
    file_list = glob.glob(source_dir + '/*.csv')
    data = []
    files = [fn for fn in glob.glob('data/*.csv')
             if not (os.path.basename(fn).startswith('scores') or os.path.basename(fn).startswith('teams') or os.path.basename(fn).startswith('match') or os.path.basename(fn).startswith('notes') or os.path.basename(fn).startswith('observationType') or os.path.basename(fn).startswith('teamDBRef'))]  # scores will be handled separately

    for i in files:
        data.append(analysis.load_csv(i))

    # print(files)

    stats = []
    measure_stats = []
    teams = analysis.load_csv("data/teams.csv")
    scores = analysis.load_csv("data/scores.csv")

    end = time.time()

    print("[OK] loaded data in " + str(end - start) + " seconds")

    # assumes that team number is in the first column, and that the order of teams is the same across all files
    for measure in data:  # unpacks 3d array into 2ds

        measure_stats = []

        for i in range(len(measure)):  # unpacks into specific teams

            # Both lists start with a placeholder that is popped again once the
            # four regression results have been appended.
            ofbest_curve = [None]
            r2best_curve = [None]

            line = measure[i]

            # print(line)

            x = list(range(len(line)))
            eqs, rmss, r2s, overfit = analysis.optimize_regression(x, line, 10, 1)

            beqs, brmss, br2s, boverfit = analysis.select_best_regression(eqs, rmss, r2s, overfit, "min_overfit")

            print(eqs, rmss, r2s, overfit)

            ofbest_curve.append(beqs)
            ofbest_curve.append(brmss)
            ofbest_curve.append(br2s)
            ofbest_curve.append(boverfit)
            ofbest_curve.pop(0)

            print(ofbest_curve)

            beqs, brmss, br2s, boverfit = analysis.select_best_regression(eqs, rmss, r2s, overfit, "max_r2s")

            r2best_curve.append(beqs)
            r2best_curve.append(brmss)
            r2best_curve.append(br2s)
            r2best_curve.append(boverfit)
            r2best_curve.pop(0)

            print(r2best_curve)

            # Row layout: the teams.csv row, then basic_stats output, then histo_analysis output.
            measure_stats.append(teams[i] + list(analysis.basic_stats(
                line, 0, 0)) + list(analysis.histo_analysis(line, 1, -3, 3)))

        # stats[j][i] is later read as: measure file j, team i.
        stats.append(list(measure_stats))
    nishant = []

    for i in range(len(scores)):

        # print(scores)

        ofbest_curve = [None]
        r2best_curve = [None]

        line = scores[i]

        # Rows with fewer than four scores are not fitted at all.
        if len(line) < 4:

            nishant.append('no_data')

            continue

        # print(line)

        x = list(range(len(line)))
        eqs, rmss, r2s, overfit = analysis.optimize_regression(x, line, 10, 1)

        beqs, brmss, br2s, boverfit = analysis.select_best_regression(
            eqs, rmss, r2s, overfit, "min_overfit")

        #print(eqs, rmss, r2s, overfit)

        ofbest_curve.append(beqs)
        ofbest_curve.append(brmss)
        ofbest_curve.append(br2s)
        ofbest_curve.append(boverfit)
        ofbest_curve.pop(0)

        # print(ofbest_curve)

        beqs, brmss, br2s, boverfit = analysis.select_best_regression(
            eqs, rmss, r2s, overfit, "max_r2s")

        r2best_curve.append(beqs)
        r2best_curve.append(brmss)
        r2best_curve.append(br2s)
        r2best_curve.append(boverfit)
        r2best_curve.pop(0)

        # print(r2best_curve)

        # NOTE(review): `z` is not read by any statement here; presumably the
        # equation strings passed to eval() below reference it - confirm against
        # analysis.optimize_regression before renaming or removing it.
        z = len(scores[0]) + 1
        nis_num = []

        # NOTE(review): eval() executes whatever expression text the analysis
        # module returned, using this function's local names; it must never be
        # fed strings from an untrusted source.
        nis_num.append(eval(str(ofbest_curve[0])))
        nis_num.append(eval(str(r2best_curve[0])))

        nis_num.append((eval(ofbest_curve[0]) + eval(r2best_curve[0])) / 2)

        nishant.append(teams[i] + nis_num)

    json_out = {}
    score_out = {}

    # Index the nishant rows by team identifier (first column of teams.csv).
    for i in range(len(teams)):
        score_out[str(teams[i][0])] = (nishant[i])

    location = db.collection(u'stats').document(u'stats-noNN')
    for i in range(len(teams)):
        general_general_stats = location.collection(teams[i][0])

        for j in range(len(files)):
            # NOTE(review): stored under str(teams[i][0]) but looked up with
            # teams[i][0]; these only match if load_csv yields strings - confirm.
            json_out[str(teams[i][0])] = (stats[j][i])
            name = os.path.basename(files[j])
            general_general_stats.document(name).set(
                {'stats': json_out.get(teams[i][0])})

    for i in range(len(teams)):
        nnum = location.collection(teams[i][0]).document(
            u'nishant_number').set({'nishant': score_out.get(teams[i][0])})
|
||||
|
||||
|
||||
def pulldata():
    """Refresh the CSV inputs under data/ from The Blue Alliance and Firestore.

    1. For every team in data/teams.csv, request its 2019 matches, drop the
       ones with an empty 'winning_alliance', order them by 'actual_time' and
       write the team's alliance scores as one row of data/scores.csv.
    2. Read every match report below data/team-2022/"Central 2019" in
       Firestore, keep the entries whose key contains "Quantitative" and whose
       'teamDBRef' (minus its first five characters) is a known team, and
       write one data/<field>.csv per field with one row per team.

    Uses the module-level `db` Firestore client. Returns nothing.
    """
    teams = analysis.load_csv('data/teams.csv')
    scores = []
    for team_row in teams:
        team_number = team_row[0]
        team_scores = []
        # NOTE(review): this API key is committed to source control; it should
        # be rotated and supplied from configuration instead.
        request_data_object = tba.req_team_matches(
            team_number, 2019, "UDvKmPjPRfwwUdDX1JxbmkyecYBJhCtXeyVk9vmO2i7K0Zn4wqQPMfzuEINXJ7e5")

        # Keep only matches that have a recorded winner.
        json_data = [match for match in request_data_object.json()
                     if match.get('winning_alliance') != ""]

        # `or 0` also covers an explicit null 'actual_time'; .get(key, 0) would
        # return None there and make the sort raise TypeError on Python 3.
        json_data = sorted(json_data, key=lambda k: k.get('actual_time') or 0)

        team_key = "frc" + team_number
        for match in json_data:
            alliances = match.get('alliances')
            if team_key in alliances.get('blue').get('team_keys'):
                team_scores.append(alliances.get('blue').get('score'))
            elif team_key in alliances.get('red').get('team_keys'):
                team_scores.append(alliances.get('red').get('score'))
        scores.append(team_scores)

    with open("data/scores.csv", "w+", newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerows(scores)

    # Team identifiers only (first column of teams.csv).
    list_teams = [row[0] for row in teams]

    event = db.collection('data').document(
        'team-2022').collection("Central 2019")
    full = []
    for team in event.get():
        reports = event.document(team.id).collection("matches").get()
        for report in reports:
            # The snapshot returned by the collection query already carries the
            # document data, so it is not fetched a second time by id.
            full.append(report.to_dict())

    out = []
    for report_dict in full:
        for key in list(report_dict.keys()):
            if "Quantitative" in key:
                entry = report_dict.get(key)
                # presumably teamDBRef is a 5-character prefix followed by the
                # team number - TODO confirm against the scouting app.
                if entry.get('teamDBRef')[5:] in list_teams:
                    out.append(dict(entry))

    if not out:
        # Nothing matched: previously this fell through to a stale loop
        # variable and crashed. There are no per-field files to write.
        return

    # Each entry becomes a row of values ordered by sorted field name. j_list
    # ends up as the field names of the last entry; as in the original, every
    # entry is assumed to have the same set of fields.
    sorted_out = []
    for entry in out:
        j_list = sorted(entry.keys())
        sorted_out.append([entry[name] for name in j_list])

    # big_out[field][team] collects that field's values for that team.
    big_out = [[[] for _ in list_teams] for _ in j_list]

    ref_index = j_list.index('teamDBRef')
    for row in sorted_out:
        team_index = list_teams.index(row[ref_index][5:])
        for j in range(len(row)):
            big_out[j][team_index].append(row[j])

    for i in range(len(big_out)):
        with open('data/' + j_list[i] + '.csv', "w+", newline='') as csv_file:
            writer = csv.writer(csv_file, delimiter=',')
            writer.writerows(big_out[i])
|
||||
|
||||
|
||||
def service():
    """Run the pull -> compute -> sleep cycle until a computation fails five times.

    Each cycle calls pulldata(), then tries titanservice() up to five times.
    If all five attempts raise, the loop (and therefore the service) stops.
    Otherwise it sleeps for whatever remains of a 300 second period.
    """

    while True:

        pulldata()

        start = time.time()

        print("[OK] starting calculations")

        failed = False

        for i in range(0, 5):
            try:
                titanservice()
                break
            # Exception rather than a bare except, so Ctrl-C / SystemExit still
            # stop the service instead of being counted as a failed attempt.
            except Exception as error:
                # Report the cause; the original discarded it silently.
                print("[WARNING] " + repr(error))
                if i != 4:
                    print("[WARNING] failed, trying " +
                          str(5 - i - 1) + " more times")
                else:
                    print("[ERROR] failed to compute data, skipping")
                    failed = True

        end = time.time()
        if failed:
            break

        print("[OK] finished calculations")

        # Clamp at zero: time.sleep() raises ValueError for a negative value,
        # which happened whenever a cycle took longer than 300 seconds.
        wait = max(0, 300 - (end - start))

        print("[OK] waiting: " + str(wait) + " seconds" + "\n")

        time.sleep(wait)  # executes once every 5 minutes
|
||||
|
||||
|
||||
warnings.simplefilter("ignore")
# Use a service account
try:
    cred = credentials.Certificate('keys/firebasekey.json')
# Certificate() raises IOError for a missing/unreadable file and ValueError for
# an invalid one; anything else (including Ctrl-C) should not be swallowed.
except (IOError, ValueError):
    # Fall back to the temporary key file.
    cred = credentials.Certificate('keys/keytemp.json')
firebase_admin.initialize_app(cred)

# Module-level Firestore client used by titanservice() and pulldata().
db = firestore.client()

service()  # finally we write something that isn't a function definition
|
||||
# titanservice()
|
292
dep/2019/superscripts/superscript_nishant_only.py
Normal file
292
dep/2019/superscripts/superscript_nishant_only.py
Normal file
@@ -0,0 +1,292 @@
|
||||
# Titan Robotics Team 2022: Super Script
|
||||
# Written by Arthur Lu & Jacob Levine
|
||||
# Notes:
|
||||
# setup:
|
||||
|
||||
__version__ = "1.0.6.001"
|
||||
|
||||
__changelog__ = """changelog:
|
||||
1.0.6.001:
|
||||
- fixed multiple bugs
|
||||
- works now
|
||||
1.0.6.000:
|
||||
- added pulldata function
|
||||
- service now pulls in, computes data, and outputs data as planned
|
||||
1.0.5.003:
|
||||
- hotfix: actually pushes data correctly now
|
||||
1.0.5.002:
|
||||
- more information given
|
||||
- performance improvements
|
||||
1.0.5.001:
|
||||
- grammar
|
||||
1.0.5.000:
|
||||
- service now iterates forever
|
||||
- ready for production other than pulling json data
|
||||
1.0.4.001:
|
||||
- grammar fixes
|
||||
1.0.4.000:
|
||||
- actually pushes to firebase
|
||||
1.0.3.001:
|
||||
- processes data more efficiently
|
||||
1.0.3.000:
|
||||
- actually processes data
|
||||
1.0.2.000:
|
||||
- added data reading from folder
|
||||
- nearly crashed computer reading from 20 GiB of data
|
||||
1.0.1.000:
|
||||
- added data reading from file
|
||||
- added superstructure to code
|
||||
1.0.0.000:
|
||||
- added import statements (revolutionary)
|
||||
"""
|
||||
|
||||
__author__ = (
|
||||
"Arthur Lu <arthurlu@ttic.edu>, "
|
||||
"Jacob Levine <jlevine@ttic.edu>,"
|
||||
)
|
||||
|
||||
import firebase_admin
|
||||
from firebase_admin import credentials
|
||||
from firebase_admin import firestore
|
||||
import analysis
|
||||
#import titanlearn
|
||||
import visualization
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
import glob
|
||||
import numpy as np
|
||||
import time
|
||||
import tbarequest as tba
|
||||
import csv
|
||||
|
||||
|
||||
def titanservice():
    """Compute the "nishant" numbers from data/scores.csv and push them to Firestore.

    Variant of the full super script: the data/*.csv measure files are still
    loaded, but the per-measure statistics pass and its Firestore upload are
    commented out here, so only the score-based numbers are produced.

    For every row of scores.csv with at least four entries, the two selected
    regression equations are evaluated and stored with their mean; shorter
    rows get 'no_data'. Results are written to
    stats/stats-noNN/<team>/nishant_number.

    Uses the module-level `db` Firestore client. Returns nothing.
    """

    print("[OK] loading data")

    start = time.time()

    source_dir = 'data'
    # supposedly sorts by alphabetical order, skips reading teams.csv because of redundancy
    # NOTE(review): file_list is never read afterwards.
    file_list = glob.glob(source_dir + '/*.csv')
    data = []
    files = [fn for fn in glob.glob('data/*.csv')
             if not (os.path.basename(fn).startswith('scores') or os.path.basename(fn).startswith('teams') or os.path.basename(fn).startswith('match') or os.path.basename(fn).startswith('notes') or os.path.basename(fn).startswith('observationType') or os.path.basename(fn).startswith('teamDBRef'))]  # scores will be handled separately

    # NOTE(review): `data` is filled but not used below, because the pass that
    # consumed it is disabled in this variant.
    for i in files:
        data.append(analysis.load_csv(i))

    # print(files)

    stats = []
    measure_stats = []
    teams = analysis.load_csv("data/teams.csv")
    scores = analysis.load_csv("data/scores.csv")

    end = time.time()

    print("[OK] loaded data in " + str(end - start) + " seconds")

    # assumes that team number is in the first column, and that the order of teams is the same across all files
    # (The per-measure regression/statistics loop of superscript.py is commented
    # out in this variant, so `stats` stays empty.)
    nishant = []

    for i in range(len(scores)):

        # print(scores)

        # Both lists start with a placeholder that is popped again once the
        # four regression results have been appended.
        ofbest_curve = [None]
        r2best_curve = [None]

        line = scores[i]

        # Rows with fewer than four scores are not fitted at all.
        if len(line) < 4:

            nishant.append('no_data')

            continue

        # print(line)

        x = list(range(len(line)))
        eqs, rmss, r2s, overfit = analysis.optimize_regression(x, line, 10, 1)

        beqs, brmss, br2s, boverfit = analysis.select_best_regression(
            eqs, rmss, r2s, overfit, "min_overfit")

        #print(eqs, rmss, r2s, overfit)

        ofbest_curve.append(beqs)
        ofbest_curve.append(brmss)
        ofbest_curve.append(br2s)
        ofbest_curve.append(boverfit)
        ofbest_curve.pop(0)

        # print(ofbest_curve)

        beqs, brmss, br2s, boverfit = analysis.select_best_regression(
            eqs, rmss, r2s, overfit, "max_r2s")

        r2best_curve.append(beqs)
        r2best_curve.append(brmss)
        r2best_curve.append(br2s)
        r2best_curve.append(boverfit)
        r2best_curve.pop(0)

        # print(r2best_curve)

        # NOTE(review): `z` is not read by any statement here; presumably the
        # equation strings passed to eval() below reference it - confirm against
        # analysis.optimize_regression before renaming or removing it.
        z = len(scores[0]) + 1
        nis_num = []

        # NOTE(review): eval() executes whatever expression text the analysis
        # module returned, using this function's local names; it must never be
        # fed strings from an untrusted source.
        nis_num.append(eval(str(ofbest_curve[0])))
        nis_num.append(eval(str(r2best_curve[0])))

        nis_num.append((eval(ofbest_curve[0]) + eval(r2best_curve[0])) / 2)

        nishant.append(teams[i] + nis_num)

    json_out = {}
    score_out = {}

    # Index the nishant rows by team identifier (first column of teams.csv).
    for i in range(len(teams)):
        score_out[str(teams[i][0])] = (nishant[i])

    location = db.collection(u'stats').document(u'stats-noNN')
    # (The upload of per-measure stats is commented out in this variant.)

    for i in range(len(teams)):
        nnum = location.collection(teams[i][0]).document(
            u'nishant_number').set({'nishant': score_out.get(teams[i][0])})
|
||||
|
||||
|
||||
def pulldata():
    """Rebuild data/scores.csv from The Blue Alliance match results.

    For every team in data/teams.csv, request its 2019 matches, drop the ones
    with an empty 'winning_alliance', order them by 'actual_time', and write
    the score of the alliance the team played on as one CSV row per team.
    Returns nothing.
    """
    teams = analysis.load_csv('data/teams.csv')
    scores = []
    for team_row in teams:
        team_number = team_row[0]
        team_scores = []
        # NOTE(review): this API key is committed to source control; it should
        # be rotated and supplied from configuration instead.
        request_data_object = tba.req_team_matches(
            team_number, 2019, "UDvKmPjPRfwwUdDX1JxbmkyecYBJhCtXeyVk9vmO2i7K0Zn4wqQPMfzuEINXJ7e5")

        # Keep only matches that have a recorded winner.
        json_data = [match for match in request_data_object.json()
                     if match.get('winning_alliance') != ""]

        # `or 0` also covers an explicit null 'actual_time'; .get(key, 0) would
        # return None there and make the sort raise TypeError on Python 3.
        json_data = sorted(json_data, key=lambda k: k.get('actual_time') or 0)

        team_key = "frc" + team_number
        for match in json_data:
            alliances = match.get('alliances')
            if team_key in alliances.get('blue').get('team_keys'):
                team_scores.append(alliances.get('blue').get('score'))
            elif team_key in alliances.get('red').get('team_keys'):
                team_scores.append(alliances.get('red').get('score'))
        scores.append(team_scores)

    with open("data/scores.csv", "w+", newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerows(scores)
|
||||
|
||||
|
||||
def service():
    """Run the pull -> compute -> sleep cycle until a computation fails five times.

    Each cycle calls pulldata(), then tries titanservice() up to five times.
    If all five attempts raise, the loop (and therefore the service) stops.
    Otherwise it sleeps for whatever remains of a 300 second period.
    """

    while True:

        pulldata()

        start = time.time()

        print("[OK] starting calculations")

        failed = False

        for i in range(0, 5):
            try:
                titanservice()
                break
            # Exception rather than a bare except, so Ctrl-C / SystemExit still
            # stop the service instead of being counted as a failed attempt.
            except Exception as error:
                # Report the cause; the original discarded it silently.
                print("[WARNING] " + repr(error))
                if i != 4:
                    print("[WARNING] failed, trying " +
                          str(5 - i - 1) + " more times")
                else:
                    print("[ERROR] failed to compute data, skipping")
                    failed = True

        end = time.time()
        if failed:
            break

        print("[OK] finished calculations")

        # Clamp at zero: time.sleep() raises ValueError for a negative value,
        # which happened whenever a cycle took longer than 300 seconds.
        wait = max(0, 300 - (end - start))

        print("[OK] waiting: " + str(wait) + " seconds" + "\n")

        time.sleep(wait)  # executes once every 5 minutes
|
||||
|
||||
|
||||
warnings.simplefilter("ignore")
# Use a service account
try:
    cred = credentials.Certificate('keys/firebasekey.json')
# Certificate() raises IOError for a missing/unreadable file and ValueError for
# an invalid one; anything else (including Ctrl-C) should not be swallowed.
except (IOError, ValueError):
    # Fall back to the temporary key file.
    cred = credentials.Certificate('keys/keytemp.json')
firebase_admin.initialize_app(cred)

# Module-level Firestore client used by titanservice().
db = firestore.client()

service()  # finally we write something that isn't a function definition
|
||||
# titanservice()
|
110
dep/2019/superscripts/tbarequest.py
Normal file
110
dep/2019/superscripts/tbarequest.py
Normal file
@@ -0,0 +1,110 @@
|
||||
#Titan Robotics Team 2022: TBA Requests Module
|
||||
#Written by Arthur Lu & Jacob Levine
|
||||
#Notes:
|
||||
# this should be imported as a python module using 'import tbarequest'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has not been optimized for multithreaded computing
|
||||
#Number of easter eggs: none yet
|
||||
#setup:
|
||||
|
||||
__version__ = "1.0.0.001"
|
||||
|
||||
#changelog should be viewed using print(tbarequest.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
1.0.1.000:
|
||||
- fixed a simple error
|
||||
1.0.0.xxx:
|
||||
-added common requests and JSON processing"""
|
||||
__author__ = (
|
||||
"Arthur Lu <arthurlu@ttic.edu>, "
|
||||
"Jacob Levine <jlevine@ttic.edu>,"
|
||||
)
|
||||
# Public API of this module. Every entry needs its own trailing comma: the
# previous list was missing one, so two adjacent string literals were silently
# concatenated into the non-existent name 'req_event_elim_allireq_team_events',
# which breaks `from tbarequest import *`.
__all__ = [
    'process_json_ret',
    'req_all_events',
    'req_event_matches',
    'req_event_insights',
    'req_event_elim_alli',
    'req_team_events',
    'req_team_matches',
    # Defined below but previously not exported.
    'req_team_info',
    'req_event_opr',
]
|
||||
#imports
|
||||
import requests
|
||||
|
||||
#as this code is public, i'm not putting 2022's API key in here. just add it as a var in your script and go
|
||||
#requests basic team info
|
||||
def req_team_info(team,apikey):
    """Request the 'simple' info record for team number `team`.

    `apikey` is sent as the X-TBA-Auth-Key header. Returns the response
    object from requests.get() unchanged; the status code is not checked.
    """
    url = 'https://www.thebluealliance.com/api/v3/team/frc'+str(team)+'/simple/'
    return requests.get(url, headers={'X-TBA-Auth-Key':apikey})
|
||||
|
||||
#requests a list of events that a team went to
|
||||
def req_team_events(team,year,apikey):
    """Request the events that team number `team` attended in `year`.

    `apikey` is sent as the X-TBA-Auth-Key header. Returns the response
    object from requests.get() unchanged; the status code is not checked.
    """
    url = 'https://www.thebluealliance.com/api/v3/team/frc'+str(team)+'/events/'+str(year)
    return requests.get(url, headers={'X-TBA-Auth-Key':apikey})
|
||||
|
||||
#gets every match that a team played in
|
||||
def req_team_matches(team,year,apikey):
    """Request every match that team number `team` played in `year`.

    `apikey` is sent as the X-TBA-Auth-Key header. Returns the response
    object from requests.get() unchanged; the status code is not checked.
    """
    url = 'https://www.thebluealliance.com/api/v3/team/frc'+str(team)+'/matches/'+str(year)
    return requests.get(url, headers={'X-TBA-Auth-Key':apikey})
|
||||
|
||||
#gets all events in a certain year
|
||||
def req_all_events(year, apikey):
    """Request all events held in `year`.

    `apikey` is sent as the X-TBA-Auth-Key header. Returns the response
    object from requests.get() unchanged; the status code is not checked.
    """
    url = 'https://www.thebluealliance.com/api/v3/events/'+str(year)
    return requests.get(url, headers={'X-TBA-Auth-Key':apikey})
|
||||
|
||||
#gets all matches for an event
|
||||
def req_event_matches(event_key,apikey):
    """Request all matches of the event identified by `event_key`.

    `apikey` is sent as the X-TBA-Auth-Key header. Returns the response
    object from requests.get() unchanged; the status code is not checked.
    """
    url = 'https://www.thebluealliance.com/api/v3/event/'+str(event_key)+'/matches'
    return requests.get(url, headers={'X-TBA-Auth-Key':apikey})
|
||||
|
||||
#gets elimination alliances from a event
|
||||
def req_event_elim_alli(event_key, apikey):
    """Request the elimination alliances of the event identified by `event_key`.

    `apikey` is sent as the X-TBA-Auth-Key header. Returns the response
    object from requests.get() unchanged; the status code is not checked.
    """
    url = 'https://www.thebluealliance.com/api/v3/event/'+str(event_key)+'/alliances'
    return requests.get(url, headers={'X-TBA-Auth-Key':apikey})
|
||||
|
||||
#requests oprs and dprs
|
||||
def req_event_opr(event_key, apikey):
    """Request the 'oprs' resource of the event identified by `event_key`.

    `apikey` is sent as the X-TBA-Auth-Key header. Returns the response
    object from requests.get() unchanged; the status code is not checked.
    """
    url = 'https://www.thebluealliance.com/api/v3/event/'+str(event_key)+'/oprs'
    return requests.get(url, headers={'X-TBA-Auth-Key':apikey})
|
||||
|
||||
|
||||
|
||||
#gets TBA's insights from an event
|
||||
def req_event_insights(event_key, apikey):
    """Request the 'insights' resource of the event identified by `event_key`.

    `apikey` is sent as the X-TBA-Auth-Key header. Returns the response
    object from requests.get() unchanged; the status code is not checked.
    """
    url = 'https://www.thebluealliance.com/api/v3/event/'+str(event_key)+'/insights'
    return requests.get(url, headers={'X-TBA-Auth-Key':apikey})
|
||||
|
||||
#processes the json return. right now, it's slow and not great. will throw an exception if it doesn't get a good status code
|
||||
def process_json_ret(req):
    """Turn a JSON list-of-objects response into a table (list of rows).

    Args:
        req: a response object exposing `status_code` and `json()`, where
            `json()` returns an iterable of dicts.

    Returns:
        A list whose first row holds every key found in any record, in
        first-seen order, followed by one row per record with the values in
        that key order; keys a record lacks are filled with "".

    Raises:
        ValueError: if `req.status_code` is not 200.
    """
    if req.status_code != 200:
        # str() is required: concatenating the int status code raised
        # TypeError instead of the intended ValueError.
        raise ValueError('Status code is: '+str(req.status_code)+', not 200')

    # Decode the body once instead of once per pass.
    records = req.json()

    # Collect the union of keys in first-seen order; the set makes the
    # membership test O(1) instead of scanning the key list every time.
    keys = []
    seen = set()
    for record in records:
        for key in record.keys():
            if key not in seen:
                seen.add(key)
                keys.append(key)

    out = [keys]
    for record in records:
        out.append([record.get(key, "") for key in keys])
    return out
|
206
dep/2019/superscripts/titanlearn.py
Normal file
206
dep/2019/superscripts/titanlearn.py
Normal file
@@ -0,0 +1,206 @@
|
||||
#Titan Robotics Team 2022: ML Module
|
||||
#Written by Arthur Lu & Jacob Levine
|
||||
#Notes:
|
||||
# this should be imported as a python module using 'import titanlearn'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has not been optimized for multithreaded computing
|
||||
# this module learns from its mistakes far faster than 2022's captains
|
||||
#setup:
|
||||
|
||||
__version__ = "1.0.0.001"
|
||||
|
||||
#changelog should be viewed using print(titanlearn.__changelog__)
|
||||
__changelog__ = """changelog:
|
||||
1.0.0.xxx:
|
||||
-added generation of ANNS, basic SGD training"""
|
||||
__author__ = (
|
||||
"Arthur Lu <arthurlu@ttic.edu>, "
|
||||
"Jacob Levine <jlevine@ttic.edu>,"
|
||||
)
|
||||
__all__ = [
|
||||
'linear_nn',
|
||||
'train_sgd_minibatch',
|
||||
'train_sgd_simple'
|
||||
]
|
||||
#imports
|
||||
import torch
|
||||
import warnings
|
||||
from collections import OrderedDict
|
||||
from sklearn import metrics, datasets
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import math
|
||||
import time
|
||||
|
||||
#all tensors and models are placed on the CPU (CUDA is not enabled here)
|
||||
device = torch.device("cpu")
|
||||
|
||||
#linear_nn: creates a fully connected network given params
|
||||
def linear_nn(in_dim, hidden_dim, out_dim, num_hidden, act_fn="tanh", end="none"):
    """Assemble a fully connected torch.nn.Sequential network.

    Layers, in order: "in" (in_dim -> hidden_dim), then `num_hidden` pairs of
    "lin<i>" (hidden_dim -> hidden_dim) and an activation named
    "tanh<i>" / "sig<i>" / "relu<i>" / "lre<i>", then "out"
    (hidden_dim -> out_dim), optionally followed by a "softmax" or "sigmoid"
    module.

    `act_fn` is one of "tanh", "sigmoid", "relu", "leaky relu" and `end` one
    of "none", "softmax", "sigmoid" (both case-insensitive). For any other
    value a warning is issued and None is returned.
    """
    # activation name -> (layer-name prefix, module class)
    inner_choices = {
        "tanh": ("tanh", torch.nn.Tanh),
        "sigmoid": ("sig", torch.nn.Sigmoid),
        "relu": ("relu", torch.nn.ReLU),
        "leaky relu": ("lre", torch.nn.LeakyReLU),
    }
    chosen = inner_choices.get(act_fn.lower())
    if chosen is None:
        warnings.warn("Did not specify a valid inner activation function. Returning nothing.")
        return None
    prefix, activation = chosen

    # The hidden stack is built before `end` is validated, matching the order
    # in which the layers (and their random initial weights) are created.
    layers = OrderedDict()
    layers["in"] = torch.nn.Linear(in_dim, hidden_dim)
    for idx in range(1, num_hidden + 1):
        layers["lin" + str(idx)] = torch.nn.Linear(hidden_dim, hidden_dim)
        layers[prefix + str(idx)] = activation()

    final = end.lower()
    if final not in ("softmax", "none", "sigmoid"):
        warnings.warn("Did not specify a valid final activation function. Returning nothing.")
        return None

    layers["out"] = torch.nn.Linear(hidden_dim, out_dim)
    if final == "softmax":
        layers["softmax"] = torch.nn.Softmax()
    elif final == "sigmoid":
        layers["sigmoid"] = torch.nn.Sigmoid()

    return torch.nn.Sequential(layers)
|
||||
|
||||
#train_sgd_simple: trains network using SGD
|
||||
def train_sgd_simple(net, evalType, data, ground, dev=None, devg=None, iters=1000, learnrate=1e-4, testevery=1, graphsaveloc=None, modelsaveloc=None, loss="mse"):
    """Train ``net`` with full-batch SGD, scoring and plotting it periodically.

    Args:
        net: the torch module to train.
        evalType: "ap" scores with ``metrics.average_precision_score``,
            "regression" with ``metrics.explained_variance_score``.
        data: training inputs (tensor).
        ground: training targets (tensor).
        dev: optional held-out inputs.
        devg: optional held-out targets; dev scoring needs both dev and devg.
        iters: number of optimisation steps.
        learnrate: SGD learning rate.
        testevery: score the model every this many steps.
        graphsaveloc: if given, the plot is saved to ``graphsaveloc + ".pdf"``
            after every scoring step.
        modelsaveloc: if given, the trained model is saved there with
            ``torch.save``.
        loss: "mse", "cross entropy", "nll" or "poisson nll"
            (case-insensitive).

    Returns:
        The trained model, or ``None`` (after a warning) when ``loss`` or
        ``evalType`` is not recognised.
    """
    loss_classes = {
        "mse": torch.nn.MSELoss,
        "cross entropy": torch.nn.CrossEntropyLoss,
        "nll": torch.nn.NLLLoss,
        "poisson nll": torch.nn.PoissonNLLLoss,
    }
    loss_cls = loss_classes.get(loss.lower())
    if loss_cls is None:
        warnings.warn("Did not specify a valid loss function. Returning nothing.")
        return None
    loss_fn = loss_cls()

    # Reject an unknown evalType up front; previously it surfaced as a
    # NameError on the first scoring step.
    if evalType == "ap":
        score_fn, tag = metrics.average_precision_score, "AP"
    elif evalType == "regression":
        score_fn, tag = metrics.explained_variance_score, "EV"
    else:
        warnings.warn("Did not specify a valid evaluation type. Returning nothing.")
        return None

    model = net.to(device)
    data = data.to(device)
    ground = ground.to(device)
    # Targets never change, so convert them for the metric functions once.
    ground_np = ground.cpu().numpy()

    has_dev = dev is not None and devg is not None
    if has_dev:
        dev = dev.to(device)
        devg_np = devg.cpu().numpy()

    # Despite the names, these hold metric scores (AP / EV), not loss values.
    losses = []
    dev_losses = []

    optimizer = torch.optim.SGD(model.parameters(), lr=learnrate)
    for i in range(iters):
        if i % testevery == 0:
            # One x value per scoring step performed so far.
            steps = np.array(range(0, i + 1, testevery))
            with torch.no_grad():
                score = score_fn(ground_np, model(data).cpu().numpy())
                losses.append(score)
                print(str(i) + ": " + str(score))
                plt.plot(steps, np.array(losses), label="train " + tag)
                if has_dev:
                    dev_score = score_fn(devg_np, model(dev).cpu().numpy())
                    dev_losses.append(dev_score)
                    # Plot the dev scores themselves (the train scores were
                    # being plotted a second time here).
                    plt.plot(steps, np.array(dev_losses), label="dev " + tag)
            if graphsaveloc is not None:
                plt.savefig(graphsaveloc + ".pdf")
        with torch.enable_grad():
            optimizer.zero_grad()
            output = model(data)
            # Separate name so the ``loss`` argument is not shadowed.
            step_loss = loss_fn(output, ground)
            print(step_loss.item())
            step_loss.backward()
            optimizer.step()

    if modelsaveloc is not None:
        torch.save(model, modelsaveloc)
    plt.show()
    return model
|
||||
|
||||
#train_sgd_minibatch: same as above, but with minibatches
|
||||
def train_sgd_minibatch(net, data, ground, dev=None, devg=None, epoch=100, batchsize=20, learnrate=1e-4, testevery=20, graphsaveloc=None, modelsaveloc=None, loss="mse"):
    """Train ``net`` with minibatch SGD, scoring and plotting it periodically.

    The model is scored with ``metrics.average_precision_score`` on the full
    training set (and on dev/devg when both are given) every ``testevery``
    minibatch steps.

    Args:
        net: the torch module to train.
        data: training inputs (tensor); batches are consecutive slices
            along the first axis.
        ground: training targets (tensor), aligned with ``data``.
        dev: optional held-out inputs.
        devg: optional held-out targets; dev scoring needs both dev and devg.
        epoch: number of passes over the training data.
        batchsize: number of samples per minibatch.
        learnrate: SGD learning rate.
        testevery: score the model every this many minibatch steps.
        graphsaveloc: if given, the plot is saved to ``graphsaveloc + ".pdf"``
            after every scoring step.
        modelsaveloc: if given, the trained model is saved there with
            ``torch.save``.
        loss: "mse", "cross entropy", "nll" or "poisson nll"
            (case-insensitive).

    Returns:
        The trained model, or ``None`` (after a warning) when ``loss`` is not
        recognised.
    """
    loss_classes = {
        "mse": torch.nn.MSELoss,
        "cross entropy": torch.nn.CrossEntropyLoss,
        "nll": torch.nn.NLLLoss,
        "poisson nll": torch.nn.PoissonNLLLoss,
    }
    loss_cls = loss_classes.get(loss.lower())
    if loss_cls is None:
        warnings.warn("Did not specify a valid loss function. Returning nothing.")
        return None
    loss_fn = loss_cls()

    model = net.to(device)
    data = data.to(device)
    ground = ground.to(device)
    ground_np = ground.cpu().numpy()

    has_dev = dev is not None and devg is not None
    if has_dev:
        dev = dev.to(device)
        devg_np = devg.cpu().numpy()

    # Despite the names, these hold average-precision scores, not losses.
    losses = []
    dev_losses = []

    # Plain SGD, as the function name says: LBFGS.step() requires a closure
    # argument and cannot be driven by the zero_grad/backward/step sequence
    # used below.
    optimizer = torch.optim.SGD(model.parameters(), lr=learnrate)

    # size()[0] is already a Python int (calling .item() on it fails).
    n_samples = data.size()[0]
    batches = math.ceil(n_samples / batchsize)

    itercount = 0
    for i in range(epoch):
        print("EPOCH " + str(i) + " OF " + str(epoch - 1))
        for j in range(batches):
            lo = j * batchsize
            # Slicing clamps at the end, so the last batch may be shorter.
            batchdata = data[lo:lo + batchsize]
            batchground = ground[lo:lo + batchsize]

            if itercount % testevery == 0:
                # One x value per scoring step performed so far; scoring is
                # keyed on itercount, so the axis must be too.
                steps = np.array(range(0, itercount + 1, testevery))
                with torch.no_grad():
                    ap = metrics.average_precision_score(ground_np, model(data).cpu().numpy())
                    losses.append(ap)
                    print(str(itercount) + ": " + str(ap))
                    plt.plot(steps, np.array(losses), label="train AP")
                    if has_dev:
                        dev_ap = metrics.average_precision_score(devg_np, model(dev).cpu().numpy())
                        dev_losses.append(dev_ap)
                        plt.plot(steps, np.array(dev_losses), label="dev AP")
                if graphsaveloc is not None:
                    plt.savefig(graphsaveloc + ".pdf")

            with torch.enable_grad():
                optimizer.zero_grad()
                output = model(batchdata)
                # Compare against this batch's targets, not the full set.
                step_loss = loss_fn(output, batchground)
                step_loss.backward()
                optimizer.step()
            itercount += 1

    if modelsaveloc is not None:
        torch.save(model, modelsaveloc)
    plt.show()
    return model
|
||||
|
||||
def retyuoipufdyu():
    """Train a ReLU regression network on the California housing dataset.

    Builds ``linear_nn(8, 100, 1, 20, act_fn="relu")``, prints it, and trains
    it with ``train_sgd_simple`` in "regression" mode for 1000 iterations.

    Returns:
        Whatever ``train_sgd_simple`` returns (the trained model).
    """
    # Load the dataset once and reuse it for both inputs and targets.
    housing = datasets.fetch_california_housing()
    data = torch.tensor(housing['data']).to(torch.float)
    # The network has out_dim=1, i.e. one output column per row; give the
    # targets the same trailing axis so the loss compares them element-wise
    # instead of broadcasting.
    ground = torch.tensor(housing['target']).to(torch.float).reshape(-1, 1)
    model = linear_nn(8, 100, 1, 20, act_fn = "relu")
    print(model)
    return train_sgd_simple(model,"regression", data, ground, learnrate=1e-4, iters=1000)
|
||||
|
||||
# Time one full run of the demo training routine.
# NOTE: these statements sit at module level, so they execute whenever this file is run or imported.
start = time.time()
retyuoipufdyu()
end = time.time()
# Elapsed wall-clock time in seconds.
print(end-start)
|
130
dep/2019/superscripts/visualization.py
Normal file
130
dep/2019/superscripts/visualization.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#Titan Robotics Team 2022: Visualization Module
|
||||
#Written by Arthur Lu & Jacob Levine
|
||||
#Notes:
|
||||
# this should be imported as a python module using 'import visualization'
|
||||
# this should be included in the local directory or environment variable
|
||||
# this module has not been optimized for multithreaded computing
|
||||
#Number of easter eggs: Jake is Jewish and does not observe easter.
|
||||
#setup:
|
||||
|
||||
# module version string
__version__ = "1.0.0.001"

#changelog should be viewed using print(visualization.__changelog__)
__changelog__ = """changelog:
1.0.0.xxx:
-added basic plotting, clustering, and regression comparisons"""

# adjacent string literals concatenate, so this is one string rather than a tuple
__author__ = (
    "Arthur Lu <arthurlu@ttic.edu>, "
    "Jacob Levine <jlevine@ttic.edu>,"
)

# names exported by 'from visualization import *'
# NOTE(review): cluster_vis is defined in this module but is not listed here - confirm whether that is intentional
__all__ = [
    'affinity_prop',
    'bar_graph',
    'dbscan',
    'kmeans',
    'line_plot',
    'pca_comp',
    'regression_comp',
    'scatter_plot',
    'spectral',
    'vis_2d'
    ]
|
||||
#imports
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from sklearn.decomposition import PCA, KernelPCA, IncrementalPCA
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.cluster import AffinityPropagation, DBSCAN, KMeans, SpectralClustering
|
||||
|
||||
#bar of x,y
|
||||
def bar_graph(x,y):
    """Draw a bar chart with bar positions ``x`` and heights ``y``, then show it."""
    positions, heights = np.asarray(x), np.asarray(y)
    plt.bar(positions, heights)
    plt.show()
|
||||
|
||||
#scatter of x,y
|
||||
def scatter_plot(x,y):
    """Draw a scatter plot of ``y`` against ``x``, then show it."""
    xs, ys = np.asarray(x), np.asarray(y)
    plt.scatter(xs, ys)
    plt.show()
|
||||
|
||||
#line of x,y
|
||||
def line_plot(x,y):
    """Draw ``y`` against ``x`` as a connected line, then show it."""
    x = np.asarray(x)
    y = np.asarray(y)
    # plt.plot draws the line; the previous plt.scatter call made this
    # function an exact duplicate of scatter_plot.
    plt.plot(x, y)
    plt.show()
|
||||
|
||||
#plot data + regression fit
|
||||
def regression_comp(x,y,reg):
    """Scatter the data and overlay a fitted regression curve, then show it.

    Args:
        x: x coordinates of the data points.
        y: y coordinates of the data points.
        reg: indexable where ``reg[0]`` is a Python expression string in the
            variable ``z``, ``reg[1]`` is displayed as the MSE and ``reg[2]``
            as R².
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # linspace also copes with x.min() == x.max(), where an arange step
    # of (max - min) / 1000 would be zero.
    regx = np.linspace(x.min(), x.max(), 1000)

    # SECURITY: reg[0] is executed as Python code. Only pass expressions that
    # come from a trusted source.
    # z is bound as a variable rather than pasted into the text: textual
    # substitution mis-evaluates negative values ("-2.0**2" is -4.0) and
    # rewrites every other "z" character in the expression.
    curve = compile(reg[0], "<regression>", "eval")
    regy = np.asarray([eval(curve, globals(), {"z": float(value)}) for value in regx])

    plt.scatter(x, y)
    plt.plot(regx, regy, color="orange", linewidth=3)

    # Both annotations share an x position near the right edge of the plot.
    label_x = .85 * max([x.max(), regx.max()])
    top = max([y.max(), regy.max()])
    plt.text(label_x, .95 * top,
             u"R\u00b2=" + str(round(reg[2], 5)),
             horizontalalignment='center', verticalalignment='center')
    plt.text(label_x, .85 * top,
             "MSE=" + str(round(reg[1], 5)),
             horizontalalignment='center', verticalalignment='center')
    plt.show()
|
||||
|
||||
#PCA to compress down to 2d
|
||||
def pca_comp(big_multidim):
    """Scale the data with StandardScaler and return its 2-component PCA projection."""
    scaled = StandardScaler().fit_transform(big_multidim)
    reducer = PCA(n_components=2)
    return reducer.fit_transform(scaled)
|
||||
|
||||
#one-stop visualization of multidim datasets
|
||||
def vis_2d(big_multidim):
    """Project a multi-dimensional dataset to 2D with ``pca_comp`` and show it as a scatter plot."""
    td_pca = pca_comp(big_multidim)
    plt.scatter(td_pca[:, 0], td_pca[:, 1])
    # Display the figure like every other plotting helper in this module;
    # without this nothing appears when run as a script.
    plt.show()
|
||||
|
||||
def cluster_vis(data, cluster_assign):
    """Show a 2D PCA scatter plot of ``data`` coloured by cluster label.

    Args:
        data: 2D array-like of samples (rows) by features (columns).
        cluster_assign: one integer cluster label per row of ``data``.
            A label of -1 indexes the last palette entry, which is black.
    """
    # Imported locally because the module-level imports do not include them.
    from itertools import cycle, islice

    cluster_assign = np.asarray(cluster_assign)

    td_norm = StandardScaler().fit_transform(data)
    td_pca = PCA(n_components=2).fit_transform(td_norm)

    palette = ['#377eb8', '#ff7f00', '#4daf4a',
               '#f781bf', '#a65628', '#984ea3',
               '#999999', '#e41a1c', '#dede00']
    # One colour per label 0..max, cycling through the palette if needed
    # (the original referenced an undefined name here).
    colors = np.array(list(islice(cycle(palette), int(cluster_assign.max() + 1))))
    # Trailing black entry, reached through index -1.
    colors = np.append(colors, ["#000000"])

    plt.figure(figsize=(8, 8))
    # Plot the PCA projection; it was computed but the first two scaled
    # feature columns were being plotted instead.
    plt.scatter(td_pca[:, 0], td_pca[:, 1], s=10, color=colors[cluster_assign])
    plt.show()
|
||||
|
||||
#affinity prop- slow, but ok if you don't have any idea how many you want
|
||||
def affinity_prop(data, damping=.77, preference=-70):
    """Cluster ``data`` with affinity propagation and return one label per row.

    Args:
        data: 2D array-like of samples (rows) by features (columns).
        damping: passed to ``AffinityPropagation(damping=...)``.
        preference: passed to ``AffinityPropagation(preference=...)``.

    Returns:
        The labels predicted for the standardized data.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardized data (the original fitted an undefined name).
    db = AffinityPropagation(damping=damping, preference=preference).fit(td_norm)
    return db.predict(td_norm)
|
||||
|
||||
#DBSCAN- slightly faster but can label your dataset as all outliers
|
||||
def dbscan(data, eps=.3):
    """Cluster ``data`` with DBSCAN and return one integer label per row.

    Args:
        data: 2D array-like of samples (rows) by features (columns).
        eps: passed to ``DBSCAN(eps=...)``.

    Returns:
        ``labels_`` of the fitted estimator as an integer array.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardized data (the original fitted an undefined name).
    db = DBSCAN(eps=eps).fit(td_norm)
    # Builtin int: the np.int alias no longer exists in current NumPy.
    return db.labels_.astype(int)
|
||||
|
||||
#K-means clustering- the classic
|
||||
def kmeans(data, num_clusters):
    """Cluster ``data`` with k-means and return one integer label per row.

    Args:
        data: 2D array-like of samples (rows) by features (columns).
        num_clusters: passed to ``KMeans(n_clusters=...)``.

    Returns:
        ``labels_`` of the fitted estimator as an integer array.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardized data (the original fitted an undefined name).
    db = KMeans(n_clusters=num_clusters).fit(td_norm)
    # Builtin int: the np.int alias no longer exists in current NumPy.
    return db.labels_.astype(int)
|
||||
|
||||
#Spectral Clustering- Seems to work really well
|
||||
def spectral(data, num_clusters):
    """Cluster ``data`` with spectral clustering and return one integer label per row.

    Args:
        data: 2D array-like of samples (rows) by features (columns).
        num_clusters: passed to ``SpectralClustering(n_clusters=...)``.

    Returns:
        ``labels_`` of the fitted estimator as an integer array.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardized data (the original fitted an undefined name).
    db = SpectralClustering(n_clusters=num_clusters).fit(td_norm)
    # Builtin int: the np.int alias no longer exists in current NumPy.
    return db.labels_.astype(int)
|
Reference in New Issue
Block a user