diff --git a/data analysis/repack_json.py b/data analysis/dep/2019/repack_json.py similarity index 100% rename from data analysis/repack_json.py rename to data analysis/dep/2019/repack_json.py diff --git a/data analysis/scoutflex2019.py b/data analysis/dep/2019/scoutflex2019.py similarity index 97% rename from data analysis/scoutflex2019.py rename to data analysis/dep/2019/scoutflex2019.py index 2309e7b8..d30a554e 100644 --- a/data analysis/scoutflex2019.py +++ b/data analysis/dep/2019/scoutflex2019.py @@ -1,61 +1,61 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Mar 20 12:21:31 2019 - -@author: creek -""" -import firebase_admin -from firebase_admin import credentials -from firebase_admin import firestore -import pprint -from pylatex import Document, Section, Subsection, Command -from pylatex.utils import italic, NoEscape -import requests - -def generate_team_report(team): - doc = Document('basic') - matches = team.reference.collection(u'matches').get() - matchnums = [] - for match in matches: - matchnums.append(match.id) - with doc.create(Section('Qualification matches scouted')): - for matchnum in matchnums: - doc.append(str(matchnum)) - with doc.create(Section('Details')): - hab = "Hab 1" - balls = 42 - hatches = 0 - count = 0 - for match in matches: - for analysis in match: - if analysis.key().startswith('Quant'): - balls = balls + analysis['cargoBalls'] - hatches = hatches + analysis['hatchPanels'] - count = count + 1 - if analysis.key().startswith('Qual'): - strategy = analysis['StrategyType'] - strongObject = analysis['TeleopStrongObject'] - if count > 0: - doc.append("Average balls: " + str(float(balls)/count)) - doc.append("Average hatches: " + str(float(hatches)/count)) - doc.append("Strategy Type: " + str(strategy)) - doc.append("Strongest object in teleop: " + str(strongObject)) - - - doc.preamble.append(Command('title', team.id)) - doc.preamble.append(Command('author', 'Generated by Team 2022')) - doc.preamble.append(Command('date', NoEscape(r'\today'))) - doc.append(NoEscape(r'\maketitle')) - - doc.generate_pdf(filepath= str(team.id), clean_tex=False) - -credential = credentials.Certificate('keys/firebasekey.json') - -firebase_admin.initialize_app(credential) - -db = firestore.Client() -teams_ref = db.collection(u'data').document(u'team-2022').collection(u'Central 2019') -teams = teams_ref.get() - -for team in teams: - generate_team_report(team) +# -*- coding: utf-8 -*- +""" +Created on Wed Mar 20 12:21:31 2019 + +@author: creek +""" +import firebase_admin +from firebase_admin import credentials +from firebase_admin import firestore +import pprint +from pylatex import Document, Section, Subsection, Command +from pylatex.utils import italic, NoEscape +import requests + +def generate_team_report(team): + doc = Document('basic') + matches = team.reference.collection(u'matches').get() + matchnums = [] + for match in matches: + matchnums.append(match.id) + with doc.create(Section('Qualification matches scouted')): + for matchnum in matchnums: + doc.append(str(matchnum)) + with doc.create(Section('Details')): + hab = "Hab 1" + balls = 42 + hatches = 0 + count = 0 + for match in matches: + for analysis in match: + if analysis.key().startswith('Quant'): + balls = balls + analysis['cargoBalls'] + hatches = hatches + analysis['hatchPanels'] + count = count + 1 + if analysis.key().startswith('Qual'): + strategy = analysis['StrategyType'] + strongObject = analysis['TeleopStrongObject'] + if count > 0: + doc.append("Average balls: " + str(float(balls)/count)) + doc.append("Average 
hatches: " + str(float(hatches)/count)) + doc.append("Strategy Type: " + str(strategy)) + doc.append("Strongest object in teleop: " + str(strongObject)) + + + doc.preamble.append(Command('title', team.id)) + doc.preamble.append(Command('author', 'Generated by Team 2022')) + doc.preamble.append(Command('date', NoEscape(r'\today'))) + doc.append(NoEscape(r'\maketitle')) + + doc.generate_pdf(filepath= str(team.id), clean_tex=False) + +credential = credentials.Certificate('keys/firebasekey.json') + +firebase_admin.initialize_app(credential) + +db = firestore.Client() +teams_ref = db.collection(u'data').document(u'team-2022').collection(u'Central 2019') +teams = teams_ref.get() + +for team in teams: + generate_team_report(team) diff --git a/data analysis/superscript.py b/data analysis/dep/2019/superscript.py similarity index 100% rename from data analysis/superscript.py rename to data analysis/dep/2019/superscript.py diff --git a/data analysis/superscript_nishant_only.py b/data analysis/dep/2019/superscript_nishant_only.py similarity index 100% rename from data analysis/superscript_nishant_only.py rename to data analysis/dep/2019/superscript_nishant_only.py diff --git a/data analysis/tbarequest.py b/data analysis/dep/2019/tbarequest.py similarity index 100% rename from data analysis/tbarequest.py rename to data analysis/dep/2019/tbarequest.py diff --git a/data analysis/titanlearn.py b/data analysis/dep/2019/titanlearn.py similarity index 97% rename from data analysis/titanlearn.py rename to data analysis/dep/2019/titanlearn.py index 59920869..a3730003 100644 --- a/data analysis/titanlearn.py +++ b/data analysis/dep/2019/titanlearn.py @@ -1,206 +1,206 @@ -#Titan Robotics Team 2022: ML Module -#Written by Arthur Lu & Jacob Levine -#Notes: -# this should be imported as a python module using 'import titanlearn' -# this should be included in the local directory or environment variable -# this module has not been optimized for multhreaded computing -# this module learns from its mistakes far faster than 2022's captains -#setup: - -__version__ = "1.0.0.001" - -#changelog should be viewed using print(analysis.__changelog__) -__changelog__ = """changelog: -1.0.0.xxx: - -added generation of ANNS, basic SGD training""" -__author__ = ( - "Arthur Lu , " - "Jacob Levine ," - ) -__all__ = [ - 'linear_nn', - 'train_sgd_minibatch', - 'train_sgd_simple' - ] -#imports -import torch -import warnings -from collections import OrderedDict -from sklearn import metrics, datasets -import numpy as np -import matplotlib.pyplot as plt -import math -import time - -#enable CUDA if possible -device = torch.device("cpu") - -#linear_nn: creates a fully connected network given params -def linear_nn(in_dim, hidden_dim, out_dim, num_hidden, act_fn="tanh", end="none"): - if act_fn.lower()=="tanh": - k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))]) - for i in range(num_hidden): - k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "tanh"+str(i+1):torch.nn.Tanh()}) - - elif act_fn.lower()=="sigmoid": - k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))]) - for i in range(num_hidden): - k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "sig"+str(i+1):torch.nn.Sigmoid()}) - - elif act_fn.lower()=="relu": - k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))]) - for i in range(num_hidden): - k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "relu"+str(i+1):torch.nn.ReLU()}) - - elif act_fn.lower()=="leaky relu": - k=OrderedDict([("in", 
torch.nn.Linear(in_dim,hidden_dim))]) - for i in range(num_hidden): - k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "lre"+str(i+1):torch.nn.LeakyReLU()}) - else: - warnings.warn("Did not specify a valid inner activation function. Returning nothing.") - return None - - if end.lower()=="softmax": - k.update({"out": torch.nn.Linear(hidden_dim,out_dim), "softmax": torch.nn.Softmax()}) - elif end.lower()=="none": - k.update({"out": torch.nn.Linear(hidden_dim,out_dim)}) - elif end.lower()=="sigmoid": - k.update({"out": torch.nn.Linear(hidden_dim,out_dim), "sigmoid": torch.nn.Sigmoid()}) - else: - warnings.warn("Did not specify a valid final activation function. Returning nothing.") - return None - - return torch.nn.Sequential(k) - -#train_sgd_simple: trains network using SGD -def train_sgd_simple(net, evalType, data, ground, dev=None, devg=None, iters=1000, learnrate=1e-4, testevery=1, graphsaveloc=None, modelsaveloc=None, loss="mse"): - model=net.to(device) - data=data.to(device) - ground=ground.to(device) - if dev != None: - dev=dev.to(device) - losses=[] - dev_losses=[] - if loss.lower()=="mse": - loss_fn = torch.nn.MSELoss() - elif loss.lower()=="cross entropy": - loss_fn = torch.nn.CrossEntropyLoss() - elif loss.lower()=="nll": - loss_fn = torch.nn.NLLLoss() - elif loss.lower()=="poisson nll": - loss_fn = torch.nn.PoissonNLLLoss() - else: - warnings.warn("Did not specify a valid loss function. Returning nothing.") - return None - optimizer=torch.optim.SGD(model.parameters(), lr=learnrate) - for i in range(iters): - if i%testevery==0: - with torch.no_grad(): - output = model(data) - if evalType == "ap": - ap = metrics.average_precision_score(ground.cpu().numpy(), output.cpu().numpy()) - if evalType == "regression": - ap = metrics.explained_variance_score(ground.cpu().numpy(), output.cpu().numpy()) - losses.append(ap) - print(str(i)+": "+str(ap)) - plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="train AP") - if dev != None: - output = model(dev) - print(evalType) - if evalType == "ap": - - ap = metrics.average_precision_score(devg.numpy(), output.numpy()) - dev_losses.append(ap) - plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="dev AP") - elif evalType == "regression": - ev = metrics.explained_variance_score(devg.numpy(), output.numpy()) - dev_losses.append(ev) - plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="dev EV") - - - if graphsaveloc != None: - plt.savefig(graphsaveloc+".pdf") - with torch.enable_grad(): - optimizer.zero_grad() - output = model(data) - loss = loss_fn(output, ground) - print(loss.item()) - loss.backward() - optimizer.step() - if modelsaveloc != None: - torch.save(model, modelsaveloc) - plt.show() - return model - -#train_sgd_minibatch: same as above, but with minibatches -def train_sgd_minibatch(net, data, ground, dev=None, devg=None, epoch=100, batchsize=20, learnrate=1e-4, testevery=20, graphsaveloc=None, modelsaveloc=None, loss="mse"): - model=net.to(device) - data=data.to(device) - ground=ground.to(device) - if dev != None: - dev=dev.to(device) - losses=[] - dev_losses=[] - if loss.lower()=="mse": - loss_fn = torch.nn.MSELoss() - elif loss.lower()=="cross entropy": - loss_fn = torch.nn.CrossEntropyLoss() - elif loss.lower()=="nll": - loss_fn = torch.nn.NLLLoss() - elif loss.lower()=="poisson nll": - loss_fn = torch.nn.PoissonNLLLoss() - else: - warnings.warn("Did not specify a valid loss function. 
Returning nothing.") - return None - optimizer=torch.optim.LBFGS(model.parameters(), lr=learnrate) - itercount=0 - for i in range(epoch): - print("EPOCH "+str(i)+" OF "+str(epoch-1)) - batches=math.ceil(data.size()[0].item()/batchsize) - for j in range(batches): - batchdata=[] - batchground=[] - for k in range(j*batchsize, min((j+1)*batchsize, data.size()[0].item()),1): - batchdata.append(data[k]) - batchground.append(ground[k]) - batchdata=torch.stack(batchdata) - batchground=torch.stack(batchground) - if itercount%testevery==0: - with torch.no_grad(): - output = model(data) - ap = metrics.average_precision_score(ground.numpy(), output.numpy()) - losses.append(ap) - print(str(i)+": "+str(ap)) - plt.plot(np.array(range(0,i+1,testevery)),np.array(losses)) - if dev != None: - output = model(dev) - ap = metrics.average_precision_score(devg.numpy(), output.numpy()) - dev_losses.append(ap) - plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="dev AP") - if graphsaveloc != None: - plt.savefig(graphsaveloc+".pdf") - with torch.enable_grad(): - optimizer.zero_grad() - output = model(batchdata) - loss = loss_fn(output, ground) - loss.backward() - optimizer.step() - itercount +=1 - if modelsaveloc != None: - torch.save(model, modelsaveloc) - plt.show() - return model - -def retyuoipufdyu(): - - data = torch.tensor(datasets.fetch_california_housing()['data']).to(torch.float) - ground = datasets.fetch_california_housing()['target'] - ground = torch.tensor(ground).to(torch.float) - model = linear_nn(8, 100, 1, 20, act_fn = "relu") - print(model) - return train_sgd_simple(model,"regression", data, ground, learnrate=1e-4, iters=1000) - -start = time.time() -retyuoipufdyu() -end = time.time() -print(end-start) +#Titan Robotics Team 2022: ML Module +#Written by Arthur Lu & Jacob Levine +#Notes: +# this should be imported as a python module using 'import titanlearn' +# this should be included in the local directory or environment variable +# this module has not been optimized for multhreaded computing +# this module learns from its mistakes far faster than 2022's captains +#setup: + +__version__ = "1.0.0.001" + +#changelog should be viewed using print(analysis.__changelog__) +__changelog__ = """changelog: +1.0.0.xxx: + -added generation of ANNS, basic SGD training""" +__author__ = ( + "Arthur Lu , " + "Jacob Levine ," + ) +__all__ = [ + 'linear_nn', + 'train_sgd_minibatch', + 'train_sgd_simple' + ] +#imports +import torch +import warnings +from collections import OrderedDict +from sklearn import metrics, datasets +import numpy as np +import matplotlib.pyplot as plt +import math +import time + +#enable CUDA if possible +device = torch.device("cpu") + +#linear_nn: creates a fully connected network given params +def linear_nn(in_dim, hidden_dim, out_dim, num_hidden, act_fn="tanh", end="none"): + if act_fn.lower()=="tanh": + k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))]) + for i in range(num_hidden): + k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "tanh"+str(i+1):torch.nn.Tanh()}) + + elif act_fn.lower()=="sigmoid": + k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))]) + for i in range(num_hidden): + k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "sig"+str(i+1):torch.nn.Sigmoid()}) + + elif act_fn.lower()=="relu": + k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))]) + for i in range(num_hidden): + k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "relu"+str(i+1):torch.nn.ReLU()}) + + elif act_fn.lower()=="leaky 
relu": + k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))]) + for i in range(num_hidden): + k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "lre"+str(i+1):torch.nn.LeakyReLU()}) + else: + warnings.warn("Did not specify a valid inner activation function. Returning nothing.") + return None + + if end.lower()=="softmax": + k.update({"out": torch.nn.Linear(hidden_dim,out_dim), "softmax": torch.nn.Softmax()}) + elif end.lower()=="none": + k.update({"out": torch.nn.Linear(hidden_dim,out_dim)}) + elif end.lower()=="sigmoid": + k.update({"out": torch.nn.Linear(hidden_dim,out_dim), "sigmoid": torch.nn.Sigmoid()}) + else: + warnings.warn("Did not specify a valid final activation function. Returning nothing.") + return None + + return torch.nn.Sequential(k) + +#train_sgd_simple: trains network using SGD +def train_sgd_simple(net, evalType, data, ground, dev=None, devg=None, iters=1000, learnrate=1e-4, testevery=1, graphsaveloc=None, modelsaveloc=None, loss="mse"): + model=net.to(device) + data=data.to(device) + ground=ground.to(device) + if dev != None: + dev=dev.to(device) + losses=[] + dev_losses=[] + if loss.lower()=="mse": + loss_fn = torch.nn.MSELoss() + elif loss.lower()=="cross entropy": + loss_fn = torch.nn.CrossEntropyLoss() + elif loss.lower()=="nll": + loss_fn = torch.nn.NLLLoss() + elif loss.lower()=="poisson nll": + loss_fn = torch.nn.PoissonNLLLoss() + else: + warnings.warn("Did not specify a valid loss function. Returning nothing.") + return None + optimizer=torch.optim.SGD(model.parameters(), lr=learnrate) + for i in range(iters): + if i%testevery==0: + with torch.no_grad(): + output = model(data) + if evalType == "ap": + ap = metrics.average_precision_score(ground.cpu().numpy(), output.cpu().numpy()) + if evalType == "regression": + ap = metrics.explained_variance_score(ground.cpu().numpy(), output.cpu().numpy()) + losses.append(ap) + print(str(i)+": "+str(ap)) + plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="train AP") + if dev != None: + output = model(dev) + print(evalType) + if evalType == "ap": + + ap = metrics.average_precision_score(devg.numpy(), output.numpy()) + dev_losses.append(ap) + plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="dev AP") + elif evalType == "regression": + ev = metrics.explained_variance_score(devg.numpy(), output.numpy()) + dev_losses.append(ev) + plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="dev EV") + + + if graphsaveloc != None: + plt.savefig(graphsaveloc+".pdf") + with torch.enable_grad(): + optimizer.zero_grad() + output = model(data) + loss = loss_fn(output, ground) + print(loss.item()) + loss.backward() + optimizer.step() + if modelsaveloc != None: + torch.save(model, modelsaveloc) + plt.show() + return model + +#train_sgd_minibatch: same as above, but with minibatches +def train_sgd_minibatch(net, data, ground, dev=None, devg=None, epoch=100, batchsize=20, learnrate=1e-4, testevery=20, graphsaveloc=None, modelsaveloc=None, loss="mse"): + model=net.to(device) + data=data.to(device) + ground=ground.to(device) + if dev != None: + dev=dev.to(device) + losses=[] + dev_losses=[] + if loss.lower()=="mse": + loss_fn = torch.nn.MSELoss() + elif loss.lower()=="cross entropy": + loss_fn = torch.nn.CrossEntropyLoss() + elif loss.lower()=="nll": + loss_fn = torch.nn.NLLLoss() + elif loss.lower()=="poisson nll": + loss_fn = torch.nn.PoissonNLLLoss() + else: + warnings.warn("Did not specify a valid loss function. 
Returning nothing.") + return None + optimizer=torch.optim.LBFGS(model.parameters(), lr=learnrate) + itercount=0 + for i in range(epoch): + print("EPOCH "+str(i)+" OF "+str(epoch-1)) + batches=math.ceil(data.size()[0].item()/batchsize) + for j in range(batches): + batchdata=[] + batchground=[] + for k in range(j*batchsize, min((j+1)*batchsize, data.size()[0].item()),1): + batchdata.append(data[k]) + batchground.append(ground[k]) + batchdata=torch.stack(batchdata) + batchground=torch.stack(batchground) + if itercount%testevery==0: + with torch.no_grad(): + output = model(data) + ap = metrics.average_precision_score(ground.numpy(), output.numpy()) + losses.append(ap) + print(str(i)+": "+str(ap)) + plt.plot(np.array(range(0,i+1,testevery)),np.array(losses)) + if dev != None: + output = model(dev) + ap = metrics.average_precision_score(devg.numpy(), output.numpy()) + dev_losses.append(ap) + plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="dev AP") + if graphsaveloc != None: + plt.savefig(graphsaveloc+".pdf") + with torch.enable_grad(): + optimizer.zero_grad() + output = model(batchdata) + loss = loss_fn(output, ground) + loss.backward() + optimizer.step() + itercount +=1 + if modelsaveloc != None: + torch.save(model, modelsaveloc) + plt.show() + return model + +def retyuoipufdyu(): + + data = torch.tensor(datasets.fetch_california_housing()['data']).to(torch.float) + ground = datasets.fetch_california_housing()['target'] + ground = torch.tensor(ground).to(torch.float) + model = linear_nn(8, 100, 1, 20, act_fn = "relu") + print(model) + return train_sgd_simple(model,"regression", data, ground, learnrate=1e-4, iters=1000) + +start = time.time() +retyuoipufdyu() +end = time.time() +print(end-start) diff --git a/data analysis/visualization.py b/data analysis/dep/2019/visualization.py similarity index 96% rename from data analysis/visualization.py rename to data analysis/dep/2019/visualization.py index cffcde7e..21e86beb 100644 --- a/data analysis/visualization.py +++ b/data analysis/dep/2019/visualization.py @@ -1,130 +1,130 @@ -#Titan Robotics Team 2022: Visualization Module -#Written by Arthur Lu & Jacob Levine -#Notes: -# this should be imported as a python module using 'import visualization' -# this should be included in the local directory or environment variable -# this module has not been optimized for multhreaded computing -#Number of easter eggs: Jake is Jewish and does not observe easter. 
-#setup: - -__version__ = "1.0.0.001" - -#changelog should be viewed using print(analysis.__changelog__) -__changelog__ = """changelog: -1.0.0.xxx: - -added basic plotting, clustering, and regression comparisons""" -__author__ = ( - "Arthur Lu , " - "Jacob Levine ," - ) -__all__ = [ - 'affinity_prop', - 'bar_graph', - 'dbscan', - 'kmeans', - 'line_plot', - 'pca_comp', - 'regression_comp', - 'scatter_plot', - 'spectral', - 'vis_2d' - ] -#imports -import matplotlib.pyplot as plt -import numpy as np -from sklearn.decomposition import PCA, KernelPCA, IncrementalPCA -from sklearn.preprocessing import StandardScaler -from sklearn.cluster import AffinityPropagation, DBSCAN, KMeans, SpectralClustering - -#bar of x,y -def bar_graph(x,y): - x=np.asarray(x) - y=np.asarray(y) - plt.bar(x,y) - plt.show() - -#scatter of x,y -def scatter_plot(x,y): - x=np.asarray(x) - y=np.asarray(y) - plt.scatter(x,y) - plt.show() - -#line of x,y -def line_plot(x,y): - x=np.asarray(x) - y=np.asarray(y) - plt.scatter(x,y) - plt.show() - -#plot data + regression fit -def regression_comp(x,y,reg): - x=np.asarray(x) - y=np.asarray(y) - regx=np.arange(x.min(),x.max(),(x.max()-x.min())/1000) - regy=[] - for i in regx: - regy.append(eval(reg[0].replace("z",str(i)))) - regy=np.asarray(regy) - plt.scatter(x,y) - plt.plot(regx,regy,color="orange",linewidth=3) - plt.text(.85*max([x.max(),regx.max()]),.95*max([y.max(),regy.max()]), - u"R\u00b2="+str(round(reg[2],5)), - horizontalalignment='center', verticalalignment='center') - plt.text(.85*max([x.max(),regx.max()]),.85*max([y.max(),regy.max()]), - "MSE="+str(round(reg[1],5)), - horizontalalignment='center', verticalalignment='center') - plt.show() - -#PCA to compress down to 2d -def pca_comp(big_multidim): - pca=PCA(n_components=2) - td_norm=StandardScaler().fit_transform(big_multidim) - td_pca=pca.fit_transform(td_norm) - return td_pca - -#one-stop visualization of multidim datasets -def vis_2d(big_multidim): - td_pca=pca_comp(big_multidim) - plt.scatter(td_pca[:,0], td_pca[:,1]) - -def cluster_vis(data, cluster_assign): - pca=PCA(n_components=2) - td_norm=StandardScaler().fit_transform(data) - td_pca=pca.fit_transform(td_norm) - colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a', - '#f781bf', '#a65628', '#984ea3', - '#999999', '#e41a1c', '#dede00']), - int(max(clu) + 1)))) - colors = np.append(colors, ["#000000"]) - plt.figure(figsize=(8, 8)) - plt.scatter(td_norm[:, 0], td_norm[:, 1], s=10, color=colors[cluster_assign]) - plt.show() - -#affinity prop- slow, but ok if you don't have any idea how many you want -def affinity_prop(data, damping=.77, preference=-70): - td_norm=StandardScaler().fit_transform(data) - db = AffinityPropagation(damping=damping,preference=preference).fit(td) - y=db.predict(td_norm) - return y - -#DBSCAN- slightly faster but can label your dataset as all outliers -def dbscan(data, eps=.3): - td_norm=StandardScaler().fit_transform(data) - db = DBSCAN(eps=eps).fit(td) - y=db.labels_.astype(np.int) - return y - -#K-means clustering- the classic -def kmeans(data, num_clusters): - td_norm=StandardScaler().fit_transform(data) - db = KMeans(n_clusters=num_clusters).fit(td) - y=db.labels_.astype(np.int) - return y - -#Spectral Clustering- Seems to work really well -def spectral(data, num_clusters): - td_norm=StandardScaler().fit_transform(data) - db = SpectralClustering(n_clusters=num_clusters).fit(td) - y=db.labels_.astype(np.int) - return y +#Titan Robotics Team 2022: Visualization Module +#Written by Arthur Lu & Jacob Levine +#Notes: +# this 
should be imported as a python module using 'import visualization' +# this should be included in the local directory or environment variable +# this module has not been optimized for multhreaded computing +#Number of easter eggs: Jake is Jewish and does not observe easter. +#setup: + +__version__ = "1.0.0.001" + +#changelog should be viewed using print(analysis.__changelog__) +__changelog__ = """changelog: +1.0.0.xxx: + -added basic plotting, clustering, and regression comparisons""" +__author__ = ( + "Arthur Lu , " + "Jacob Levine ," + ) +__all__ = [ + 'affinity_prop', + 'bar_graph', + 'dbscan', + 'kmeans', + 'line_plot', + 'pca_comp', + 'regression_comp', + 'scatter_plot', + 'spectral', + 'vis_2d' + ] +#imports +import matplotlib.pyplot as plt +import numpy as np +from sklearn.decomposition import PCA, KernelPCA, IncrementalPCA +from sklearn.preprocessing import StandardScaler +from sklearn.cluster import AffinityPropagation, DBSCAN, KMeans, SpectralClustering + +#bar of x,y +def bar_graph(x,y): + x=np.asarray(x) + y=np.asarray(y) + plt.bar(x,y) + plt.show() + +#scatter of x,y +def scatter_plot(x,y): + x=np.asarray(x) + y=np.asarray(y) + plt.scatter(x,y) + plt.show() + +#line of x,y +def line_plot(x,y): + x=np.asarray(x) + y=np.asarray(y) + plt.scatter(x,y) + plt.show() + +#plot data + regression fit +def regression_comp(x,y,reg): + x=np.asarray(x) + y=np.asarray(y) + regx=np.arange(x.min(),x.max(),(x.max()-x.min())/1000) + regy=[] + for i in regx: + regy.append(eval(reg[0].replace("z",str(i)))) + regy=np.asarray(regy) + plt.scatter(x,y) + plt.plot(regx,regy,color="orange",linewidth=3) + plt.text(.85*max([x.max(),regx.max()]),.95*max([y.max(),regy.max()]), + u"R\u00b2="+str(round(reg[2],5)), + horizontalalignment='center', verticalalignment='center') + plt.text(.85*max([x.max(),regx.max()]),.85*max([y.max(),regy.max()]), + "MSE="+str(round(reg[1],5)), + horizontalalignment='center', verticalalignment='center') + plt.show() + +#PCA to compress down to 2d +def pca_comp(big_multidim): + pca=PCA(n_components=2) + td_norm=StandardScaler().fit_transform(big_multidim) + td_pca=pca.fit_transform(td_norm) + return td_pca + +#one-stop visualization of multidim datasets +def vis_2d(big_multidim): + td_pca=pca_comp(big_multidim) + plt.scatter(td_pca[:,0], td_pca[:,1]) + +def cluster_vis(data, cluster_assign): + pca=PCA(n_components=2) + td_norm=StandardScaler().fit_transform(data) + td_pca=pca.fit_transform(td_norm) + colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a', + '#f781bf', '#a65628', '#984ea3', + '#999999', '#e41a1c', '#dede00']), + int(max(clu) + 1)))) + colors = np.append(colors, ["#000000"]) + plt.figure(figsize=(8, 8)) + plt.scatter(td_norm[:, 0], td_norm[:, 1], s=10, color=colors[cluster_assign]) + plt.show() + +#affinity prop- slow, but ok if you don't have any idea how many you want +def affinity_prop(data, damping=.77, preference=-70): + td_norm=StandardScaler().fit_transform(data) + db = AffinityPropagation(damping=damping,preference=preference).fit(td) + y=db.predict(td_norm) + return y + +#DBSCAN- slightly faster but can label your dataset as all outliers +def dbscan(data, eps=.3): + td_norm=StandardScaler().fit_transform(data) + db = DBSCAN(eps=eps).fit(td) + y=db.labels_.astype(np.int) + return y + +#K-means clustering- the classic +def kmeans(data, num_clusters): + td_norm=StandardScaler().fit_transform(data) + db = KMeans(n_clusters=num_clusters).fit(td) + y=db.labels_.astype(np.int) + return y + +#Spectral Clustering- Seems to work really well +def 
spectral(data, num_clusters): + td_norm=StandardScaler().fit_transform(data) + db = SpectralClustering(n_clusters=num_clusters).fit(td) + y=db.labels_.astype(np.int) + return y
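
Note on the relocated scoutflex2019.py: the per-match aggregation iterates a Firestore DocumentSnapshot directly and calls a .key() method that snapshots do not have, and the ball counter is seeded with 42 rather than 0, which skews the reported averages. Below is a minimal sketch of what the Quant/Qual aggregation appears to intend, assuming each match document stores its analyses as nested maps keyed 'Quant*'/'Qual*' with the field names used in the original ('cargoBalls', 'hatchPanels', 'StrategyType', 'TeleopStrongObject'); matches and doc are the names already defined in generate_team_report().

balls = hatches = count = 0
strategy = strongObject = None
for match in matches:
    fields = match.to_dict()  # DocumentSnapshot -> plain dict of the match's fields
    for key, analysis in fields.items():
        if key.startswith('Quant'):
            balls += analysis['cargoBalls']
            hatches += analysis['hatchPanels']
            count += 1
        elif key.startswith('Qual'):
            strategy = analysis['StrategyType']
            strongObject = analysis['TeleopStrongObject']
if count > 0:
    doc.append("Average balls: " + str(balls / count))
    doc.append("Average hatches: " + str(hatches / count))

Since matches is also iterated earlier to collect match numbers, it may need to be materialized with list(...) if the installed Firestore client returns a generator from .get().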
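
Note on the relocated titanlearn.py: in train_sgd_minibatch(), data.size()[0] is already a plain int, so the .item() calls will raise; the loss is computed against the full ground tensor even though the output comes from batchdata; and torch.optim.LBFGS.step() requires a closure, so the update as written will not run. Below is a minimal sketch of the inner update for one epoch, assuming the intent was to compare each batch's output against the matching slice of the ground truth and that plain SGD (as already used in train_sgd_simple) is an acceptable stand-in for LBFGS; model, data, ground, loss_fn, learnrate and batchsize are the names already defined in the function.

optimizer = torch.optim.SGD(model.parameters(), lr=learnrate)
n = data.size()[0]  # size()[0] is already an int, no .item() needed
batches = math.ceil(n / batchsize)
for j in range(batches):
    start, stop = j * batchsize, min((j + 1) * batchsize, n)
    batchdata = data[start:stop]
    batchground = ground[start:stop]
    optimizer.zero_grad()
    output = model(batchdata)
    loss = loss_fn(output, batchground)  # compare against the slice that produced output
    loss.backward()
    optimizer.step()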
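
Note on the relocated visualization.py: the clustering helpers fit on an undefined td (presumably the scaled td_norm), cluster_vis references clu (presumably cluster_assign) and uses islice/cycle without importing them from itertools, np.int has been deprecated and later removed in NumPy, and line_plot calls plt.scatter rather than plt.plot. Below is a minimal sketch of kmeans() with those points addressed; the other helpers (affinity_prop, dbscan, spectral) would change the same way, and the itertools import is what cluster_vis needs for its color cycle.

from itertools import cycle, islice  # needed by cluster_vis for its color cycle
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

def kmeans(data, num_clusters):
    td_norm = StandardScaler().fit_transform(data)      # scale, as in the original
    db = KMeans(n_clusters=num_clusters).fit(td_norm)   # fit on the scaled data, not td
    return db.labels_.astype(int)                       # plain int instead of np.int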