deprecated 2019 superscripts and company

This commit is contained in:
art 2019-10-29 09:23:00 -05:00
parent 886735d9c8
commit 03431fc5eb
7 changed files with 397 additions and 397 deletions

View File

@@ -1,61 +1,61 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 20 12:21:31 2019

@author: creek
"""
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import pprint
from pylatex import Document, Section, Subsection, Command
from pylatex.utils import italic, NoEscape
import requests

def generate_team_report(team):
    """Build a per-team PDF scouting report from that team's scouted matches."""
    doc = Document('basic')
    # materialize the query so the matches can be iterated more than once
    matches = list(team.reference.collection(u'matches').get())
    matchnums = []
    for match in matches:
        matchnums.append(match.id)
    with doc.create(Section('Qualification matches scouted')):
        for matchnum in matchnums:
            doc.append(str(matchnum))
    with doc.create(Section('Details')):
        hab = "Hab 1"
        # start the totals at zero so the averages below are correct
        balls = 0
        hatches = 0
        count = 0
        strategy = None
        strongObject = None
        # each match document is assumed to hold one analysis entry per field,
        # keyed 'Quant...' (quantitative scouting) or 'Qual...' (qualitative scouting)
        for match in matches:
            for key, analysis in match.to_dict().items():
                if key.startswith('Quant'):
                    balls = balls + analysis['cargoBalls']
                    hatches = hatches + analysis['hatchPanels']
                    count = count + 1
                if key.startswith('Qual'):
                    strategy = analysis['StrategyType']
                    strongObject = analysis['TeleopStrongObject']
        if count > 0:
            doc.append("Average balls: " + str(float(balls) / count))
            doc.append("Average hatches: " + str(float(hatches) / count))
            doc.append("Strategy Type: " + str(strategy))
            doc.append("Strongest object in teleop: " + str(strongObject))
    doc.preamble.append(Command('title', team.id))
    doc.preamble.append(Command('author', 'Generated by Team 2022'))
    doc.preamble.append(Command('date', NoEscape(r'\today')))
    doc.append(NoEscape(r'\maketitle'))
    doc.generate_pdf(filepath=str(team.id), clean_tex=False)

credential = credentials.Certificate('keys/firebasekey.json')
firebase_admin.initialize_app(credential)
db = firestore.client()  # firebase_admin client bound to the initialized app
teams_ref = db.collection(u'data').document(u'team-2022').collection(u'Central 2019')
teams = teams_ref.get()
for team in teams:
    generate_team_report(team)
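
For reference, a minimal sketch of the Firestore document shape this script assumes; the collection and field names come from the lookups above, while the concrete values are illustrative placeholders:

# data/team-2022/Central 2019/<team>/matches/<matchnum> (assumed layout)
example_match = {
    'Quant1': {'cargoBalls': 5, 'hatchPanels': 2},       # quantitative scouting entry
    'Qual1': {'StrategyType': 'defense',                 # qualitative scouting entry
              'TeleopStrongObject': 'hatch panel'},
}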

View File

@@ -1,206 +1,206 @@
#Titan Robotics Team 2022: ML Module
#Written by Arthur Lu & Jacob Levine
#Notes:
#   this should be imported as a python module using 'import titanlearn'
#   this should be included in the local directory or environment variable
#   this module has not been optimized for multithreaded computing
#   this module learns from its mistakes far faster than 2022's captains

#setup:

__version__ = "1.0.0.001"

#changelog should be viewed using print(titanlearn.__changelog__)
__changelog__ = """changelog:
1.0.0.xxx:
    -added generation of ANNs, basic SGD training"""

__author__ = (
    "Arthur Lu <arthurlu@ttic.edu>, "
    "Jacob Levine <jlevine@ttic.edu>,"
)

__all__ = [
    'linear_nn',
    'train_sgd_minibatch',
    'train_sgd_simple'
]

#imports
import torch
import warnings
from collections import OrderedDict
from sklearn import metrics, datasets
import numpy as np
import matplotlib.pyplot as plt
import math
import time

#enable CUDA if possible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#linear_nn: creates a fully connected network given params
def linear_nn(in_dim, hidden_dim, out_dim, num_hidden, act_fn="tanh", end="none"):
    if act_fn.lower()=="tanh":
        k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))])
        for i in range(num_hidden):
            k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "tanh"+str(i+1):torch.nn.Tanh()})
    elif act_fn.lower()=="sigmoid":
        k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))])
        for i in range(num_hidden):
            k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "sig"+str(i+1):torch.nn.Sigmoid()})
    elif act_fn.lower()=="relu":
        k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))])
        for i in range(num_hidden):
            k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "relu"+str(i+1):torch.nn.ReLU()})
    elif act_fn.lower()=="leaky relu":
        k=OrderedDict([("in", torch.nn.Linear(in_dim,hidden_dim))])
        for i in range(num_hidden):
            k.update({"lin"+str(i+1): torch.nn.Linear(hidden_dim,hidden_dim), "lre"+str(i+1):torch.nn.LeakyReLU()})
    else:
        warnings.warn("Did not specify a valid inner activation function. Returning nothing.")
        return None
    if end.lower()=="softmax":
        k.update({"out": torch.nn.Linear(hidden_dim,out_dim), "softmax": torch.nn.Softmax()})
    elif end.lower()=="none":
        k.update({"out": torch.nn.Linear(hidden_dim,out_dim)})
    elif end.lower()=="sigmoid":
        k.update({"out": torch.nn.Linear(hidden_dim,out_dim), "sigmoid": torch.nn.Sigmoid()})
    else:
        warnings.warn("Did not specify a valid final activation function. Returning nothing.")
        return None
    return torch.nn.Sequential(k)
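
# A minimal usage sketch of linear_nn (the shapes and hyperparameters below are illustrative,
# not part of the module):
#   net = linear_nn(8, 100, 1, num_hidden=3, act_fn="relu", end="none")
#   out = net(torch.rand(16, 8))   # forward pass on a batch of 16 eight-feature samples
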
#train_sgd_simple: trains network using SGD
def train_sgd_simple(net, evalType, data, ground, dev=None, devg=None, iters=1000, learnrate=1e-4, testevery=1, graphsaveloc=None, modelsaveloc=None, loss="mse"):
    model=net.to(device)
    data=data.to(device)
    ground=ground.to(device)
    if dev is not None:
        dev=dev.to(device)
    losses=[]
    dev_losses=[]
    if loss.lower()=="mse":
        loss_fn = torch.nn.MSELoss()
    elif loss.lower()=="cross entropy":
        loss_fn = torch.nn.CrossEntropyLoss()
    elif loss.lower()=="nll":
        loss_fn = torch.nn.NLLLoss()
    elif loss.lower()=="poisson nll":
        loss_fn = torch.nn.PoissonNLLLoss()
    else:
        warnings.warn("Did not specify a valid loss function. Returning nothing.")
        return None
    optimizer=torch.optim.SGD(model.parameters(), lr=learnrate)
    for i in range(iters):
        #periodically evaluate on the training set (and dev set, if given) and update the plot
        if i%testevery==0:
            with torch.no_grad():
                output = model(data)
                if evalType == "ap":
                    ap = metrics.average_precision_score(ground.cpu().numpy(), output.cpu().numpy())
                if evalType == "regression":
                    ap = metrics.explained_variance_score(ground.cpu().numpy(), output.cpu().numpy())
                losses.append(ap)
                print(str(i)+": "+str(ap))
                plt.plot(np.array(range(0,i+1,testevery)),np.array(losses), label="train AP")
                if dev is not None:
                    output = model(dev)
                    print(evalType)
                    if evalType == "ap":
                        ap = metrics.average_precision_score(devg.cpu().numpy(), output.cpu().numpy())
                        dev_losses.append(ap)
                        plt.plot(np.array(range(0,i+1,testevery)),np.array(dev_losses), label="dev AP")
                    elif evalType == "regression":
                        ev = metrics.explained_variance_score(devg.cpu().numpy(), output.cpu().numpy())
                        dev_losses.append(ev)
                        plt.plot(np.array(range(0,i+1,testevery)),np.array(dev_losses), label="dev EV")
            if graphsaveloc is not None:
                plt.savefig(graphsaveloc+".pdf")
        #one SGD step on the full dataset
        with torch.enable_grad():
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, ground)
            print(loss.item())
            loss.backward()
            optimizer.step()
    if modelsaveloc is not None:
        torch.save(model, modelsaveloc)
    plt.show()
    return model
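
# A minimal usage sketch with a held-out dev set (the tensors named here are illustrative
# placeholders, not provided by the module):
#   net = linear_nn(10, 64, 1, num_hidden=2, act_fn="relu")
#   trained = train_sgd_simple(net, "regression", train_x, train_y,
#                              dev=dev_x, devg=dev_y, iters=500, testevery=50)
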
#train_sgd_minibatch: same as above, but with minibatches
def train_sgd_minibatch(net, data, ground, dev=None, devg=None, epoch=100, batchsize=20, learnrate=1e-4, testevery=20, graphsaveloc=None, modelsaveloc=None, loss="mse"):
    model=net.to(device)
    data=data.to(device)
    ground=ground.to(device)
    if dev is not None:
        dev=dev.to(device)
    losses=[]
    dev_losses=[]
    if loss.lower()=="mse":
        loss_fn = torch.nn.MSELoss()
    elif loss.lower()=="cross entropy":
        loss_fn = torch.nn.CrossEntropyLoss()
    elif loss.lower()=="nll":
        loss_fn = torch.nn.NLLLoss()
    elif loss.lower()=="poisson nll":
        loss_fn = torch.nn.PoissonNLLLoss()
    else:
        warnings.warn("Did not specify a valid loss function. Returning nothing.")
        return None
    #plain SGD, stepped once per minibatch (LBFGS would require a closure passed to step())
    optimizer=torch.optim.SGD(model.parameters(), lr=learnrate)
    itercount=0
    for i in range(epoch):
        print("EPOCH "+str(i)+" OF "+str(epoch-1))
        batches=math.ceil(data.size(0)/batchsize)
        for j in range(batches):
            #assemble the j-th minibatch
            batchdata=[]
            batchground=[]
            for k in range(j*batchsize, min((j+1)*batchsize, data.size(0)), 1):
                batchdata.append(data[k])
                batchground.append(ground[k])
            batchdata=torch.stack(batchdata)
            batchground=torch.stack(batchground)
            if itercount%testevery==0:
                with torch.no_grad():
                    output = model(data)
                    ap = metrics.average_precision_score(ground.cpu().numpy(), output.cpu().numpy())
                    losses.append(ap)
                    print(str(i)+": "+str(ap))
                    plt.plot(np.array(range(0,itercount+1,testevery)),np.array(losses))
                    if dev is not None:
                        output = model(dev)
                        ap = metrics.average_precision_score(devg.cpu().numpy(), output.cpu().numpy())
                        dev_losses.append(ap)
                        plt.plot(np.array(range(0,itercount+1,testevery)),np.array(dev_losses), label="dev AP")
                if graphsaveloc is not None:
                    plt.savefig(graphsaveloc+".pdf")
            #one optimizer step on the current minibatch
            with torch.enable_grad():
                optimizer.zero_grad()
                output = model(batchdata)
                loss = loss_fn(output, batchground)
                loss.backward()
                optimizer.step()
            itercount += 1
    if modelsaveloc is not None:
        torch.save(model, modelsaveloc)
    plt.show()
    return model
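
# A minimal usage sketch for minibatch training (shapes and the train_x/train_y names are
# illustrative placeholders, not provided by the module):
#   net = linear_nn(20, 64, 1, num_hidden=2, act_fn="relu", end="sigmoid")
#   trained = train_sgd_minibatch(net, train_x, train_y, epoch=10, batchsize=32,
#                                 testevery=50, loss="mse")
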
#quick demo: fit a small regression net on the California housing data
def retyuoipufdyu():
    data = torch.tensor(datasets.fetch_california_housing()['data']).to(torch.float)
    ground = datasets.fetch_california_housing()['target']
    #reshape targets to (N, 1) so they match the network output for MSELoss
    ground = torch.tensor(ground).to(torch.float).unsqueeze(1)
    model = linear_nn(8, 100, 1, 20, act_fn="relu")
    print(model)
    return train_sgd_simple(model, "regression", data, ground, learnrate=1e-4, iters=1000)

start = time.time()
retyuoipufdyu()
end = time.time()
print(end-start)

View File

@@ -1,130 +1,130 @@
#Titan Robotics Team 2022: Visualization Module
#Written by Arthur Lu & Jacob Levine
#Notes:
#   this should be imported as a python module using 'import visualization'
#   this should be included in the local directory or environment variable
#   this module has not been optimized for multithreaded computing
#Number of easter eggs: Jake is Jewish and does not observe easter.

#setup:

__version__ = "1.0.0.001"

#changelog should be viewed using print(visualization.__changelog__)
__changelog__ = """changelog:
1.0.0.xxx:
    -added basic plotting, clustering, and regression comparisons"""

__author__ = (
    "Arthur Lu <arthurlu@ttic.edu>, "
    "Jacob Levine <jlevine@ttic.edu>,"
)

__all__ = [
    'affinity_prop',
    'bar_graph',
    'dbscan',
    'kmeans',
    'line_plot',
    'pca_comp',
    'regression_comp',
    'scatter_plot',
    'spectral',
    'vis_2d'
]

#imports
import matplotlib.pyplot as plt
import numpy as np
from itertools import cycle, islice
from sklearn.decomposition import PCA, KernelPCA, IncrementalPCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AffinityPropagation, DBSCAN, KMeans, SpectralClustering

#bar of x,y
def bar_graph(x,y):
    x=np.asarray(x)
    y=np.asarray(y)
    plt.bar(x,y)
    plt.show()

#scatter of x,y
def scatter_plot(x,y):
    x=np.asarray(x)
    y=np.asarray(y)
    plt.scatter(x,y)
    plt.show()

#line of x,y
def line_plot(x,y):
    x=np.asarray(x)
    y=np.asarray(y)
    plt.plot(x,y)
    plt.show()

#plot data + regression fit
def regression_comp(x,y,reg):
    x=np.asarray(x)
    y=np.asarray(y)
    regx=np.arange(x.min(),x.max(),(x.max()-x.min())/1000)
    regy=[]
    #reg[0] is the fitted expression as a string in the variable "z"; evaluate it along regx
    for i in regx:
        regy.append(eval(reg[0].replace("z",str(i))))
    regy=np.asarray(regy)
    plt.scatter(x,y)
    plt.plot(regx,regy,color="orange",linewidth=3)
    plt.text(.85*max([x.max(),regx.max()]),.95*max([y.max(),regy.max()]),
             u"R\u00b2="+str(round(reg[2],5)),
             horizontalalignment='center', verticalalignment='center')
    plt.text(.85*max([x.max(),regx.max()]),.85*max([y.max(),regy.max()]),
             "MSE="+str(round(reg[1],5)),
             horizontalalignment='center', verticalalignment='center')
    plt.show()
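
# A minimal usage sketch of regression_comp; the (expression, MSE, R^2) tuple below is an
# illustrative placeholder for whatever the upstream regression code produces:
#   reg = ("2.0*z + 1.0", 0.03, 0.98)
#   regression_comp([0, 1, 2, 3], [1.1, 2.9, 5.2, 6.8], reg)
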
#PCA to compress down to 2d
def pca_comp(big_multidim):
    pca=PCA(n_components=2)
    td_norm=StandardScaler().fit_transform(big_multidim)
    td_pca=pca.fit_transform(td_norm)
    return td_pca

#one-stop visualization of multidim datasets
def vis_2d(big_multidim):
    td_pca=pca_comp(big_multidim)
    plt.scatter(td_pca[:,0], td_pca[:,1])
    plt.show()

#scatter the 2d PCA projection, colored by cluster assignment
def cluster_vis(data, cluster_assign):
    pca=PCA(n_components=2)
    td_norm=StandardScaler().fit_transform(data)
    td_pca=pca.fit_transform(td_norm)
    colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                         '#f781bf', '#a65628', '#984ea3',
                                         '#999999', '#e41a1c', '#dede00']),
                                  int(max(cluster_assign) + 1))))
    colors = np.append(colors, ["#000000"])  # black for outliers labeled -1
    plt.figure(figsize=(8, 8))
    plt.scatter(td_pca[:, 0], td_pca[:, 1], s=10, color=colors[cluster_assign])
    plt.show()
#affinity prop- slow, but ok if you don't have any idea how many you want
def affinity_prop(data, damping=.77, preference=-70):
    td_norm=StandardScaler().fit_transform(data)
    db = AffinityPropagation(damping=damping,preference=preference).fit(td_norm)
    y=db.predict(td_norm)
    return y

#DBSCAN- slightly faster but can label your dataset as all outliers
def dbscan(data, eps=.3):
    td_norm=StandardScaler().fit_transform(data)
    db = DBSCAN(eps=eps).fit(td_norm)
    y=db.labels_.astype(int)
    return y

#K-means clustering- the classic
def kmeans(data, num_clusters):
    td_norm=StandardScaler().fit_transform(data)
    db = KMeans(n_clusters=num_clusters).fit(td_norm)
    y=db.labels_.astype(int)
    return y

#Spectral Clustering- Seems to work really well
def spectral(data, num_clusters):
    td_norm=StandardScaler().fit_transform(data)
    db = SpectralClustering(n_clusters=num_clusters).fit(td_norm)
    y=db.labels_.astype(int)
    return y
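
As a quick end-to-end check, a minimal sketch of how the clustering and visualization pieces are meant to fit together; the synthetic blobs dataset and the module file name visualization.py are assumptions made for the sake of the example:

from sklearn.datasets import make_blobs
import visualization as vis  # assumes this file is saved as visualization.py on the import path

data, _ = make_blobs(n_samples=300, centers=4, n_features=6, random_state=0)
labels = vis.kmeans(data, num_clusters=4)  # cluster labels from k-means on the scaled data
vis.cluster_vis(data, labels)              # 2d PCA scatter colored by cluster assignment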