depreciated 2019 superscripts and company

This commit is contained in:
art 2019-10-29 09:23:00 -05:00
parent 886735d9c8
commit 03431fc5eb
7 changed files with 397 additions and 397 deletions

View File

@ -1,61 +1,61 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 20 12:21:31 2019
@author: creek
"""
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import pprint
from pylatex import Document, Section, Subsection, Command
from pylatex.utils import italic, NoEscape
import requests
def generate_team_report(team):
    """Build a PDF scouting report for one team.

    Lists the ids of the team's scouted matches, averages the quantitative
    cargo-ball and hatch-panel counts, reports the last qualitative strategy
    fields seen, and renders the result with pylatex.

    Args:
        team: Firestore document snapshot of the team; ``team.id`` and
            ``team.reference.collection(...)`` are used.

    Side effects:
        Calls ``doc.generate_pdf`` with ``str(team.id)`` as the file path and
        ``clean_tex=False``.
    """
    doc = Document('basic')
    # Materialise the query result: it is walked twice below, and a generator
    # would already be exhausted by the second pass.
    matches = list(team.reference.collection(u'matches').get())

    with doc.create(Section('Qualification matches scouted')):
        for match in matches:
            doc.append(str(match.id))

    with doc.create(Section('Details')):
        # Totals start at zero so the averages are not skewed.
        balls = 0
        hatches = 0
        count = 0
        # Defaults keep the report printable when no 'Qual' entry exists.
        strategy = None
        strongObject = None
        for match in matches:
            # NOTE(review): assumes iterating a match yields analysis entries
            # exposing key() and item access -- confirm against the schema.
            for analysis in match:
                key = analysis.key()
                if key.startswith('Quant'):
                    balls += analysis['cargoBalls']
                    hatches += analysis['hatchPanels']
                    count += 1
                if key.startswith('Qual'):
                    strategy = analysis['StrategyType']
                    strongObject = analysis['TeleopStrongObject']
        # Averages are only meaningful when quantitative data was found.
        if count > 0:
            doc.append("Average balls: " + str(float(balls) / count))
            doc.append("Average hatches: " + str(float(hatches) / count))
        doc.append("Strategy Type: " + str(strategy))
        doc.append("Strongest object in teleop: " + str(strongObject))

    doc.preamble.append(Command('title', team.id))
    doc.preamble.append(Command('author', 'Generated by Team 2022'))
    doc.preamble.append(Command('date', NoEscape(r'\today')))
    doc.append(NoEscape(r'\maketitle'))
    doc.generate_pdf(filepath=str(team.id), clean_tex=False)
# Authenticate with the service-account key, then fetch every team document
# of the 'Central 2019' collection and render one report per team.
credential = credentials.Certificate('keys/firebasekey.json')
firebase_admin.initialize_app(credential)
# firestore.client() returns the client bound to the app initialised above;
# firestore.Client() would ignore that certificate and fall back to the
# ambient default credentials.
db = firestore.client()
teams_ref = db.collection(u'data').document(u'team-2022').collection(u'Central 2019')
teams = teams_ref.get()
for team in teams:
    generate_team_report(team)
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 20 12:21:31 2019
@author: creek
"""
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import pprint
from pylatex import Document, Section, Subsection, Command
from pylatex.utils import italic, NoEscape
import requests
def generate_team_report(team):
    """Build a PDF scouting report for one team.

    Lists the ids of the team's scouted matches, averages the quantitative
    cargo-ball and hatch-panel counts, reports the last qualitative strategy
    fields seen, and renders the result with pylatex.

    Args:
        team: Firestore document snapshot of the team; ``team.id`` and
            ``team.reference.collection(...)`` are used.

    Side effects:
        Calls ``doc.generate_pdf`` with ``str(team.id)`` as the file path and
        ``clean_tex=False``.
    """
    doc = Document('basic')
    # Materialise the query result: it is walked twice below, and a generator
    # would already be exhausted by the second pass.
    matches = list(team.reference.collection(u'matches').get())

    with doc.create(Section('Qualification matches scouted')):
        for match in matches:
            doc.append(str(match.id))

    with doc.create(Section('Details')):
        # Totals start at zero so the averages are not skewed.
        balls = 0
        hatches = 0
        count = 0
        # Defaults keep the report printable when no 'Qual' entry exists.
        strategy = None
        strongObject = None
        for match in matches:
            # NOTE(review): assumes iterating a match yields analysis entries
            # exposing key() and item access -- confirm against the schema.
            for analysis in match:
                key = analysis.key()
                if key.startswith('Quant'):
                    balls += analysis['cargoBalls']
                    hatches += analysis['hatchPanels']
                    count += 1
                if key.startswith('Qual'):
                    strategy = analysis['StrategyType']
                    strongObject = analysis['TeleopStrongObject']
        # Averages are only meaningful when quantitative data was found.
        if count > 0:
            doc.append("Average balls: " + str(float(balls) / count))
            doc.append("Average hatches: " + str(float(hatches) / count))
        doc.append("Strategy Type: " + str(strategy))
        doc.append("Strongest object in teleop: " + str(strongObject))

    doc.preamble.append(Command('title', team.id))
    doc.preamble.append(Command('author', 'Generated by Team 2022'))
    doc.preamble.append(Command('date', NoEscape(r'\today')))
    doc.append(NoEscape(r'\maketitle'))
    doc.generate_pdf(filepath=str(team.id), clean_tex=False)
# Authenticate with the service-account key, then fetch every team document
# of the 'Central 2019' collection and render one report per team.
credential = credentials.Certificate('keys/firebasekey.json')
firebase_admin.initialize_app(credential)
# firestore.client() returns the client bound to the app initialised above;
# firestore.Client() would ignore that certificate and fall back to the
# ambient default credentials.
db = firestore.client()
teams_ref = db.collection(u'data').document(u'team-2022').collection(u'Central 2019')
teams = teams_ref.get()
for team in teams:
    generate_team_report(team)

View File

@ -1,206 +1,206 @@
#Titan Robotics Team 2022: ML Module
#Written by Arthur Lu & Jacob Levine
#Notes:
# this should be imported as a python module using 'import titanlearn'
# this should be included in the local directory or environment variable
# this module has not been optimized for multithreaded computing
# this module learns from its mistakes far faster than 2022's captains
#setup:
__version__ = "1.0.0.001"
#changelog should be viewed using print(titanlearn.__changelog__)
__changelog__ = """changelog:
1.0.0.xxx:
-added generation of ANNS, basic SGD training"""
__author__ = (
"Arthur Lu <arthurlu@ttic.edu>, "
"Jacob Levine <jlevine@ttic.edu>,"
)
__all__ = [
'linear_nn',
'train_sgd_minibatch',
'train_sgd_simple'
]
#imports
import torch
import warnings
from collections import OrderedDict
from sklearn import metrics, datasets
import numpy as np
import matplotlib.pyplot as plt
import math
import time
#compute device used by the training functions (currently hard-coded to the CPU; CUDA is not enabled)
device = torch.device("cpu")
#linear_nn: creates a fully connected network given params
def linear_nn(in_dim, hidden_dim, out_dim, num_hidden, act_fn="tanh", end="none"):
    """Create a fully connected network as a ``torch.nn.Sequential``.

    Layout: an "in" Linear(in_dim, hidden_dim), then ``num_hidden`` pairs of
    Linear(hidden_dim, hidden_dim) + activation, then an "out"
    Linear(hidden_dim, out_dim) optionally followed by a final activation.

    Args:
        in_dim: Number of input features.
        hidden_dim: Width of every hidden layer.
        out_dim: Number of output features.
        num_hidden: Number of hidden Linear/activation pairs.
        act_fn: Inner activation: "tanh", "sigmoid", "relu" or "leaky relu"
            (case-insensitive).
        end: Final activation: "softmax", "sigmoid" or "none"
            (case-insensitive).

    Returns:
        The assembled ``torch.nn.Sequential``, or ``None`` (after a warning)
        when ``act_fn`` or ``end`` is not recognised.
    """
    # activation name -> (layer-name prefix, module class)
    inner_activations = {
        "tanh": ("tanh", torch.nn.Tanh),
        "sigmoid": ("sig", torch.nn.Sigmoid),
        "relu": ("relu", torch.nn.ReLU),
        "leaky relu": ("lre", torch.nn.LeakyReLU),
    }
    inner = inner_activations.get(act_fn.lower())
    if inner is None:
        warnings.warn("Did not specify a valid inner activation function. Returning nothing.")
        return None
    end_key = end.lower()
    if end_key not in ("softmax", "none", "sigmoid"):
        warnings.warn("Did not specify a valid final activation function. Returning nothing.")
        return None

    prefix, activation = inner
    layers = OrderedDict([("in", torch.nn.Linear(in_dim, hidden_dim))])
    for index in range(1, num_hidden + 1):
        layers["lin" + str(index)] = torch.nn.Linear(hidden_dim, hidden_dim)
        layers[prefix + str(index)] = activation()
    layers["out"] = torch.nn.Linear(hidden_dim, out_dim)
    if end_key == "softmax":
        # Normalise over the feature axis explicitly; the implicit-dim form is
        # deprecated and selects axis 0 for 3-D input.
        layers["softmax"] = torch.nn.Softmax(dim=-1)
    elif end_key == "sigmoid":
        layers["sigmoid"] = torch.nn.Sigmoid()
    return torch.nn.Sequential(layers)
#train_sgd_simple: trains network using SGD
def train_sgd_simple(net, evalType, data, ground, dev=None, devg=None, iters=1000, learnrate=1e-4, testevery=1, graphsaveloc=None, modelsaveloc=None, loss="mse"):
    """Train ``net`` with full-batch SGD, scoring and plotting periodically.

    Args:
        net: Model to train; it is moved to the module-level ``device``.
        evalType: "ap" (average precision) or "regression" (explained
            variance); selects the score computed every ``testevery`` steps.
        data: Training inputs (tensor).
        ground: Training targets (tensor).
        dev: Optional held-out inputs, scored alongside the training set.
        devg: Targets for ``dev``; required when ``dev`` is given.
        iters: Number of optimisation steps.
        learnrate: SGD learning rate.
        testevery: Evaluate every this many steps.
        graphsaveloc: If set, the plot is saved to ``graphsaveloc + ".pdf"``
            at each evaluation.
        modelsaveloc: If set, the trained model is saved there.
        loss: "mse", "cross entropy", "nll" or "poisson nll".

    Returns:
        The trained model, or ``None`` (after a warning) when ``loss`` or
        ``evalType`` is not recognised.
    """
    loss_functions = {
        "mse": torch.nn.MSELoss,
        "cross entropy": torch.nn.CrossEntropyLoss,
        "nll": torch.nn.NLLLoss,
        "poisson nll": torch.nn.PoissonNLLLoss,
    }
    # evalType -> (scoring function, curve-label tag)
    scorers = {
        "ap": (metrics.average_precision_score, "AP"),
        "regression": (metrics.explained_variance_score, "EV"),
    }

    model = net.to(device)
    data = data.to(device)
    ground = ground.to(device)
    if dev is not None:
        dev = dev.to(device)
    losses = []
    dev_losses = []

    loss_class = loss_functions.get(loss.lower())
    if loss_class is None:
        warnings.warn("Did not specify a valid loss function. Returning nothing.")
        return None
    # Reject an unknown evalType up front instead of failing inside the loop.
    if evalType not in scorers:
        warnings.warn("Did not specify a valid evaluation type. Returning nothing.")
        return None
    loss_fn = loss_class()
    scorer, tag = scorers[evalType]

    optimizer = torch.optim.SGD(model.parameters(), lr=learnrate)
    for i in range(iters):
        if i % testevery == 0:
            with torch.no_grad():
                # One x position per evaluation performed so far.
                steps = np.array(range(0, i + 1, testevery))
                score = scorer(ground.cpu().numpy(), model(data).cpu().numpy())
                losses.append(score)
                print(str(i)+": "+str(score))
                plt.plot(steps, np.array(losses), label="train " + tag)
                if dev is not None:
                    dev_output = model(dev)
                    print(evalType)
                    dev_score = scorer(devg.cpu().numpy(), dev_output.cpu().numpy())
                    dev_losses.append(dev_score)
                    # Plot the dev curve from the dev scores, not the train ones.
                    plt.plot(steps, np.array(dev_losses), label="dev " + tag)
                if graphsaveloc is not None:
                    plt.savefig(graphsaveloc+".pdf")
        with torch.enable_grad():
            optimizer.zero_grad()
            step_loss = loss_fn(model(data), ground)
            print(step_loss.item())
            step_loss.backward()
            optimizer.step()
    if modelsaveloc is not None:
        torch.save(model, modelsaveloc)
    plt.show()
    return model
#train_sgd_minibatch: same as above, but with minibatches
def train_sgd_minibatch(net, data, ground, dev=None, devg=None, epoch=100, batchsize=20, learnrate=1e-4, testevery=20, graphsaveloc=None, modelsaveloc=None, loss="mse"):
    """Train ``net`` with minibatch SGD, scoring average precision periodically.

    Args:
        net: Model to train; it is moved to the module-level ``device``.
        data: Training inputs (tensor); batches are taken along dimension 0.
        ground: Training targets (tensor), aligned with ``data``.
        dev: Optional held-out inputs, scored alongside the training set.
        devg: Targets for ``dev``; required when ``dev`` is given.
        epoch: Number of passes over the data.
        batchsize: Samples per minibatch (the last batch may be smaller).
        learnrate: SGD learning rate.
        testevery: Evaluate every this many minibatch steps.
        graphsaveloc: If set, the plot is saved to ``graphsaveloc + ".pdf"``
            at each evaluation.
        modelsaveloc: If set, the trained model is saved there.
        loss: "mse", "cross entropy", "nll" or "poisson nll".

    Returns:
        The trained model, or ``None`` (after a warning) when ``loss`` is not
        recognised.
    """
    loss_functions = {
        "mse": torch.nn.MSELoss,
        "cross entropy": torch.nn.CrossEntropyLoss,
        "nll": torch.nn.NLLLoss,
        "poisson nll": torch.nn.PoissonNLLLoss,
    }
    model = net.to(device)
    data = data.to(device)
    ground = ground.to(device)
    if dev is not None:
        dev = dev.to(device)
    losses = []
    dev_losses = []

    loss_class = loss_functions.get(loss.lower())
    if loss_class is None:
        warnings.warn("Did not specify a valid loss function. Returning nothing.")
        return None
    loss_fn = loss_class()

    # Plain SGD: LBFGS.step() requires a closure that this loop never supplied.
    optimizer = torch.optim.SGD(model.parameters(), lr=learnrate)
    # size()[0] is already a Python int, so no .item() call is needed.
    num_samples = data.size()[0]
    batches = math.ceil(num_samples / batchsize)
    itercount = 0
    for i in range(epoch):
        print("EPOCH "+str(i)+" OF "+str(epoch-1))
        for j in range(batches):
            first = j * batchsize
            batchdata = data[first:first + batchsize]
            batchground = ground[first:first + batchsize]
            if itercount % testevery == 0:
                with torch.no_grad():
                    # x positions follow the step counter so they line up with
                    # the number of scores recorded so far.
                    steps = np.array(range(0, itercount + 1, testevery))
                    ap = metrics.average_precision_score(ground.cpu().numpy(), model(data).cpu().numpy())
                    losses.append(ap)
                    print(str(i)+": "+str(ap))
                    plt.plot(steps, np.array(losses))
                    if dev is not None:
                        dev_ap = metrics.average_precision_score(devg.cpu().numpy(), model(dev).cpu().numpy())
                        dev_losses.append(dev_ap)
                        plt.plot(steps, np.array(dev_losses), label="dev AP")
                    if graphsaveloc is not None:
                        plt.savefig(graphsaveloc+".pdf")
            with torch.enable_grad():
                optimizer.zero_grad()
                # Compare the batch output with the targets of the same batch.
                step_loss = loss_fn(model(batchdata), batchground)
                step_loss.backward()
                optimizer.step()
            itercount += 1
    if modelsaveloc is not None:
        torch.save(model, modelsaveloc)
    plt.show()
    return model
def retyuoipufdyu():
    """Demo run: train a ReLU regression network on California housing.

    Returns:
        Whatever ``train_sgd_simple`` returns for the trained model.
    """
    # Fetch the dataset once and reuse it for both features and targets.
    housing = datasets.fetch_california_housing()
    data = torch.tensor(housing['data']).to(torch.float)
    # Make the targets a column so they match the network's single-output
    # shape; a 1-D target would be broadcast against it by MSELoss.
    ground = torch.tensor(housing['target']).to(torch.float).unsqueeze(1)
    model = linear_nn(8, 100, 1, 20, act_fn="relu")
    print(model)
    return train_sgd_simple(model, "regression", data, ground, learnrate=1e-4, iters=1000)
# Run the demo training routine once and print the elapsed wall-clock seconds.
# NOTE(review): this executes whenever the module is imported -- confirm that
# is intended for a module meant to be used via 'import titanlearn'.
start = time.time()
retyuoipufdyu()
end = time.time()
print(end-start)
#Titan Robotics Team 2022: ML Module
#Written by Arthur Lu & Jacob Levine
#Notes:
# this should be imported as a python module using 'import titanlearn'
# this should be included in the local directory or environment variable
# this module has not been optimized for multithreaded computing
# this module learns from its mistakes far faster than 2022's captains
#setup:
__version__ = "1.0.0.001"
#changelog should be viewed using print(titanlearn.__changelog__)
__changelog__ = """changelog:
1.0.0.xxx:
-added generation of ANNS, basic SGD training"""
__author__ = (
"Arthur Lu <arthurlu@ttic.edu>, "
"Jacob Levine <jlevine@ttic.edu>,"
)
__all__ = [
'linear_nn',
'train_sgd_minibatch',
'train_sgd_simple'
]
#imports
import torch
import warnings
from collections import OrderedDict
from sklearn import metrics, datasets
import numpy as np
import matplotlib.pyplot as plt
import math
import time
#compute device used by the training functions (currently hard-coded to the CPU; CUDA is not enabled)
device = torch.device("cpu")
#linear_nn: creates a fully connected network given params
def linear_nn(in_dim, hidden_dim, out_dim, num_hidden, act_fn="tanh", end="none"):
    """Create a fully connected network as a ``torch.nn.Sequential``.

    Layout: an "in" Linear(in_dim, hidden_dim), then ``num_hidden`` pairs of
    Linear(hidden_dim, hidden_dim) + activation, then an "out"
    Linear(hidden_dim, out_dim) optionally followed by a final activation.

    Args:
        in_dim: Number of input features.
        hidden_dim: Width of every hidden layer.
        out_dim: Number of output features.
        num_hidden: Number of hidden Linear/activation pairs.
        act_fn: Inner activation: "tanh", "sigmoid", "relu" or "leaky relu"
            (case-insensitive).
        end: Final activation: "softmax", "sigmoid" or "none"
            (case-insensitive).

    Returns:
        The assembled ``torch.nn.Sequential``, or ``None`` (after a warning)
        when ``act_fn`` or ``end`` is not recognised.
    """
    # activation name -> (layer-name prefix, module class)
    inner_activations = {
        "tanh": ("tanh", torch.nn.Tanh),
        "sigmoid": ("sig", torch.nn.Sigmoid),
        "relu": ("relu", torch.nn.ReLU),
        "leaky relu": ("lre", torch.nn.LeakyReLU),
    }
    inner = inner_activations.get(act_fn.lower())
    if inner is None:
        warnings.warn("Did not specify a valid inner activation function. Returning nothing.")
        return None
    end_key = end.lower()
    if end_key not in ("softmax", "none", "sigmoid"):
        warnings.warn("Did not specify a valid final activation function. Returning nothing.")
        return None

    prefix, activation = inner
    layers = OrderedDict([("in", torch.nn.Linear(in_dim, hidden_dim))])
    for index in range(1, num_hidden + 1):
        layers["lin" + str(index)] = torch.nn.Linear(hidden_dim, hidden_dim)
        layers[prefix + str(index)] = activation()
    layers["out"] = torch.nn.Linear(hidden_dim, out_dim)
    if end_key == "softmax":
        # Normalise over the feature axis explicitly; the implicit-dim form is
        # deprecated and selects axis 0 for 3-D input.
        layers["softmax"] = torch.nn.Softmax(dim=-1)
    elif end_key == "sigmoid":
        layers["sigmoid"] = torch.nn.Sigmoid()
    return torch.nn.Sequential(layers)
#train_sgd_simple: trains network using SGD
def train_sgd_simple(net, evalType, data, ground, dev=None, devg=None, iters=1000, learnrate=1e-4, testevery=1, graphsaveloc=None, modelsaveloc=None, loss="mse"):
    """Train ``net`` with full-batch SGD, scoring and plotting periodically.

    Args:
        net: Model to train; it is moved to the module-level ``device``.
        evalType: "ap" (average precision) or "regression" (explained
            variance); selects the score computed every ``testevery`` steps.
        data: Training inputs (tensor).
        ground: Training targets (tensor).
        dev: Optional held-out inputs, scored alongside the training set.
        devg: Targets for ``dev``; required when ``dev`` is given.
        iters: Number of optimisation steps.
        learnrate: SGD learning rate.
        testevery: Evaluate every this many steps.
        graphsaveloc: If set, the plot is saved to ``graphsaveloc + ".pdf"``
            at each evaluation.
        modelsaveloc: If set, the trained model is saved there.
        loss: "mse", "cross entropy", "nll" or "poisson nll".

    Returns:
        The trained model, or ``None`` (after a warning) when ``loss`` or
        ``evalType`` is not recognised.
    """
    loss_functions = {
        "mse": torch.nn.MSELoss,
        "cross entropy": torch.nn.CrossEntropyLoss,
        "nll": torch.nn.NLLLoss,
        "poisson nll": torch.nn.PoissonNLLLoss,
    }
    # evalType -> (scoring function, curve-label tag)
    scorers = {
        "ap": (metrics.average_precision_score, "AP"),
        "regression": (metrics.explained_variance_score, "EV"),
    }

    model = net.to(device)
    data = data.to(device)
    ground = ground.to(device)
    if dev is not None:
        dev = dev.to(device)
    losses = []
    dev_losses = []

    loss_class = loss_functions.get(loss.lower())
    if loss_class is None:
        warnings.warn("Did not specify a valid loss function. Returning nothing.")
        return None
    # Reject an unknown evalType up front instead of failing inside the loop.
    if evalType not in scorers:
        warnings.warn("Did not specify a valid evaluation type. Returning nothing.")
        return None
    loss_fn = loss_class()
    scorer, tag = scorers[evalType]

    optimizer = torch.optim.SGD(model.parameters(), lr=learnrate)
    for i in range(iters):
        if i % testevery == 0:
            with torch.no_grad():
                # One x position per evaluation performed so far.
                steps = np.array(range(0, i + 1, testevery))
                score = scorer(ground.cpu().numpy(), model(data).cpu().numpy())
                losses.append(score)
                print(str(i)+": "+str(score))
                plt.plot(steps, np.array(losses), label="train " + tag)
                if dev is not None:
                    dev_output = model(dev)
                    print(evalType)
                    dev_score = scorer(devg.cpu().numpy(), dev_output.cpu().numpy())
                    dev_losses.append(dev_score)
                    # Plot the dev curve from the dev scores, not the train ones.
                    plt.plot(steps, np.array(dev_losses), label="dev " + tag)
                if graphsaveloc is not None:
                    plt.savefig(graphsaveloc+".pdf")
        with torch.enable_grad():
            optimizer.zero_grad()
            step_loss = loss_fn(model(data), ground)
            print(step_loss.item())
            step_loss.backward()
            optimizer.step()
    if modelsaveloc is not None:
        torch.save(model, modelsaveloc)
    plt.show()
    return model
#train_sgd_minibatch: same as above, but with minibatches
def train_sgd_minibatch(net, data, ground, dev=None, devg=None, epoch=100, batchsize=20, learnrate=1e-4, testevery=20, graphsaveloc=None, modelsaveloc=None, loss="mse"):
    """Train ``net`` with minibatch SGD, scoring average precision periodically.

    Args:
        net: Model to train; it is moved to the module-level ``device``.
        data: Training inputs (tensor); batches are taken along dimension 0.
        ground: Training targets (tensor), aligned with ``data``.
        dev: Optional held-out inputs, scored alongside the training set.
        devg: Targets for ``dev``; required when ``dev`` is given.
        epoch: Number of passes over the data.
        batchsize: Samples per minibatch (the last batch may be smaller).
        learnrate: SGD learning rate.
        testevery: Evaluate every this many minibatch steps.
        graphsaveloc: If set, the plot is saved to ``graphsaveloc + ".pdf"``
            at each evaluation.
        modelsaveloc: If set, the trained model is saved there.
        loss: "mse", "cross entropy", "nll" or "poisson nll".

    Returns:
        The trained model, or ``None`` (after a warning) when ``loss`` is not
        recognised.
    """
    loss_functions = {
        "mse": torch.nn.MSELoss,
        "cross entropy": torch.nn.CrossEntropyLoss,
        "nll": torch.nn.NLLLoss,
        "poisson nll": torch.nn.PoissonNLLLoss,
    }
    model = net.to(device)
    data = data.to(device)
    ground = ground.to(device)
    if dev is not None:
        dev = dev.to(device)
    losses = []
    dev_losses = []

    loss_class = loss_functions.get(loss.lower())
    if loss_class is None:
        warnings.warn("Did not specify a valid loss function. Returning nothing.")
        return None
    loss_fn = loss_class()

    # Plain SGD: LBFGS.step() requires a closure that this loop never supplied.
    optimizer = torch.optim.SGD(model.parameters(), lr=learnrate)
    # size()[0] is already a Python int, so no .item() call is needed.
    num_samples = data.size()[0]
    batches = math.ceil(num_samples / batchsize)
    itercount = 0
    for i in range(epoch):
        print("EPOCH "+str(i)+" OF "+str(epoch-1))
        for j in range(batches):
            first = j * batchsize
            batchdata = data[first:first + batchsize]
            batchground = ground[first:first + batchsize]
            if itercount % testevery == 0:
                with torch.no_grad():
                    # x positions follow the step counter so they line up with
                    # the number of scores recorded so far.
                    steps = np.array(range(0, itercount + 1, testevery))
                    ap = metrics.average_precision_score(ground.cpu().numpy(), model(data).cpu().numpy())
                    losses.append(ap)
                    print(str(i)+": "+str(ap))
                    plt.plot(steps, np.array(losses))
                    if dev is not None:
                        dev_ap = metrics.average_precision_score(devg.cpu().numpy(), model(dev).cpu().numpy())
                        dev_losses.append(dev_ap)
                        plt.plot(steps, np.array(dev_losses), label="dev AP")
                    if graphsaveloc is not None:
                        plt.savefig(graphsaveloc+".pdf")
            with torch.enable_grad():
                optimizer.zero_grad()
                # Compare the batch output with the targets of the same batch.
                step_loss = loss_fn(model(batchdata), batchground)
                step_loss.backward()
                optimizer.step()
            itercount += 1
    if modelsaveloc is not None:
        torch.save(model, modelsaveloc)
    plt.show()
    return model
def retyuoipufdyu():
    """Demo run: train a ReLU regression network on California housing.

    Returns:
        Whatever ``train_sgd_simple`` returns for the trained model.
    """
    # Fetch the dataset once and reuse it for both features and targets.
    housing = datasets.fetch_california_housing()
    data = torch.tensor(housing['data']).to(torch.float)
    # Make the targets a column so they match the network's single-output
    # shape; a 1-D target would be broadcast against it by MSELoss.
    ground = torch.tensor(housing['target']).to(torch.float).unsqueeze(1)
    model = linear_nn(8, 100, 1, 20, act_fn="relu")
    print(model)
    return train_sgd_simple(model, "regression", data, ground, learnrate=1e-4, iters=1000)
# Run the demo training routine once and print the elapsed wall-clock seconds.
# NOTE(review): this executes whenever the module is imported -- confirm that
# is intended for a module meant to be used via 'import titanlearn'.
start = time.time()
retyuoipufdyu()
end = time.time()
print(end-start)

View File

@ -1,130 +1,130 @@
#Titan Robotics Team 2022: Visualization Module
#Written by Arthur Lu & Jacob Levine
#Notes:
# this should be imported as a python module using 'import visualization'
# this should be included in the local directory or environment variable
# this module has not been optimized for multithreaded computing
#Number of easter eggs: Jake is Jewish and does not observe easter.
#setup:
__version__ = "1.0.0.001"
#changelog should be viewed using print(visualization.__changelog__)
__changelog__ = """changelog:
1.0.0.xxx:
-added basic plotting, clustering, and regression comparisons"""
__author__ = (
"Arthur Lu <arthurlu@ttic.edu>, "
"Jacob Levine <jlevine@ttic.edu>,"
)
__all__ = [
'affinity_prop',
'bar_graph',
'dbscan',
'kmeans',
'line_plot',
'pca_comp',
'regression_comp',
'scatter_plot',
'spectral',
'vis_2d'
]
#imports
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA, KernelPCA, IncrementalPCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AffinityPropagation, DBSCAN, KMeans, SpectralClustering
#bar of x,y
def bar_graph(x,y):
    """Draw a bar chart of ``y`` against ``x`` and display it."""
    positions, heights = np.asarray(x), np.asarray(y)
    plt.bar(positions, heights)
    plt.show()
#scatter of x,y
def scatter_plot(x,y):
    """Draw a scatter plot of ``y`` against ``x`` and display it."""
    xs, ys = np.asarray(x), np.asarray(y)
    plt.scatter(xs, ys)
    plt.show()
#line of x,y
def line_plot(x,y):
    """Draw a line plot of ``y`` against ``x`` and display it."""
    x = np.asarray(x)
    y = np.asarray(y)
    # plt.plot draws the connected line; plt.scatter only drew the points.
    plt.plot(x, y)
    plt.show()
#plot data + regression fit
def regression_comp(x,y,reg):
    """Plot the data together with a fitted regression curve.

    Args:
        x: x coordinates of the data points.
        y: y coordinates of the data points.
        reg: Indexable regression result: ``reg[0]`` is a Python expression
            in the variable ``z``, ``reg[1]`` is shown as the MSE and
            ``reg[2]`` as R².
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # linspace also copes with a zero-width x range, where arange had step 0.
    regx = np.linspace(x.min(), x.max(), 1000, endpoint=False)
    # SECURITY: reg[0] is evaluated as Python code -- only pass expressions
    # from a trusted source.
    expression = compile(reg[0], "<regression>", "eval")
    # Bind z as a variable rather than pasting its text into the expression,
    # so negative values keep correct precedence (e.g. z**2 for z = -3).
    regy = np.asarray([eval(expression, globals(), {"z": float(value)}) for value in regx])
    plt.scatter(x, y)
    plt.plot(regx, regy, color="orange", linewidth=3)
    text_x = .85 * max([x.max(), regx.max()])
    top = max([y.max(), regy.max()])
    plt.text(text_x, .95 * top,
             u"R\u00b2=" + str(round(reg[2], 5)),
             horizontalalignment='center', verticalalignment='center')
    plt.text(text_x, .85 * top,
             "MSE=" + str(round(reg[1], 5)),
             horizontalalignment='center', verticalalignment='center')
    plt.show()
#PCA to compress down to 2d
def pca_comp(big_multidim):
    """Standardise the dataset and return its 2-component PCA projection."""
    standardized = StandardScaler().fit_transform(big_multidim)
    return PCA(n_components=2).fit_transform(standardized)
#one-stop visualization of multidim datasets
def vis_2d(big_multidim):
    """Scatter the 2-component PCA projection of a multidimensional dataset."""
    projected = pca_comp(big_multidim)
    first_axis, second_axis = projected[:, 0], projected[:, 1]
    plt.scatter(first_axis, second_axis)
def cluster_vis(data, cluster_assign):
    """Scatter the 2-D PCA projection of ``data``, coloured by cluster.

    Args:
        data: Dataset to standardise and project.
        cluster_assign: Integer cluster label per row of ``data``.
    """
    # Local import: the module's import block does not provide these names.
    from itertools import cycle, islice

    cluster_assign = np.asarray(cluster_assign)
    td_norm = StandardScaler().fit_transform(data)
    td_pca = PCA(n_components=2).fit_transform(td_norm)
    palette = ['#377eb8', '#ff7f00', '#4daf4a',
               '#f781bf', '#a65628', '#984ea3',
               '#999999', '#e41a1c', '#dede00']
    # One colour per label, repeating the palette when there are more labels.
    colors = np.array(list(islice(cycle(palette), int(cluster_assign.max() + 1))))
    # Black goes last so that a label of -1 indexes it
    # (presumably the noise label from DBSCAN -- confirm).
    colors = np.append(colors, ["#000000"])
    plt.figure(figsize=(8, 8))
    # Plot the PCA projection that was computed, not the first two raw features.
    plt.scatter(td_pca[:, 0], td_pca[:, 1], s=10, color=colors[cluster_assign])
    plt.show()
#affinity prop- slow, but ok if you don't have any idea how many you want
def affinity_prop(data, damping=.77, preference=-70):
    """Cluster ``data`` with affinity propagation after standardising it.

    Args:
        data: Dataset to cluster.
        damping: Passed to ``AffinityPropagation``.
        preference: Passed to ``AffinityPropagation``.

    Returns:
        The cluster label predicted for each row of the standardised data.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardised data; the name 'td' was never defined.
    db = AffinityPropagation(damping=damping, preference=preference).fit(td_norm)
    return db.predict(td_norm)
#DBSCAN- slightly faster but can label your dataset as all outliers
def dbscan(data, eps=.3):
    """Cluster ``data`` with DBSCAN after standardising it.

    Args:
        data: Dataset to cluster.
        eps: Passed to ``DBSCAN``.

    Returns:
        Integer array with the fitted label of each row.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardised data; the name 'td' was never defined.
    db = DBSCAN(eps=eps).fit(td_norm)
    # Builtin int replaces the np.int alias, which newer NumPy no longer has.
    return db.labels_.astype(int)
#K-means clustering- the classic
def kmeans(data, num_clusters):
    """Cluster ``data`` with k-means after standardising it.

    Args:
        data: Dataset to cluster.
        num_clusters: Number of clusters to fit.

    Returns:
        Integer array with the fitted label of each row.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardised data; the name 'td' was never defined.
    db = KMeans(n_clusters=num_clusters).fit(td_norm)
    # Builtin int replaces the np.int alias, which newer NumPy no longer has.
    return db.labels_.astype(int)
#Spectral Clustering- Seems to work really well
def spectral(data, num_clusters):
    """Cluster ``data`` with spectral clustering after standardising it.

    Args:
        data: Dataset to cluster.
        num_clusters: Number of clusters to fit.

    Returns:
        Integer array with the fitted label of each row.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardised data; the name 'td' was never defined.
    db = SpectralClustering(n_clusters=num_clusters).fit(td_norm)
    # Builtin int replaces the np.int alias, which newer NumPy no longer has.
    return db.labels_.astype(int)
#Titan Robotics Team 2022: Visualization Module
#Written by Arthur Lu & Jacob Levine
#Notes:
# this should be imported as a python module using 'import visualization'
# this should be included in the local directory or environment variable
# this module has not been optimized for multithreaded computing
#Number of easter eggs: Jake is Jewish and does not observe easter.
#setup:
__version__ = "1.0.0.001"
#changelog should be viewed using print(visualization.__changelog__)
__changelog__ = """changelog:
1.0.0.xxx:
-added basic plotting, clustering, and regression comparisons"""
__author__ = (
"Arthur Lu <arthurlu@ttic.edu>, "
"Jacob Levine <jlevine@ttic.edu>,"
)
__all__ = [
'affinity_prop',
'bar_graph',
'dbscan',
'kmeans',
'line_plot',
'pca_comp',
'regression_comp',
'scatter_plot',
'spectral',
'vis_2d'
]
#imports
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA, KernelPCA, IncrementalPCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AffinityPropagation, DBSCAN, KMeans, SpectralClustering
#bar of x,y
def bar_graph(x,y):
    """Draw a bar chart of ``y`` against ``x`` and display it."""
    positions, heights = np.asarray(x), np.asarray(y)
    plt.bar(positions, heights)
    plt.show()
#scatter of x,y
def scatter_plot(x,y):
    """Draw a scatter plot of ``y`` against ``x`` and display it."""
    xs, ys = np.asarray(x), np.asarray(y)
    plt.scatter(xs, ys)
    plt.show()
#line of x,y
def line_plot(x,y):
    """Draw a line plot of ``y`` against ``x`` and display it."""
    x = np.asarray(x)
    y = np.asarray(y)
    # plt.plot draws the connected line; plt.scatter only drew the points.
    plt.plot(x, y)
    plt.show()
#plot data + regression fit
def regression_comp(x,y,reg):
    """Plot the data together with a fitted regression curve.

    Args:
        x: x coordinates of the data points.
        y: y coordinates of the data points.
        reg: Indexable regression result: ``reg[0]`` is a Python expression
            in the variable ``z``, ``reg[1]`` is shown as the MSE and
            ``reg[2]`` as R².
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # linspace also copes with a zero-width x range, where arange had step 0.
    regx = np.linspace(x.min(), x.max(), 1000, endpoint=False)
    # SECURITY: reg[0] is evaluated as Python code -- only pass expressions
    # from a trusted source.
    expression = compile(reg[0], "<regression>", "eval")
    # Bind z as a variable rather than pasting its text into the expression,
    # so negative values keep correct precedence (e.g. z**2 for z = -3).
    regy = np.asarray([eval(expression, globals(), {"z": float(value)}) for value in regx])
    plt.scatter(x, y)
    plt.plot(regx, regy, color="orange", linewidth=3)
    text_x = .85 * max([x.max(), regx.max()])
    top = max([y.max(), regy.max()])
    plt.text(text_x, .95 * top,
             u"R\u00b2=" + str(round(reg[2], 5)),
             horizontalalignment='center', verticalalignment='center')
    plt.text(text_x, .85 * top,
             "MSE=" + str(round(reg[1], 5)),
             horizontalalignment='center', verticalalignment='center')
    plt.show()
#PCA to compress down to 2d
def pca_comp(big_multidim):
    """Standardise the dataset and return its 2-component PCA projection."""
    standardized = StandardScaler().fit_transform(big_multidim)
    return PCA(n_components=2).fit_transform(standardized)
#one-stop visualization of multidim datasets
def vis_2d(big_multidim):
    """Scatter the 2-component PCA projection of a multidimensional dataset."""
    projected = pca_comp(big_multidim)
    first_axis, second_axis = projected[:, 0], projected[:, 1]
    plt.scatter(first_axis, second_axis)
def cluster_vis(data, cluster_assign):
    """Scatter the 2-D PCA projection of ``data``, coloured by cluster.

    Args:
        data: Dataset to standardise and project.
        cluster_assign: Integer cluster label per row of ``data``.
    """
    # Local import: the module's import block does not provide these names.
    from itertools import cycle, islice

    cluster_assign = np.asarray(cluster_assign)
    td_norm = StandardScaler().fit_transform(data)
    td_pca = PCA(n_components=2).fit_transform(td_norm)
    palette = ['#377eb8', '#ff7f00', '#4daf4a',
               '#f781bf', '#a65628', '#984ea3',
               '#999999', '#e41a1c', '#dede00']
    # One colour per label, repeating the palette when there are more labels.
    colors = np.array(list(islice(cycle(palette), int(cluster_assign.max() + 1))))
    # Black goes last so that a label of -1 indexes it
    # (presumably the noise label from DBSCAN -- confirm).
    colors = np.append(colors, ["#000000"])
    plt.figure(figsize=(8, 8))
    # Plot the PCA projection that was computed, not the first two raw features.
    plt.scatter(td_pca[:, 0], td_pca[:, 1], s=10, color=colors[cluster_assign])
    plt.show()
#affinity prop- slow, but ok if you don't have any idea how many you want
def affinity_prop(data, damping=.77, preference=-70):
    """Cluster ``data`` with affinity propagation after standardising it.

    Args:
        data: Dataset to cluster.
        damping: Passed to ``AffinityPropagation``.
        preference: Passed to ``AffinityPropagation``.

    Returns:
        The cluster label predicted for each row of the standardised data.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardised data; the name 'td' was never defined.
    db = AffinityPropagation(damping=damping, preference=preference).fit(td_norm)
    return db.predict(td_norm)
#DBSCAN- slightly faster but can label your dataset as all outliers
def dbscan(data, eps=.3):
    """Cluster ``data`` with DBSCAN after standardising it.

    Args:
        data: Dataset to cluster.
        eps: Passed to ``DBSCAN``.

    Returns:
        Integer array with the fitted label of each row.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardised data; the name 'td' was never defined.
    db = DBSCAN(eps=eps).fit(td_norm)
    # Builtin int replaces the np.int alias, which newer NumPy no longer has.
    return db.labels_.astype(int)
#K-means clustering- the classic
def kmeans(data, num_clusters):
    """Cluster ``data`` with k-means after standardising it.

    Args:
        data: Dataset to cluster.
        num_clusters: Number of clusters to fit.

    Returns:
        Integer array with the fitted label of each row.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardised data; the name 'td' was never defined.
    db = KMeans(n_clusters=num_clusters).fit(td_norm)
    # Builtin int replaces the np.int alias, which newer NumPy no longer has.
    return db.labels_.astype(int)
#Spectral Clustering- Seems to work really well
def spectral(data, num_clusters):
    """Cluster ``data`` with spectral clustering after standardising it.

    Args:
        data: Dataset to cluster.
        num_clusters: Number of clusters to fit.

    Returns:
        Integer array with the fitted label of each row.
    """
    td_norm = StandardScaler().fit_transform(data)
    # Fit on the standardised data; the name 'td' was never defined.
    db = SpectralClustering(n_clusters=num_clusters).fit(td_norm)
    # Builtin int replaces the np.int alias, which newer NumPy no longer has.
    return db.labels_.astype(int)