From 4923881829e023287d0f6d94da0f020fa0a3d26a Mon Sep 17 00:00:00 2001
From: Arthur Lu
Date: Wed, 26 May 2021 07:41:32 +0000
Subject: [PATCH 1/3] Added Clustering.py

moved kmeans from Analysis to Clustering
---
 analysis-master/tra_analysis/Analysis.py   |  4 +--
 analysis-master/tra_analysis/Clustering.py | 30 ++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 analysis-master/tra_analysis/Clustering.py

diff --git a/analysis-master/tra_analysis/Analysis.py b/analysis-master/tra_analysis/Analysis.py
index 6a8a63b3..df81951c 100644
--- a/analysis-master/tra_analysis/Analysis.py
+++ b/analysis-master/tra_analysis/Analysis.py
@@ -599,7 +599,7 @@ def npmin(data):
 def npmax(data):
 
 	return np.amax(data)
-
+""" need to decide what to do with this function
 def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
 
 	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
@@ -608,7 +608,7 @@ def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.
 	kernel.fit(data)
 	predictions = kernel.predict(data)
 	centers = kernel.cluster_centers_
 
 	return centers, predictions
-
+"""
 def pca(data, n_components = None, copy = True, whiten = False, svd_solver = "auto", tol = 0.0, iterated_power = "auto", random_state = None):
 
 	kernel = sklearn.decomposition.PCA(n_components = n_components, copy = copy, whiten = whiten, svd_solver = svd_solver, tol = tol, iterated_power = iterated_power, random_state = random_state)
diff --git a/analysis-master/tra_analysis/Clustering.py b/analysis-master/tra_analysis/Clustering.py
new file mode 100644
index 00000000..35988715
--- /dev/null
+++ b/analysis-master/tra_analysis/Clustering.py
@@ -0,0 +1,30 @@
+# Titan Robotics Team 2022: Clustering submodule
+# Written by Arthur Lu
+# Notes:
+# this should be imported as a python module using 'from tra_analysis import Clustering'
+# setup:
+
+__version__ = "1.0.0"
+
+# changelog should be viewed using print(analysis.__changelog__)
+__changelog__ = """changelog:
+	1.0.0:
+		- created this submodule
+		- copied kmeans clustering from Analysis
+"""
+
+__author__ = (
+	"Arthur Lu ",
+)
+
+__all__ = [
+]
+
+def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
+
+	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
+	kernel.fit(data)
+	predictions = kernel.predict(data)
+	centers = kernel.cluster_centers_
+
+	return centers, predictions
\ No newline at end of file

From 3e99869d5dedd03dd330ce557a8bb98e3f52040b Mon Sep 17 00:00:00 2001
From: Arthur Lu
Date: Thu, 15 Jul 2021 23:11:42 +0000
Subject: [PATCH 2/3] added dbscan and spectral to Clustering.py

---
 analysis-master/test_analysis.py           | 13 +++++++++++-
 analysis-master/tra_analysis/Clustering.py | 24 ++++++++++++++++++++--
 analysis-master/tra_analysis/__init__.py   |  1 +
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/analysis-master/test_analysis.py b/analysis-master/test_analysis.py
index ebba8da5..4bcb87b5 100644
--- a/analysis-master/test_analysis.py
+++ b/analysis-master/test_analysis.py
@@ -5,6 +5,7 @@ from sklearn import metrics
 from tra_analysis import Analysis as an
 from tra_analysis import Array
 from tra_analysis import ClassificationMetric
+from tra_analysis import Clustering
 from tra_analysis import CorrelationTest
 from tra_analysis import Fit
 from tra_analysis import KNN
@@ -230,4 +231,14 @@ def test_equation():
 		"-(sgn(cos(PI/4)))": -1,
 	}
 	for key in list(correctParse.keys()):
-		assert parser.eval(key) == correctParse[key]
\ No newline at end of file
+		assert parser.eval(key) == correctParse[key]
+
+def test_clustering():
+
+	data = X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
+
+	assert Clustering.dbscan(data, eps=3, min_samples=2).tolist() == [0, 0, 0, 1, 1, -1]
+
+	data = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])
+
+	assert Clustering.spectral(data, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [1, 1, 1, 0, 0, 0]
\ No newline at end of file
diff --git a/analysis-master/tra_analysis/Clustering.py b/analysis-master/tra_analysis/Clustering.py
index 35988715..e155d5f0 100644
--- a/analysis-master/tra_analysis/Clustering.py
+++ b/analysis-master/tra_analysis/Clustering.py
@@ -4,10 +4,13 @@
 # this should be imported as a python module using 'from tra_analysis import Clustering'
 # setup:
 
-__version__ = "1.0.0"
+__version__ = "2.0.0"
 
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
+	2.0.0:
+		- added dbscan clustering algo
+		- added spectral clustering algo
 	1.0.0:
 		- created this submodule
 		- copied kmeans clustering from Analysis
@@ -18,8 +21,13 @@ __author__ = (
 )
 
 __all__ = [
+	"kmeans",
+	"dbscan",
+	"spectral",
 ]
 
+import sklearn
+
 def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
 
 	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
@@ -27,4 +35,16 @@ def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.
 	predictions = kernel.predict(data)
 	centers = kernel.cluster_centers_
 
-	return centers, predictions
\ No newline at end of file
+	return centers, predictions
+
+def dbscan(data, eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None):
+
+	model = sklearn.cluster.DBSCAN(eps = eps, min_samples = min_samples, metric = metric, metric_params = metric_params, algorithm = algorithm, leaf_size = leaf_size, p = p, n_jobs = n_jobs).fit(data)
+
+	return model.labels_
+
+def spectral(data, n_clusters=8, eigen_solver=None, n_components=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=None, verbose=False):
+
+	model = sklearn.cluster.SpectralClustering(n_clusters = n_clusters, eigen_solver = eigen_solver, n_components = n_components, random_state = random_state, n_init = n_init, gamma = gamma, affinity = affinity, n_neighbors = n_neighbors, eigen_tol = eigen_tol, assign_labels = assign_labels, degree = degree, coef0 = coef0, kernel_params = kernel_params, n_jobs = n_jobs).fit(data)
+
+	return model.labels_
\ No newline at end of file
diff --git a/analysis-master/tra_analysis/__init__.py b/analysis-master/tra_analysis/__init__.py
index a13507b8..512db270 100644
--- a/analysis-master/tra_analysis/__init__.py
+++ b/analysis-master/tra_analysis/__init__.py
@@ -59,6 +59,7 @@ __all__ = [
 from . import Analysis as Analysis
 from .Array import Array
 from .ClassificationMetric import ClassificationMetric
+from . import Clustering
 from . import CorrelationTest
 from .equation import Expression
 from . import Fit

From 3606a072c47237aed5d3beeb116efc2e17f82b69 Mon Sep 17 00:00:00 2001
From: Arthur Lu
Date: Mon, 26 Jul 2021 18:17:42 +0000
Subject: [PATCH 3/3] added normalization preprocessing to Clustering

added unit tests for normalized clustering
---
 analysis-master/test_analysis.py           |  6 +++++-
 analysis-master/tra_analysis/Clustering.py | 19 +++++++++++++++----
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/analysis-master/test_analysis.py b/analysis-master/test_analysis.py
index 4bcb87b5..1668c719 100644
--- a/analysis-master/test_analysis.py
+++ b/analysis-master/test_analysis.py
@@ -235,10 +235,14 @@ def test_equation():
 
 def test_clustering():
 
+	normalizer = sklearn.preprocessing.Normalizer()
+
 	data = X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
 
 	assert Clustering.dbscan(data, eps=3, min_samples=2).tolist() == [0, 0, 0, 1, 1, -1]
+	assert Clustering.dbscan(data, normalizer=normalizer, eps=3, min_samples=2).tolist() == [0, 0, 0, 0, 0, 0]
 
 	data = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])
 
-	assert Clustering.spectral(data, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [1, 1, 1, 0, 0, 0]
\ No newline at end of file
+	assert Clustering.spectral(data, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [1, 1, 1, 0, 0, 0]
+	assert Clustering.spectral(data, normalizer=normalizer, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [0, 1, 1, 0, 0, 0]
\ No newline at end of file
diff --git a/analysis-master/tra_analysis/Clustering.py b/analysis-master/tra_analysis/Clustering.py
index e155d5f0..8b7cef6a 100644
--- a/analysis-master/tra_analysis/Clustering.py
+++ b/analysis-master/tra_analysis/Clustering.py
@@ -4,10 +4,12 @@
 # this should be imported as a python module using 'from tra_analysis import Clustering'
 # setup:
 
-__version__ = "2.0.0"
+__version__ = "2.0.1"
 
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
+	2.0.1:
+		- added normalization preprocessing to clustering, expects instance of sklearn.preprocessing.Normalizer()
 	2.0.0:
 		- added dbscan clustering algo
 		- added spectral clustering algo
@@ -28,7 +30,10 @@ __all__ = [
 
 import sklearn
 
-def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
+def kmeans(data, normalizer = None, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
 
 	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
 	kernel.fit(data)
@@ -37,13 +42,19 @@ def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.
 
 	return centers, predictions
 
-def dbscan(data, eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None):
+def dbscan(data, normalizer=None, eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
 
 	model = sklearn.cluster.DBSCAN(eps = eps, min_samples = min_samples, metric = metric, metric_params = metric_params, algorithm = algorithm, leaf_size = leaf_size, p = p, n_jobs = n_jobs).fit(data)
 
 	return model.labels_
 
-def spectral(data, n_clusters=8, eigen_solver=None, n_components=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=None, verbose=False):
+def spectral(data, normalizer=None, n_clusters=8, eigen_solver=None, n_components=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=None, verbose=False):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
 
 	model = sklearn.cluster.SpectralClustering(n_clusters = n_clusters, eigen_solver = eigen_solver, n_components = n_components, random_state = random_state, n_init = n_init, gamma = gamma, affinity = affinity, n_neighbors = n_neighbors, eigen_tol = eigen_tol, assign_labels = assign_labels, degree = degree, coef0 = coef0, kernel_params = kernel_params, n_jobs = n_jobs).fit(data)
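
Usage note (a minimal sketch, not part of the patch series above): the snippet below shows how the new Clustering submodule is called once all three patches are applied, mirroring the cases exercised in test_analysis.py. It assumes numpy and a scikit-learn release contemporary with these patches (Clustering.kmeans still forwards precompute_distances and n_jobs, which later scikit-learn versions removed); the explicit sklearn.cluster / sklearn.preprocessing imports and the n_clusters=2 choice are illustrative assumptions, not something the patches themselves require.

import numpy as np
import sklearn.cluster        # defensive: ensures sklearn.cluster.* used inside Clustering resolves
import sklearn.preprocessing  # defensive: ensures sklearn.preprocessing.Normalizer is available
from tra_analysis import Clustering

data = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])

# dbscan returns one label per row; -1 marks noise (test_analysis.py expects [0, 0, 0, 1, 1, -1])
labels = Clustering.dbscan(data, eps=3, min_samples=2)

# Normalizer is stateless, so an unfitted instance can be passed in;
# each row is rescaled to unit norm before clustering
normalizer = sklearn.preprocessing.Normalizer()
normalized_labels = Clustering.dbscan(data, normalizer=normalizer, eps=3, min_samples=2)

# kmeans returns a (cluster_centers, per-row labels) tuple; n_clusters=2 is an arbitrary choice here
centers, predictions = Clustering.kmeans(data, n_clusters=2)

print(labels.tolist(), normalized_labels.tolist(), predictions.tolist(), sep="\n")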