From 4923881829e023287d0f6d94da0f020fa0a3d26a Mon Sep 17 00:00:00 2001
From: Arthur Lu
Date: Wed, 26 May 2021 07:41:32 +0000
Subject: [PATCH 1/3] Added Clustering.py

moved kmeans from Analysis to Clustering
---
 analysis-master/tra_analysis/Analysis.py   |  4 +--
 analysis-master/tra_analysis/Clustering.py | 30 ++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 analysis-master/tra_analysis/Clustering.py

diff --git a/analysis-master/tra_analysis/Analysis.py b/analysis-master/tra_analysis/Analysis.py
index 6a8a63b3..df81951c 100644
--- a/analysis-master/tra_analysis/Analysis.py
+++ b/analysis-master/tra_analysis/Analysis.py
@@ -599,7 +599,7 @@ def npmin(data):
 def npmax(data):
 
 	return np.amax(data)
-
+""" need to decide what to do with this function
 def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
 
 	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
@@ -608,7 +608,7 @@ def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.
 	kernel.fit(data)
 	predictions = kernel.predict(data)
 	centers = kernel.cluster_centers_
 
 	return centers, predictions
-
+"""
 def pca(data, n_components = None, copy = True, whiten = False, svd_solver = "auto", tol = 0.0, iterated_power = "auto", random_state = None):
 
 	kernel = sklearn.decomposition.PCA(n_components = n_components, copy = copy, whiten = whiten, svd_solver = svd_solver, tol = tol, iterated_power = iterated_power, random_state = random_state)
diff --git a/analysis-master/tra_analysis/Clustering.py b/analysis-master/tra_analysis/Clustering.py
new file mode 100644
index 00000000..35988715
--- /dev/null
+++ b/analysis-master/tra_analysis/Clustering.py
@@ -0,0 +1,30 @@
+# Titan Robotics Team 2022: Clustering submodule
+# Written by Arthur Lu
+# Notes:
+# this should be imported as a python module using 'from tra_analysis import Clustering'
+# setup:
+
+__version__ = "1.0.0"
+
+# changelog should be viewed using print(analysis.__changelog__)
+__changelog__ = """changelog:
+	1.0.0:
+		- created this submodule
+		- copied kmeans clustering from Analysis
+"""
+
+__author__ = (
+	"Arthur Lu ",
+)
+
+__all__ = [
+]
+
+def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
+
+	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
+	kernel.fit(data)
+	predictions = kernel.predict(data)
+	centers = kernel.cluster_centers_
+
+	return centers, predictions
\ No newline at end of file

From 3e99869d5dedd03dd330ce557a8bb98e3f52040b Mon Sep 17 00:00:00 2001
From: Arthur Lu
Date: Thu, 15 Jul 2021 23:11:42 +0000
Subject: [PATCH 2/3] added dbscan and spectral to Clustering.py

---
 analysis-master/test_analysis.py           | 13 +++++++++++-
 analysis-master/tra_analysis/Clustering.py | 24 ++++++++++++++++++++--
 analysis-master/tra_analysis/__init__.py   |  1 +
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/analysis-master/test_analysis.py b/analysis-master/test_analysis.py
index ebba8da5..4bcb87b5 100644
--- a/analysis-master/test_analysis.py
+++ b/analysis-master/test_analysis.py
@@ -5,6 +5,7 @@ from sklearn import metrics
 from tra_analysis import Analysis as an
 from tra_analysis import Array
 from tra_analysis import ClassificationMetric
+from tra_analysis import Clustering
 from tra_analysis import CorrelationTest
 from tra_analysis import Fit
 from tra_analysis import KNN
@@ -230,4 +231,14 @@ def test_equation():
 		"-(sgn(cos(PI/4)))": -1,
 	}
 	for key in list(correctParse.keys()):
-		assert parser.eval(key) == correctParse[key]
\ No newline at end of file
+		assert parser.eval(key) == correctParse[key]
+
+def test_clustering():
+
+	data = X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
+
+	assert Clustering.dbscan(data, eps=3, min_samples=2).tolist() == [0, 0, 0, 1, 1, -1]
+
+	data = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])
+
+	assert Clustering.spectral(data, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [1, 1, 1, 0, 0, 0]
\ No newline at end of file
diff --git a/analysis-master/tra_analysis/Clustering.py b/analysis-master/tra_analysis/Clustering.py
index 35988715..e155d5f0 100644
--- a/analysis-master/tra_analysis/Clustering.py
+++ b/analysis-master/tra_analysis/Clustering.py
@@ -4,10 +4,13 @@
 # this should be imported as a python module using 'from tra_analysis import Clustering'
 # setup:
 
-__version__ = "1.0.0"
+__version__ = "2.0.0"
 
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
+	2.0.0:
+		- added dbscan clustering algo
+		- added spectral clustering algo
 	1.0.0:
 		- created this submodule
 		- copied kmeans clustering from Analysis
@@ -18,8 +21,13 @@ __author__ = (
 )
 
 __all__ = [
+	"kmeans",
+	"dbscan",
+	"spectral",
 ]
 
+import sklearn
+
 def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
 
 	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
@@ -27,4 +35,16 @@ def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.
 	predictions = kernel.predict(data)
 	centers = kernel.cluster_centers_
 
-	return centers, predictions
\ No newline at end of file
+	return centers, predictions
+
+def dbscan(data, eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None):
+
+	model = sklearn.cluster.DBSCAN(eps = eps, min_samples = min_samples, metric = metric, metric_params = metric_params, algorithm = algorithm, leaf_size = leaf_size, p = p, n_jobs = n_jobs).fit(data)
+
+	return model.labels_
+
+def spectral(data, n_clusters=8, eigen_solver=None, n_components=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=None, verbose=False):
+
+	model = sklearn.cluster.SpectralClustering(n_clusters = n_clusters, eigen_solver = eigen_solver, n_components = n_components, random_state = random_state, n_init = n_init, gamma = gamma, affinity = affinity, n_neighbors = n_neighbors, eigen_tol = eigen_tol, assign_labels = assign_labels, degree = degree, coef0 = coef0, kernel_params = kernel_params, n_jobs = n_jobs).fit(data)
+
+	return model.labels_
\ No newline at end of file
diff --git a/analysis-master/tra_analysis/__init__.py b/analysis-master/tra_analysis/__init__.py
index a13507b8..512db270 100644
--- a/analysis-master/tra_analysis/__init__.py
+++ b/analysis-master/tra_analysis/__init__.py
@@ -59,6 +59,7 @@ __all__ = [
 from . import Analysis as Analysis
 from .Array import Array
 from .ClassificationMetric import ClassificationMetric
+from . import Clustering
 from . import CorrelationTest
 from .equation import Expression
 from . import Fit

From 3606a072c47237aed5d3beeb116efc2e17f82b69 Mon Sep 17 00:00:00 2001
From: Arthur Lu
Date: Mon, 26 Jul 2021 18:17:42 +0000
Subject: [PATCH 3/3] added normalization preprocessing to Clustering

added unit tests for normalized clustering
---
 analysis-master/test_analysis.py           |  6 +++++-
 analysis-master/tra_analysis/Clustering.py | 19 +++++++++++++++----
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/analysis-master/test_analysis.py b/analysis-master/test_analysis.py
index 4bcb87b5..1668c719 100644
--- a/analysis-master/test_analysis.py
+++ b/analysis-master/test_analysis.py
@@ -235,10 +235,14 @@ def test_equation():
 
 def test_clustering():
 
+	normalizer = sklearn.preprocessing.Normalizer()
+
 	data = X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
 
 	assert Clustering.dbscan(data, eps=3, min_samples=2).tolist() == [0, 0, 0, 1, 1, -1]
+	assert Clustering.dbscan(data, normalizer=normalizer, eps=3, min_samples=2).tolist() == [0, 0, 0, 0, 0, 0]
 
 	data = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])
 
-	assert Clustering.spectral(data, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [1, 1, 1, 0, 0, 0]
\ No newline at end of file
+	assert Clustering.spectral(data, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [1, 1, 1, 0, 0, 0]
+	assert Clustering.spectral(data, normalizer=normalizer, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [0, 1, 1, 0, 0, 0]
\ No newline at end of file
diff --git a/analysis-master/tra_analysis/Clustering.py b/analysis-master/tra_analysis/Clustering.py
index e155d5f0..8b7cef6a 100644
--- a/analysis-master/tra_analysis/Clustering.py
+++ b/analysis-master/tra_analysis/Clustering.py
@@ -4,10 +4,12 @@
 # this should be imported as a python module using 'from tra_analysis import Clustering'
 # setup:
 
-__version__ = "2.0.0"
+__version__ = "2.0.1"
 
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
+	2.0.1:
+		- added normalization preprocessing to clustering, expects instance of sklearn.preprocessing.Normalizer()
 	2.0.0:
 		- added dbscan clustering algo
 		- added spectral clustering algo
@@ -28,7 +30,10 @@ __all__ = [
 
 import sklearn
 
-def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
+def kmeans(data, normalizer = None, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
 
 	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
 	kernel.fit(data)
@@ -37,13 +42,19 @@ def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.
 
 	return centers, predictions
 
-def dbscan(data, eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None):
+def dbscan(data, normalizer=None, eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
 
 	model = sklearn.cluster.DBSCAN(eps = eps, min_samples = min_samples, metric = metric, metric_params = metric_params, algorithm = algorithm, leaf_size = leaf_size, p = p, n_jobs = n_jobs).fit(data)
 
 	return model.labels_
 
-def spectral(data, n_clusters=8, eigen_solver=None, n_components=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=None, verbose=False):
+def spectral(data, normalizer=None, n_clusters=8, eigen_solver=None, n_components=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=None, verbose=False):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
 
 	model = sklearn.cluster.SpectralClustering(n_clusters = n_clusters, eigen_solver = eigen_solver, n_components = n_components, random_state = random_state, n_init = n_init, gamma = gamma, affinity = affinity, n_neighbors = n_neighbors, eigen_tol = eigen_tol, assign_labels = assign_labels, degree = degree, coef0 = coef0, kernel_params = kernel_params, n_jobs = n_jobs).fit(data)
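
Usage note (a minimal sketch, not part of the patch series above): the snippet below shows how the new Clustering submodule is called once all three patches are applied, mirroring the cases exercised in test_analysis.py. It assumes numpy and a scikit-learn release contemporary with these patches (Clustering.kmeans still forwards precompute_distances and n_jobs, which later scikit-learn versions removed); the explicit sklearn.cluster / sklearn.preprocessing imports and the n_clusters=2 choice are illustrative assumptions, not something the patches themselves require.

import numpy as np
import sklearn.cluster        # defensive: ensures sklearn.cluster.* used inside Clustering resolves
import sklearn.preprocessing  # defensive: ensures sklearn.preprocessing.Normalizer is available
from tra_analysis import Clustering

data = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])

# dbscan returns one label per row; -1 marks noise (test_analysis.py expects [0, 0, 0, 1, 1, -1])
labels = Clustering.dbscan(data, eps=3, min_samples=2)

# Normalizer is stateless, so an unfitted instance can be passed in;
# each row is rescaled to unit norm before clustering
normalizer = sklearn.preprocessing.Normalizer()
normalized_labels = Clustering.dbscan(data, normalizer=normalizer, eps=3, min_samples=2)

# kmeans returns a (cluster_centers, per-row labels) tuple; n_clusters=2 is an arbitrary choice here
centers, predictions = Clustering.kmeans(data, n_clusters=2)

print(labels.tolist(), normalized_labels.tolist(), predictions.tolist(), sep="\n")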