mirror of https://github.com/titanscouting/tra-analysis.git (synced 2024-12-27 01:59:08 +00:00)

Merge pull request #82 from titanscouting/improve-clustering
Added new clustering tools and reorganized existing ones

Commit a48ef20ef2
@@ -5,6 +5,7 @@ from sklearn import metrics
 from tra_analysis import Analysis as an
 from tra_analysis import Array
 from tra_analysis import ClassificationMetric
+from tra_analysis import Clustering
 from tra_analysis import CorrelationTest
 from tra_analysis import Fit
 from tra_analysis import KNN
@@ -231,3 +232,17 @@ def test_equation():
 	}
 	for key in list(correctParse.keys()):
 		assert parser.eval(key) == correctParse[key]
+
+def test_clustering():
+
+	normalizer = sklearn.preprocessing.Normalizer()
+
+	data = X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
+
+	assert Clustering.dbscan(data, eps=3, min_samples=2).tolist() == [0, 0, 0, 1, 1, -1]
+	assert Clustering.dbscan(data, normalizer=normalizer, eps=3, min_samples=2).tolist() == [0, 0, 0, 0, 0, 0]
+
+	data = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])
+
+	assert Clustering.spectral(data, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [1, 1, 1, 0, 0, 0]
+	assert Clustering.spectral(data, normalizer=normalizer, n_clusters=2, assign_labels='discretize', random_state=0).tolist() == [0, 1, 1, 0, 0, 0]
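A note on why the two dbscan assertions differ: sklearn.preprocessing.Normalizer rescales each sample to unit L2 norm, so after normalization every row lies on the unit circle and the outlier [25, 80] is no longer far from the other points; with eps=3, DBSCAN then finds a single cluster instead of two clusters plus noise. A standalone sketch of the same effect using sklearn directly, mirroring what the Clustering.dbscan wrapper (added in Clustering.py below) does internally:

	import numpy as np
	import sklearn.cluster
	import sklearn.preprocessing

	data = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])

	# without normalization: two clusters plus one noise point (label -1)
	raw_labels = sklearn.cluster.DBSCAN(eps=3, min_samples=2).fit(data).labels_
	print(raw_labels.tolist())  # [0, 0, 0, 1, 1, -1]

	# Normalizer is stateless: transform() rescales each row to unit L2 norm,
	# which pulls the outlier onto the unit circle next to the other samples
	normalized = sklearn.preprocessing.Normalizer().transform(data)
	norm_labels = sklearn.cluster.DBSCAN(eps=3, min_samples=2).fit(normalized).labels_
	print(norm_labels.tolist())  # [0, 0, 0, 0, 0, 0]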
@@ -599,7 +599,7 @@ def npmin(data):
 def npmax(data):

 	return np.amax(data)
-
+""" need to decide what to do with this function
 def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):

 	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
@@ -608,7 +608,7 @@ def kmeans(data, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.
 	centers = kernel.cluster_centers_

 	return centers, predictions
-
+"""
 def pca(data, n_components = None, copy = True, whiten = False, svd_solver = "auto", tol = 0.0, iterated_power = "auto", random_state = None):

 	kernel = sklearn.decomposition.PCA(n_components = n_components, copy = copy, whiten = whiten, svd_solver = svd_solver, tol = tol, iterated_power = iterated_power, random_state = random_state)
analysis-master/tra_analysis/Clustering.py (new file, 61 lines)
@@ -0,0 +1,61 @@
+# Titan Robotics Team 2022: Clustering submodule
+# Written by Arthur Lu
+# Notes:
+# this should be imported as a python module using 'from tra_analysis import Clustering'
+# setup:
+
+__version__ = "2.0.1"
+
+# changelog should be viewed using print(analysis.__changelog__)
+__changelog__ = """changelog:
+	2.0.1:
+		- added normalization preprocessing to clustering, expects instance of sklearn.preprocessing.Normalizer()
+	2.0.0:
+		- added dbscan clustering algo
+		- added spectral clustering algo
+	1.0.0:
+		- created this submodule
+		- copied kmeans clustering from Analysis
+"""
+
+__author__ = (
+	"Arthur Lu <learthurgo@gmail.com>",
+)
+
+__all__ = [
+	"kmeans",
+	"dbscan",
+	"spectral",
+]
+
+import sklearn
+
+def kmeans(data, normalizer = None, n_clusters=8, init="k-means++", n_init=10, max_iter=300, tol=0.0001, precompute_distances="auto", verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm="auto"):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
+
+	kernel = sklearn.cluster.KMeans(n_clusters = n_clusters, init = init, n_init = n_init, max_iter = max_iter, tol = tol, precompute_distances = precompute_distances, verbose = verbose, random_state = random_state, copy_x = copy_x, n_jobs = n_jobs, algorithm = algorithm)
+	kernel.fit(data)
+	predictions = kernel.predict(data)
+	centers = kernel.cluster_centers_
+
+	return centers, predictions
+
+def dbscan(data, normalizer=None, eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
+
+	model = sklearn.cluster.DBSCAN(eps = eps, min_samples = min_samples, metric = metric, metric_params = metric_params, algorithm = algorithm, leaf_size = leaf_size, p = p, n_jobs = n_jobs).fit(data)
+
+	return model.labels_
+
+def spectral(data, normalizer=None, n_clusters=8, eigen_solver=None, n_components=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=None, verbose=False):
+
+	if normalizer != None:
+		data = normalizer.transform(data)
+
+	model = sklearn.cluster.SpectralClustering(n_clusters = n_clusters, eigen_solver = eigen_solver, n_components = n_components, random_state = random_state, n_init = n_init, gamma = gamma, affinity = affinity, n_neighbors = n_neighbors, eigen_tol = eigen_tol, assign_labels = assign_labels, degree = degree, coef0 = coef0, kernel_params = kernel_params, n_jobs = n_jobs).fit(data)
+
+	return model.labels_
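For reference, a minimal usage sketch of the new submodule. The sample data and expected labels are taken from the tests above; it assumes a scikit-learn release contemporary with this commit, since the kmeans wrapper still forwards precompute_distances and n_jobs, which newer scikit-learn versions have removed from KMeans:

	import numpy as np
	import sklearn.preprocessing
	from tra_analysis import Clustering

	data = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])

	# dbscan and spectral return the fitted labels_ array
	labels = Clustering.dbscan(data, eps=3, min_samples=2)
	print(labels.tolist())  # [0, 0, 0, 1, 1, -1]

	# all three wrappers accept an optional sklearn.preprocessing.Normalizer,
	# which is applied to the data before clustering
	labels = Clustering.dbscan(data, normalizer=sklearn.preprocessing.Normalizer(), eps=3, min_samples=2)
	print(labels.tolist())  # [0, 0, 0, 0, 0, 0]

	# kmeans differs: it returns a (centers, predictions) tuple
	centers, predictions = Clustering.kmeans(data, n_clusters=2)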
@@ -60,6 +60,7 @@ __all__ = [
 from . import Analysis as Analysis
 from .Array import Array
 from .ClassificationMetric import ClassificationMetric
+from . import Clustering
 from . import CorrelationTest
 from .equation import Expression
 from . import Fit