20e04c8b53
CI / linux (map[runner:ubuntu-22.04 target:aarch64]) (push) Failing after 55s
CI / linux (map[runner:ubuntu-22.04 target:armv7]) (push) Failing after 4s
CI / linux (map[runner:ubuntu-22.04 target:ppc64le]) (push) Failing after 4s
CI / linux (map[runner:ubuntu-22.04 target:s390x]) (push) Failing after 4s
CI / linux (map[runner:ubuntu-22.04 target:x86]) (push) Failing after 5s
CI / musllinux (map[runner:ubuntu-22.04 target:aarch64]) (push) Failing after 6s
CI / musllinux (map[runner:ubuntu-22.04 target:armv7]) (push) Failing after 5s
CI / musllinux (map[runner:ubuntu-22.04 target:x86]) (push) Failing after 4s
CI / musllinux (map[runner:ubuntu-22.04 target:x86_64]) (push) Failing after 4s
CI / windows (map[runner:windows-latest target:x64]) (push) Has been cancelled
CI / windows (map[runner:windows-latest target:x86]) (push) Has been cancelled
CI / macos (map[runner:macos-13 target:x86_64]) (push) Has been cancelled
CI / macos (map[runner:macos-14 target:aarch64]) (push) Has been cancelled
CI / Release (push) Has been cancelled
CI / sdist (push) Has been cancelled
CI / linux (map[runner:ubuntu-22.04 target:x86_64]) (push) Failing after 5s
246 lines
9.2 KiB
Python
246 lines
9.2 KiB
Python
# This file is automatically generated by pyo3_stub_gen
|
|
# ruff: noqa: E501, F401, F403, F405
|
|
|
|
import builtins
|
|
import numpy
|
|
import numpy.typing
|
|
import typing
|
|
# Names exported by this stub module.
__all__ = ["KMeans", "KMeansAlgorithm", "KMeansInit", "silhouette"]
|
|
|
|
@typing.final
class KMeans:
    r"""
    Compute k-means clustering.

    This implementation is supposed to be faster than scipy or scikit-learn
    when dealing with a lot of points.

    ## Arguments
    - **points**: Numpy array #points x dimensions
    - **k**: Amount of clusters to search for
    - **max_iter**: Limit the maximum amount of iterations (just pass a high number for infinite)
    - **init**: initialization method
    - **algorithm**: algorithm to use
    """

    @property
    def ndim(self) -> builtins.int:
        r"""
        number of dimensions
        """

    @property
    def k(self) -> builtins.int:
        r"""
        number of clusters
        """

    @property
    def distance_sum(self) -> builtins.float:
        r"""
        sum of all distances, cost measure
        """

    @property
    def centroids(self) -> numpy.typing.NDArray[numpy.float64]:
        r"""
        centroid coordinates
        """

    @property
    def centroid_frequency(self) -> builtins.list[builtins.int]:
        r"""
        centroid frequencies
        """

    @property
    def assignments(self) -> builtins.list[builtins.int]:
        r"""
        to which cluster each of the points is assigned
        """

    @property
    def centroid_distances(self) -> builtins.list[builtins.float]:
        r"""
        distances of all points to the center they are assigned to
        """

    def __new__(
        cls,
        points: numpy.typing.ArrayLike,
        k: builtins.int,
        max_iter: builtins.int = 300,
        init: typing.Optional[KMeansInit] = None,
        algorithm: typing.Optional[KMeansAlgorithm] = None,
    ) -> KMeans: ...

    @staticmethod
    def init_plusplus() -> KMeansInit:
        r"""
        K-Means++ initialization method, as implemented in Matlab

        ## Description
        This initialization method starts by selecting one sample as first centroid.
        Proceeding from there, the method iteratively selects one new centroid (per iteration) by calculating
        each sample's probability of "being a centroid". This probability is bigger, the farther away a sample
        is from its centroid. Then, one sample is randomly selected, while taking their probability of being
        the next centroid into account. This leads to a tendency of selecting centroids, that are far away from
        their currently assigned cluster's centroid.
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5 Section: More About)
        """

    @staticmethod
    def init_random_partition() -> KMeansInit:
        r"""
        Random-Partition initialization method

        ## Description
        This initialization method randomly partitions the samples into k partitions, and then calculates these partition's means.
        These means are then used as initial clusters.
        """

    @staticmethod
    def init_random_sample() -> KMeansInit:
        r"""
        Random sample initialization method (a.k.a. Forgy)

        ## Description
        This initialization method randomly selects k centroids from the samples as initial centroids.
        """

    @staticmethod
    def init_precomputed(centroids: numpy.typing.ArrayLike) -> KMeansInit:
        r"""
        Precomputed centroids initialization method

        ## Description
        This initialization method requires a precomputed list of k centroids to use as initial
        centroids.
        """

    @staticmethod
    def algo_lloyd() -> KMeansAlgorithm:
        r"""
        Normal K-Means algorithm implementation. This is the same algorithm as implemented in Matlab (one-phase).
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5 Section: More About)
        """

    @staticmethod
    def algo_mini_batch(batch_size: builtins.int) -> KMeansAlgorithm:
        r"""
        Mini-Batch k-Means implementation.
        (see: https://dl.acm.org/citation.cfm?id=1772862)

        ## Arguments
        - **batch_size**: Amount of samples to use per iteration (higher -> better approximation but slower)
        """

    def predict(
        self,
        points: numpy.typing.ArrayLike,
    ) -> tuple[builtins.list[builtins.int], builtins.list[builtins.float]]:
        r"""
        find the closest cluster and the distance for each point
        """

    # NOTE(review): `assignments` defaults to None, so it is annotated as
    # Optional explicitly; an implicit Optional (`ArrayLike = None`) is
    # rejected by strict type checkers. This file is generated by
    # pyo3_stub_gen, so the same change should be mirrored in the generator
    # input or it will be lost on regeneration.
    def silhouette_simple(
        self,
        points: numpy.typing.ArrayLike,
        assignments: typing.Optional[numpy.typing.ArrayLike] = None,
    ) -> builtins.float:
        r"""
        calculate the mean simple (using centroids) silhouette score for a set of points,
        assignments must be specified if they do not correspond to the assignments in the KMeans instance
        """
|
|
|
|
class KMeansAlgorithm:
    r"""
    Selects the k-means algorithm to run: either lloyd or mini_batch.
    """

    @staticmethod
    def lloyd() -> KMeansAlgorithm:
        r"""
        Standard K-Means algorithm; the same algorithm as implemented in Matlab (one-phase).
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5 Section: More About)
        """

    @staticmethod
    def mini_batch(
        batch_size: builtins.int,
    ) -> KMeansAlgorithm:
        r"""
        Mini-Batch variant of k-Means.
        (see: https://dl.acm.org/citation.cfm?id=1772862)

        ## Arguments
        - **batch_size**: Amount of samples to use per iteration (higher -> better approximation but slower)
        """

    @typing.final
    class Lloyd(KMeansAlgorithm):
        # Variant with no positional fields (empty ``__match_args__``).
        __match_args__ = ()

        def __new__(cls) -> KMeansAlgorithm.Lloyd: ...

        def __len__(self) -> builtins.int: ...

        def __getitem__(self, key: builtins.int) -> typing.Any: ...

    @typing.final
    class MiniBatch(KMeansAlgorithm):
        # Variant with one positional field, ``_0``
        # (presumably the batch size passed to ``mini_batch`` - TODO confirm).
        __match_args__ = ("_0",)

        @property
        def _0(self) -> builtins.int: ...

        def __new__(cls, _0: builtins.int) -> KMeansAlgorithm.MiniBatch: ...

        def __len__(self) -> builtins.int: ...

        def __getitem__(self, key: builtins.int) -> typing.Any: ...
|
|
|
|
|
|
class KMeansInit:
    r"""
    Specify an initialization method using plusplus, random_partition, random_sample or precomputed.
    """

    @staticmethod
    def plusplus() -> KMeansInit:
        r"""
        K-Means++ initialization method, as implemented in Matlab

        ## Description
        This initialization method starts by selecting one sample as first centroid.
        Proceeding from there, the method iteratively selects one new centroid (per iteration) by calculating
        each sample's probability of "being a centroid". This probability is bigger, the farther away a sample
        is from its centroid. Then, one sample is randomly selected, while taking their probability of being
        the next centroid into account. This leads to a tendency of selecting centroids, that are far away from
        their currently assigned cluster's centroid.
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5 Section: More About)
        """

    @staticmethod
    def random_partition() -> KMeansInit:
        r"""
        Random-Partition initialization method

        ## Description
        This initialization method randomly partitions the samples into k partitions, and then calculates these partition's means.
        These means are then used as initial clusters.
        """

    @staticmethod
    def random_sample() -> KMeansInit:
        r"""
        Random sample initialization method (a.k.a. Forgy)

        ## Description
        This initialization method randomly selects k centroids from the samples as initial centroids.
        """

    @staticmethod
    def precomputed(centroids: numpy.typing.ArrayLike) -> KMeansInit:
        r"""
        Precomputed centroids initialization method

        ## Description
        This initialization method requires a precomputed list of k centroids to use as initial
        centroids.
        """

    @typing.final
    class PlusPlus(KMeansInit):
        # Variant with no positional fields (empty ``__match_args__``).
        __match_args__ = ()

        def __new__(cls) -> KMeansInit.PlusPlus: ...

        def __len__(self) -> builtins.int: ...

        def __getitem__(self, key: builtins.int) -> typing.Any: ...

    @typing.final
    class RandomPartition(KMeansInit):
        # Variant with no positional fields (empty ``__match_args__``).
        __match_args__ = ()

        def __new__(cls) -> KMeansInit.RandomPartition: ...

        def __len__(self) -> builtins.int: ...

        def __getitem__(self, key: builtins.int) -> typing.Any: ...

    @typing.final
    class RandomSample(KMeansInit):
        # Variant with no positional fields (empty ``__match_args__``).
        __match_args__ = ()

        def __new__(cls) -> KMeansInit.RandomSample: ...

        def __len__(self) -> builtins.int: ...

        def __getitem__(self, key: builtins.int) -> typing.Any: ...

    @typing.final
    class Precomputed(KMeansInit):
        # Variant with one positional field, ``_0``, typed as a list of floats
        # (presumably the precomputed centroid data - TODO confirm layout).
        __match_args__ = ("_0",)

        @property
        def _0(self) -> builtins.list[builtins.float]: ...

        def __new__(cls, _0: typing.Sequence[builtins.float]) -> KMeansInit.Precomputed: ...

        def __len__(self) -> builtins.int: ...

        def __getitem__(self, key: builtins.int) -> typing.Any: ...
|
|
|
|
|
|
def silhouette(
    points: numpy.typing.ArrayLike,
    assignments: numpy.typing.ArrayLike,
) -> builtins.float:
    r"""
    Compute the mean silhouette score over a set of points.
    """
|
|
|