# kmeans_rs/py/kmeans_rs/__init__.pyi

# This file is automatically generated by pyo3_stub_gen
# ruff: noqa: E501, F401, F403, F405

import builtins
import numpy
import numpy.typing
import typing
# Names exported by a star-import of this module.
__all__ = [
    "KMeans",
    "KMeansAlgorithm",
    "KMeansInit",
    "silhouette",
]

@typing.final
class KMeans:
    r"""
    Compute a k-means clustering of a set of points.

    This implementation is intended to be faster than scipy or scikit-learn
    when dealing with a lot of points.

    ## Arguments
    - **points**: Numpy array of shape (#points, #dimensions)
    - **k**: Number of clusters to search for
    - **max_iter**: Upper limit on the number of iterations (just pass a high number for infinite), defaults to 300
    - **init**: Initialization method (a `KMeansInit`), optional
    - **algorithm**: Algorithm to use (a `KMeansAlgorithm`), optional
    """
    @property
    def ndim(self) -> builtins.int:
        r"""
        Number of dimensions
        """
    @property
    def k(self) -> builtins.int:
        r"""
        Number of clusters
        """
    @property
    def distance_sum(self) -> builtins.float:
        r"""
        Sum of all distances; serves as the cost measure
        """
    @property
    def centroids(self) -> numpy.typing.NDArray[numpy.float64]:
        r"""
        Centroid coordinates as a float64 numpy array
        """
    @property
    def centroid_frequency(self) -> builtins.list[builtins.int]:
        r"""
        Centroid frequencies
        (presumably the number of points assigned to each centroid - confirm against the implementation)
        """
    @property
    def assignments(self) -> builtins.list[builtins.int]:
        r"""
        The cluster to which each of the points is assigned
        """
    @property
    def centroid_distances(self) -> builtins.list[builtins.float]:
        r"""
        Distance of each point to the centroid it is assigned to
        """
    def __new__(cls, points: numpy.typing.ArrayLike, k: builtins.int, max_iter: builtins.int = 300, init: typing.Optional[KMeansInit] = None, algorithm: typing.Optional[KMeansAlgorithm] = None) -> KMeans: ...
    @staticmethod
    def init_plusplus() -> KMeansInit:
        r"""
        K-Means++ initialization method, as implemented in Matlab

        ## Description
        This initialization method starts by selecting one sample as the first centroid.
        Proceeding from there, the method iteratively selects one new centroid (per iteration) by calculating
        each sample's probability of "being a centroid". This probability is bigger, the farther away a sample
        is from its centroid. Then, one sample is randomly selected, while taking its probability of being
        the next centroid into account. This leads to a tendency of selecting centroids that are far away from
        their currently assigned cluster's centroid.
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5    Section: More About)
        """
    @staticmethod
    def init_random_partition() -> KMeansInit:
        r"""
        Random-Partition initialization method

        ## Description
        This initialization method randomly partitions the samples into k partitions, and then calculates these partitions' means.
        These means are then used as initial clusters.
        """
    @staticmethod
    def init_random_sample() -> KMeansInit:
        r"""
        Random sample initialization method (a.k.a. Forgy)

        ## Description
        This initialization method randomly selects k of the samples as initial centroids.
        """
    @staticmethod
    def init_precomputed(centroids: numpy.typing.ArrayLike) -> KMeansInit:
        r"""
        Precomputed centroids initialization method

        ## Description
        This initialization method requires a precomputed list of k centroids to use as initial
        centroids.

        ## Arguments
        - **centroids**: The k precomputed centroids
        """
    @staticmethod
    def algo_lloyd() -> KMeansAlgorithm:
        r"""
        Normal K-Means algorithm implementation. This is the same algorithm as implemented in Matlab (one-phase).
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5    Section: More About)
        """
    @staticmethod
    def algo_mini_batch(batch_size: builtins.int) -> KMeansAlgorithm:
        r"""
        Mini-Batch k-Means implementation.
        (see: https://dl.acm.org/citation.cfm?id=1772862)

        ## Arguments
        - **batch_size**: Number of samples to use per iteration (higher -> better approximation but slower)
        """
    def predict(self, points: numpy.typing.ArrayLike) -> tuple[builtins.list[builtins.int], builtins.list[builtins.float]]:
        r"""
        Find the closest cluster and the distance to it for each point

        ## Arguments
        - **points**: Points to look up

        ## Returns
        A tuple of two lists: the closest cluster per point, and the corresponding distances
        """
    # `assignments` defaults to None, so its annotation must be Optional
    # (PEP 484 does not allow an implicit Optional from a None default).
    def silhouette_simple(self, points: numpy.typing.ArrayLike, assignments: typing.Optional[numpy.typing.ArrayLike] = None) -> builtins.float:
        r"""
        Calculate the mean simple (using centroids) silhouette score for a set of points

        ## Arguments
        - **points**: Points to score
        - **assignments**: Cluster assignments of the points; must be specified if they do not
          correspond to the assignments in the KMeans instance, and may be left as `None` otherwise
        """

class KMeansAlgorithm:
    r"""
    Specify a k-means algorithm using `lloyd` or `mini_batch`.

    The nested `Lloyd` and `MiniBatch` classes are the individual variants; each one
    subclasses `KMeansAlgorithm`.
    """
    @staticmethod
    def lloyd() -> KMeansAlgorithm:
        r"""
        Normal K-Means algorithm implementation. This is the same algorithm as implemented in Matlab (one-phase).
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5    Section: More About)
        """
    @staticmethod
    def mini_batch(batch_size: builtins.int) -> KMeansAlgorithm:
        r"""
        Mini-Batch k-Means implementation.
        (see: https://dl.acm.org/citation.cfm?id=1772862)

        ## Arguments
        - **batch_size**: Number of samples to use per iteration (higher -> better approximation but slower)
        """
    # Variant without fields (`__match_args__` is empty).
    @typing.final
    class Lloyd(KMeansAlgorithm):
        __match_args__ = ()
        def __new__(cls) -> KMeansAlgorithm.Lloyd: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...

    # Variant with one positional int field `_0`
    # (presumably the batch size given to `mini_batch` - TODO confirm).
    @typing.final
    class MiniBatch(KMeansAlgorithm):
        __match_args__ = ("_0",)
        @property
        def _0(self) -> builtins.int: ...
        def __new__(cls, _0: builtins.int) -> KMeansAlgorithm.MiniBatch: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...


class KMeansInit:
    r"""
    Specify an initialization method using `plusplus`, `random_partition`, `random_sample` or `precomputed`.

    The nested `PlusPlus`, `RandomPartition`, `RandomSample` and `Precomputed` classes are the
    individual variants; each one subclasses `KMeansInit`.
    """
    @staticmethod
    def plusplus() -> KMeansInit:
        r"""
        K-Means++ initialization method, as implemented in Matlab

        ## Description
        This initialization method starts by selecting one sample as the first centroid.
        Proceeding from there, the method iteratively selects one new centroid (per iteration) by calculating
        each sample's probability of "being a centroid". This probability is bigger, the farther away a sample
        is from its centroid. Then, one sample is randomly selected, while taking its probability of being
        the next centroid into account. This leads to a tendency of selecting centroids that are far away from
        their currently assigned cluster's centroid.
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5    Section: More About)
        """
    @staticmethod
    def random_partition() -> KMeansInit:
        r"""
        Random-Partition initialization method

        ## Description
        This initialization method randomly partitions the samples into k partitions, and then calculates these partitions' means.
        These means are then used as initial clusters.
        """
    @staticmethod
    def random_sample() -> KMeansInit:
        r"""
        Random sample initialization method (a.k.a. Forgy)

        ## Description
        This initialization method randomly selects k of the samples as initial centroids.
        """
    @staticmethod
    def precomputed(centroids: numpy.typing.ArrayLike) -> KMeansInit:
        r"""
        Precomputed centroids initialization method

        ## Description
        This initialization method requires a precomputed list of k centroids to use as initial
        centroids.

        ## Arguments
        - **centroids**: The k precomputed centroids
        """
    # Variant without fields (`__match_args__` is empty).
    @typing.final
    class PlusPlus(KMeansInit):
        __match_args__ = ()
        def __new__(cls) -> KMeansInit.PlusPlus: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...

    # Variant without fields (`__match_args__` is empty).
    @typing.final
    class RandomPartition(KMeansInit):
        __match_args__ = ()
        def __new__(cls) -> KMeansInit.RandomPartition: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...

    # Variant without fields (`__match_args__` is empty).
    @typing.final
    class RandomSample(KMeansInit):
        __match_args__ = ()
        def __new__(cls) -> KMeansInit.RandomSample: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...

    # Variant with one positional field `_0`, a list of floats
    # (presumably the precomputed centroid values; layout not visible here - TODO confirm).
    @typing.final
    class Precomputed(KMeansInit):
        __match_args__ = ("_0",)
        @property
        def _0(self) -> builtins.list[builtins.float]: ...
        def __new__(cls, _0: typing.Sequence[builtins.float]) -> KMeansInit.Precomputed: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...


def silhouette(
    points: numpy.typing.ArrayLike,
    assignments: numpy.typing.ArrayLike,
) -> builtins.float:
    r"""
    Calculate the mean silhouette score for a set of points

    ## Arguments
    - **points**: Points to score
    - **assignments**: Cluster assignments for the points (presumably one entry per point - TODO confirm)

    ## Returns
    The mean silhouette score
    """