first commit
CI / linux (map[runner:ubuntu-22.04 target:aarch64]) (push) Failing after 55s
CI / linux (map[runner:ubuntu-22.04 target:armv7]) (push) Failing after 4s
CI / linux (map[runner:ubuntu-22.04 target:ppc64le]) (push) Failing after 4s
CI / linux (map[runner:ubuntu-22.04 target:s390x]) (push) Failing after 4s
CI / linux (map[runner:ubuntu-22.04 target:x86]) (push) Failing after 5s
CI / musllinux (map[runner:ubuntu-22.04 target:aarch64]) (push) Failing after 6s
CI / musllinux (map[runner:ubuntu-22.04 target:armv7]) (push) Failing after 5s
CI / musllinux (map[runner:ubuntu-22.04 target:x86]) (push) Failing after 4s
CI / musllinux (map[runner:ubuntu-22.04 target:x86_64]) (push) Failing after 4s
CI / windows (map[runner:windows-latest target:x64]) (push) Has been cancelled
CI / windows (map[runner:windows-latest target:x86]) (push) Has been cancelled
CI / macos (map[runner:macos-13 target:x86_64]) (push) Has been cancelled
CI / macos (map[runner:macos-14 target:aarch64]) (push) Has been cancelled
CI / Release (push) Has been cancelled
CI / sdist (push) Has been cancelled
CI / linux (map[runner:ubuntu-22.04 target:x86_64]) (push) Failing after 5s

This commit is contained in:
w.pomp
2026-04-16 16:26:53 +02:00
commit 20e04c8b53
11 changed files with 1529 additions and 0 deletions
+33
View File
@@ -0,0 +1,33 @@
import os
import sys
from importlib.metadata import version
from pathlib import Path
# Both variables are assigned before the extension module is imported below,
# and they unconditionally overwrite any value already present in the
# caller's environment.
# NOTE(review): presumably forces full Rust backtraces on panics — confirm.
os.environ["RUST_BACKTRACE"] = "full"
# NOTE(review): looks like a backtrace-formatter switch that keeps hidden
# frames visible — confirm against the Rust crate that reads it.
os.environ["COLORBT_SHOW_HIDDEN"] = "1"
from .kmeans_rs import * # noqa
def _read_git_commit_hash(git_dir):
    """Return the commit hash that ``git_dir / "HEAD"`` points at.

    Handles the three layouts git uses:

    * symbolic HEAD (``ref: refs/heads/<branch>``) with a loose ref file,
    * symbolic HEAD whose ref only exists in ``packed-refs``,
    * detached HEAD, where the file contains the hash itself.

    Args:
        git_dir: path of the ``.git`` directory to inspect.

    Returns:
        The hash as a string, or ``"unknown"`` when it cannot be determined
        (no repository, unreadable files, unknown ref, ...).
    """
    git_dir = Path(git_dir)
    try:
        head = (git_dir / "HEAD").read_text(encoding="utf-8").strip()
        if not head.startswith("ref:"):
            # Detached HEAD: the file holds the commit hash directly.
            return head or "unknown"
        ref = head.partition(":")[2].strip()
        ref_file = git_dir / ref
        if ref_file.is_file():
            return ref_file.read_text(encoding="utf-8").rstrip("\n")
        # The ref may have been packed; each entry there is "<hash> <refname>".
        packed = (git_dir / "packed-refs").read_text(encoding="utf-8")
        for line in packed.splitlines():
            commit, _, name = line.partition(" ")
            if name.strip() == ref:
                return commit
    except (OSError, ValueError):
        # Best effort only: a missing or unreadable repository must never
        # break importing the package.
        pass
    return "unknown"


# The distribution is looked up under the name of the package directory.
# Any failure (e.g. running from a source tree that is not installed) is
# deliberately swallowed so that importing the package never fails here.
try:
    __version__ = version(Path(__file__).parent.name)
except Exception:
    __version__ = "unknown"

# The repository root is expected one level above the package directory.
__git_commit_hash__ = _read_git_commit_hash(Path(__file__).parent.parent / ".git")
def kmeans_generate_stub():
    """Command-line entry point that triggers stub generation for a project.

    The project root is taken from the first command-line argument when one
    is given, otherwise from the current working directory. The root must
    contain ``py/kmeans_rs/__init__.py``.

    Raises:
        ModuleNotFoundError: if that file does not exist; the message is the
            path that was looked for.
    """
    root = Path(sys.argv[1] if len(sys.argv) > 1 else Path.cwd()).resolve()
    marker = root / "py" / "kmeans_rs" / "__init__.py"
    if not marker.exists():
        raise ModuleNotFoundError(str(marker))
    # generate_stub is not defined in this file; presumably it is provided by
    # the star-import of the extension module.
    generate_stub(str(root))  # noqa
+245
View File
@@ -0,0 +1,245 @@
# This file is automatically generated by pyo3_stub_gen
# ruff: noqa: E501, F401, F403, F405
import builtins
import numpy
import numpy.typing
import typing
# Public names declared by this stub; `__all__` is also what a star-import
# of the module exposes.
__all__ = [
    "KMeans",
    "KMeansAlgorithm",
    "KMeansInit",
    "silhouette",
]
@typing.final
class KMeans:
    r"""
    Compute kmeans clustering

    This implementation is supposed to be faster than scipy or scikit-learn
    when dealing with a lot of points.

    ## Arguments
    - **points**: Numpy array #points x dimensions
    - **k**: Amount of clusters to search for
    - **max_iter**: Limit the maximum amount of iterations (just pass a high number for infinite)
    - **init**: initialization method
    - **algorithm**: algorithm to use
    """
    @property
    def ndim(self) -> builtins.int:
        r"""
        number of dimensions
        """
    @property
    def k(self) -> builtins.int:
        r"""
        number of clusters
        """
    @property
    def distance_sum(self) -> builtins.float:
        r"""
        sum of all distances, cost measure
        """
    @property
    def centroids(self) -> numpy.typing.NDArray[numpy.float64]:
        r"""
        centroid coordinates
        """
    @property
    def centroid_frequency(self) -> builtins.list[builtins.int]:
        r"""
        centroid frequencies
        """
    @property
    def assignments(self) -> builtins.list[builtins.int]:
        r"""
        to which cluster each of the points is assigned
        """
    @property
    def centroid_distances(self) -> builtins.list[builtins.float]:
        r"""
        distances of all points to the center they are assigned to
        """
    def __new__(cls, points: numpy.typing.ArrayLike, k: builtins.int, max_iter: builtins.int = 300, init: typing.Optional[KMeansInit] = None, algorithm: typing.Optional[KMeansAlgorithm] = None) -> KMeans: ...
    @staticmethod
    def init_plusplus() -> KMeansInit:
        r"""
        K-Means++ initialization method, as implemented in Matlab

        ## Description
        This initialization method starts by selecting one sample as first centroid.
        Proceeding from there, the method iteratively selects one new centroid (per iteration) by calculating
        each sample's probability of "being a centroid". This probability is bigger, the farther away a sample
        is from its centroid. Then, one sample is randomly selected, while taking their probability of being
        the next centroid into account. This leads to a tendency of selecting centroids, that are far away from
        their currently assigned cluster's centroid.
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5 Section: More About)
        """
    @staticmethod
    def init_random_partition() -> KMeansInit:
        r"""
        Random-Partition initialization method

        ## Description
        This initialization method randomly partitions the samples into k partitions, and then calculates these partition's means.
        These means are then used as initial clusters.
        """
    @staticmethod
    def init_random_sample() -> KMeansInit:
        r"""
        Random sample initialization method (a.k.a. Forgy)

        ## Description
        This initialization method randomly selects k centroids from the samples as initial centroids.
        """
    @staticmethod
    def init_precomputed(centroids: numpy.typing.ArrayLike) -> KMeansInit:
        r"""
        Precomputed centroids initialization method

        ## Description
        This initialization method requires a precomputed list of k centroids to use as initial
        centroids.
        """
    @staticmethod
    def algo_lloyd() -> KMeansAlgorithm:
        r"""
        Normal K-Means algorithm implementation. This is the same algorithm as implemented in Matlab (one-phase).
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5 Section: More About)
        """
    @staticmethod
    def algo_mini_batch(batch_size: builtins.int) -> KMeansAlgorithm:
        r"""
        Mini-Batch k-Means implementation.
        (see: https://dl.acm.org/citation.cfm?id=1772862)

        ## Arguments
        - **batch_size**: Amount of samples to use per iteration (higher -> better approximation but slower)
        """
    def predict(self, points: numpy.typing.ArrayLike) -> tuple[builtins.list[builtins.int], builtins.list[builtins.float]]:
        r"""
        find the closest cluster and the distance for each point
        """
    # `assignments` defaults to None, so the annotation is spelled as Optional
    # explicitly (same form as `init` / `algorithm` in `__new__`).
    def silhouette_simple(self, points: numpy.typing.ArrayLike, assignments: typing.Optional[numpy.typing.ArrayLike] = None) -> builtins.float:
        r"""
        calculate the mean simple (using centroids) silhouette score for a set of points,
        assignments must be specified if they do not correspond to the assignments in the KMeans instance
        """
class KMeansAlgorithm:
    r"""
    Specify a kmeans algorithm using lloyd or mini_batch.
    """
    @staticmethod
    def lloyd() -> KMeansAlgorithm:
        r"""
        Normal K-Means algorithm implementation. This is the same algorithm as implemented in Matlab (one-phase).
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5 Section: More About)
        """
    @staticmethod
    def mini_batch(batch_size: builtins.int) -> KMeansAlgorithm:
        r"""
        Mini-Batch k-Means implementation.
        (see: https://dl.acm.org/citation.cfm?id=1772862)

        ## Arguments
        - **batch_size**: Amount of samples to use per iteration (higher -> better approximation but slower)
        """
    # Variant class without positional fields (`__match_args__` is empty).
    @typing.final
    class Lloyd(KMeansAlgorithm):
        __match_args__ = ()
        def __new__(cls) -> KMeansAlgorithm.Lloyd: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...
    # Variant class with one positional field, exposed as `_0`.
    @typing.final
    class MiniBatch(KMeansAlgorithm):
        __match_args__ = ("_0",)
        # NOTE(review): presumably the batch size given to `mini_batch` — confirm.
        @property
        def _0(self) -> builtins.int: ...
        def __new__(cls, _0: builtins.int) -> KMeansAlgorithm.MiniBatch: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...
class KMeansInit:
    r"""
    Specify an initialization method using plusplus, random_partition, random_sample or precomputed.
    """
    @staticmethod
    def plusplus() -> KMeansInit:
        r"""
        K-Means++ initialization method, as implemented in Matlab

        ## Description
        This initialization method starts by selecting one sample as first centroid.
        Proceeding from there, the method iteratively selects one new centroid (per iteration) by calculating
        each sample's probability of "being a centroid". This probability is bigger, the farther away a sample
        is from its centroid. Then, one sample is randomly selected, while taking their probability of being
        the next centroid into account. This leads to a tendency of selecting centroids, that are far away from
        their currently assigned cluster's centroid.
        (see: https://uk.mathworks.com/help/stats/kmeans.html#bueq7aj-5 Section: More About)
        """
    @staticmethod
    def random_partition() -> KMeansInit:
        r"""
        Random-Partition initialization method

        ## Description
        This initialization method randomly partitions the samples into k partitions, and then calculates these partition's means.
        These means are then used as initial clusters.
        """
    @staticmethod
    def random_sample() -> KMeansInit:
        r"""
        Random sample initialization method (a.k.a. Forgy)

        ## Description
        This initialization method randomly selects k centroids from the samples as initial centroids.
        """
    @staticmethod
    def precomputed(centroids: numpy.typing.ArrayLike) -> KMeansInit:
        r"""
        Precomputed centroids initialization method

        ## Description
        This initialization method requires a precomputed list of k centroids to use as initial
        centroids.
        """
    # Variant class without positional fields (`__match_args__` is empty).
    @typing.final
    class PlusPlus(KMeansInit):
        __match_args__ = ()
        def __new__(cls) -> KMeansInit.PlusPlus: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...
    # Variant class without positional fields (`__match_args__` is empty).
    @typing.final
    class RandomPartition(KMeansInit):
        __match_args__ = ()
        def __new__(cls) -> KMeansInit.RandomPartition: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...
    # Variant class without positional fields (`__match_args__` is empty).
    @typing.final
    class RandomSample(KMeansInit):
        __match_args__ = ()
        def __new__(cls) -> KMeansInit.RandomSample: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...
    # Variant class with one positional field, exposed as `_0`.
    @typing.final
    class Precomputed(KMeansInit):
        __match_args__ = ("_0",)
        # NOTE(review): presumably the centroid values given to `precomputed`,
        # typed here as a flat list of floats — confirm the layout.
        @property
        def _0(self) -> builtins.list[builtins.float]: ...
        def __new__(cls, _0: typing.Sequence[builtins.float]) -> KMeansInit.Precomputed: ...
        def __len__(self) -> builtins.int: ...
        def __getitem__(self, key: builtins.int) -> typing.Any: ...
def silhouette(points: numpy.typing.ArrayLike, assignments: numpy.typing.ArrayLike) -> builtins.float:
    r"""
    Calculate the mean silhouette score for a set of points.

    ## Arguments
    - **points**: array-like holding the points to score
    - **assignments**: array-like of cluster assignments (presumably one entry per point; confirm against the implementation)
    """