| from __future__ import annotations | |
| import numpy as np | |
| from sklearn.cluster import KMeans | |
def cluster_embeddings(
    embeddings: np.ndarray,
    n_clusters: int = 4,
    random_state: int = 42,
) -> list[int]:
    """Assign a KMeans cluster label to every row of *embeddings*.

    Args:
        embeddings: Collection of vectors to cluster; only ``len()`` is
            inspected here before handing it to ``KMeans``. Presumably a
            2-D ``(n_samples, n_features)`` array -- not validated in this
            function.
        n_clusters: Requested number of clusters. It is capped at the
            number of samples.
        random_state: Seed forwarded to ``KMeans``.

    Returns:
        One label per sample, in input order. An empty input yields an
        empty list, and when the capped cluster count is exactly one every
        sample is labelled ``0`` without running KMeans.
    """
    sample_count = len(embeddings)

    # Nothing to cluster.
    if sample_count == 0:
        return []

    # Never request more clusters than there are samples.
    cluster_count = min(n_clusters, sample_count)

    # A single cluster is trivial: everything belongs to label 0.
    if cluster_count == 1:
        return [0 for _ in range(sample_count)]

    model = KMeans(
        n_clusters=cluster_count,
        random_state=random_state,
        n_init=10,
    )
    labels = model.fit_predict(embeddings)
    return labels.tolist()