import numpy as np
from sklearn.cluster import KMeans


def cluster_embeddings(
    embeddings: np.ndarray,
    n_clusters: int = 4,
    random_state: int = 42,
) -> list[int]:
    """Assign a KMeans cluster label to every embedding.

    The requested cluster count is capped at the number of embeddings,
    so small inputs never ask KMeans for more clusters than samples.

    Args:
        embeddings: Array of embeddings, one row per sample. It is passed
            unchanged to ``KMeans.fit_predict``, which expects a 2-D
            ``(n_samples, n_features)`` array.
        n_clusters: Desired number of clusters; must be at least 1 when
            ``embeddings`` is non-empty.
        random_state: Seed forwarded to ``KMeans`` for reproducible labels.

    Returns:
        A list with exactly one integer label per embedding, in input
        order. Empty input yields an empty list.

    Raises:
        ValueError: If ``embeddings`` is non-empty and ``n_clusters`` is
            less than 1.
    """
    # len() rather than truthiness: the truth value of a multi-element
    # NumPy array is ambiguous and raises.
    n_samples = len(embeddings)
    if n_samples == 0:
        return []

    if n_clusters < 1:
        raise ValueError(f"n_clusters must be >= 1, got {n_clusters}")

    effective_clusters = min(n_clusters, n_samples)
    if effective_clusters == 1:
        # Everything belongs to the single cluster. Return one label per
        # sample (not a lone [0]) so the output always aligns with the input.
        return [0] * n_samples

    kmeans = KMeans(
        n_clusters=effective_clusters,
        random_state=random_state,
        n_init=10,
    )
    return kmeans.fit_predict(embeddings).tolist()