| |
|
|
| from sentence_transformers import SentenceTransformer |
| from sentence_transformers.models import Transformer, Pooling |
| import numpy as np |
|
|
| |
| |
# Hugging Face identifier of the embedding model under study.
model_name = 'BAAI/bge-large-en-v1.5'

# Transformer module that loads the pretrained encoder for `model_name`.
transformer = Transformer(model_name)

# Token-embedding width reported by the encoder; used to size the pooling layer.
dim = transformer.get_word_embedding_dimension()

# BGE models are published with [CLS]-token pooling, whereas Pooling() defaults
# to mean pooling. Select CLS explicitly so the pooled vector is the one the
# model was trained to produce.
pooling = Pooling(dim, pooling_mode='cls')

# Only these two modules are chained (no Normalize module is added), so
# encode() output keeps its raw, un-normalized L2 norm.
model = SentenceTransformer(modules=[transformer, pooling])
|
|
| |
def get_word_norm(word):
    """Return the L2 norm of the un-normalized embedding of `word`.

    The text is encoded with the module-level `model`, with
    `normalize_embeddings=False` so the vector's magnitude is preserved,
    and the Euclidean length of that vector is returned.
    """
    return np.linalg.norm(model.encode(word, normalize_embeddings=False))
|
|
| |
def category_score(norm, scale_factor=1.0):
    """Return `scale_factor` divided by `norm`.

    The score is inversely proportional to the norm: a smaller norm yields
    a larger score. `scale_factor` rescales the result and defaults to 1.0.
    """
    score = scale_factor / norm
    return score
|
|
| |
# Words and phrases whose embedding norms are compared.
words = ['animal', 'cat', 'mammal', 'siamese', 'thing', 'eiffel tower']

# For each word, record its raw embedding norm and the score derived from it.
results = {}
for word in words:
    norm = get_word_norm(word)
    score = category_score(norm)
    results[word] = dict(norm=norm, score=score)

# Report the words from highest to lowest category score.
sorted_results = list(results.items())
sorted_results.sort(key=lambda item: item[1]['score'], reverse=True)
for word, data in sorted_results:
    print(f"Word: {word}\tNorm: {data['norm']:.4f}\tCategory Score: {data['score']:.4f}")
|
|