# NOTE(review): the original paste included Hugging Face Spaces page chrome
# ("Spaces: / Sleeping") above the code; preserved here as a comment so the
# file remains valid Python.
import os

import gradio as gr
from model2vec import StaticModel

# Silence fork-related warnings from the HF tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Static Model2Vec embedding model — fast on CPU, handles long texts.
# Loaded once at import time so every request reuses the same weights.
model = StaticModel.from_pretrained("minishlab/potion-base-32M")
def generate_embedding(text: str) -> dict:
    """Embed a single text into one normalized embedding vector.

    Args:
        text: Input text of any length (long inputs are fine for a static model).

    Returns:
        A dict that always contains "embedding" (list of floats, empty on
        empty input) plus "text", "dimension", and "note" — the same schema
        on both the empty-input and success paths, so API consumers can rely
        on a single response shape.
    """
    if not text or not text.strip():
        return {
            "embedding": [],
            "text": "",
            "dimension": 256,  # TODO(review): hard-coded; confirm against model output dim
            "note": "Empty input",
        }
    cleaned_text = text.strip()
    # Static Model2Vec — no query/document prompt prefixes are needed.
    embedding = model.encode(
        [cleaned_text],
        convert_to_numpy=True,
        normalize_embeddings=True,  # unit-norm vectors, ready for cosine similarity
    )[0].tolist()
    # Mirror the empty-input schema (additive, backward-compatible:
    # previous consumers only read "embedding").
    return {
        "embedding": embedding,
        "text": cleaned_text,
        "dimension": len(embedding),
        "note": "ok",
    }
# Single-text Gradio interface; gr.Interface also exposes a REST API endpoint.
demo = gr.Interface(
    fn=generate_embedding,
    inputs=gr.Textbox(
        lines=12,
        placeholder="Paste your text here (500-1000+ tokens works instantly)...",
        label="Input Text",
    ),
    outputs=gr.JSON(label="Embedding Response"),
    title="⚡ Qwen3-Style Fast Embedding API (Single Text)",
    description="""Ultra-fast static embedding model (potion-base-32M).
Best reliable CPU option • 500× faster than transformers • Handles long texts instantly.
Returns **one** 256-dim embedding vector per call.""",
    examples=[
        ["What is the capital of France? Explain it in detail with historical context and why it matters today."],
        ["A very long document with many tokens to test speed... " * 50],
    ],
)

# Guard the launch so the module can be imported (e.g. by tests or other
# Spaces tooling) without immediately starting a server.
if __name__ == "__main__":
    demo.launch()