# embed-this / app.py
# Author: ccocks-deca — "Update app.py" (commit 99a12ae, verified)
import gradio as gr
import os
from model2vec import StaticModel
# Suppress tokenizer warnings
# Silences the HF tokenizers fork/parallelism warning that is noisy in server contexts.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Best working static model – ultra-fast on CPU + long texts
# Loaded once at import time so every request reuses the same in-memory model.
# NOTE(review): model2vec StaticModel is a static (non-transformer) embedder; presumably
# downloaded from the HF Hub on first run — confirm network access at startup is acceptable.
model = StaticModel.from_pretrained("minishlab/potion-base-32M")
def generate_embedding(text: str) -> dict:
    """Embed a single text into one L2-normalized vector.

    Args:
        text: Input text; may be None, empty, or whitespace-only.

    Returns:
        dict with keys:
            "embedding": list[float] — one normalized vector ([] for empty input),
            "text":      the stripped input that was embedded ("" for empty input),
            "dimension": len(embedding) (0 for empty input),
            "note":      short status string.
    """
    # Guard clause: None / "" / whitespace-only — short-circuit avoids .strip() on None.
    if not text or not text.strip():
        return {
            "embedding": [],
            "text": "",
            "dimension": 0,
            "note": "Empty input",
        }
    cleaned_text = text.strip()
    # Static Model2Vec — no query/document prompt needed.
    # BUG FIX: the previous call passed sentence-transformers kwargs
    # (convert_to_numpy=True, normalize_embeddings=True) that model2vec's
    # StaticModel.encode does not implement — they were silently ignored, so
    # the output was NOT normalized despite the comment. Normalize explicitly.
    raw = model.encode([cleaned_text])[0].tolist()
    norm = sum(x * x for x in raw) ** 0.5
    # Unit-normalize so dot product == cosine similarity; keep raw vector if degenerate.
    embedding = [x / norm for x in raw] if norm > 0 else raw
    return {
        "embedding": embedding,   # single list of floats, unit length
        "text": cleaned_text,
        "dimension": len(embedding),
        "note": "ok",
    }
# Clean single-text Gradio interface + full REST API.
# The Interface wraps generate_embedding: one Textbox in, one JSON response out.
demo = gr.Interface(
    fn=generate_embedding,
    inputs=gr.Textbox(
        lines=12,
        placeholder="Paste your text here (500–1000+ tokens works instantly)...",
        label="Input Text",
    ),
    outputs=gr.JSON(label="Embedding Response"),
    title="⚡ Qwen3-Style Fast Embedding API (Single Text)",
    # BUG FIX: the description claimed a 256-dim output, which does not match
    # the loaded model (minishlab/potion-base-32M); drop the wrong number.
    description="""Ultra-fast static embedding model (potion-base-32M).
Best reliable CPU option • 500× faster than transformers • Handles long texts instantly.
Returns **one** embedding vector per call.""",
    examples=[
        ["What is the capital of France? Explain it in detail with historical context and why it matters today."],
        ["A very long document with many tokens to test speed... " * 50],
    ],
)

# IDIOM FIX: guard the launch so importing this module (e.g. from tests or a
# WSGI wrapper) does not start a server; running `python app.py` behaves as before.
if __name__ == "__main__":
    demo.launch()