# embed-this / app.py
# Author: ccocks-deca — "Update app.py" (commit 99a12ae, verified)
import gradio as gr
import os
from model2vec import StaticModel
# Suppress tokenizer warnings
# Silences the HF tokenizers fork/parallelism warning that is noisy in server contexts.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Best working static model – ultra-fast on CPU + long texts
# Loaded once at import time so every request reuses the same in-memory model.
# NOTE(review): model2vec StaticModel is a static (non-transformer) embedder; presumably
# downloaded from the HF Hub on first run — confirm network access at startup is acceptable.
model = StaticModel.from_pretrained("minishlab/potion-base-32M")
def generate_embedding(text: str) -> dict:
    """Embed a single text into one L2-normalized vector.

    Args:
        text: Input text; may be None, empty, or whitespace-only.

    Returns:
        dict with keys:
            "embedding": list[float] — one normalized vector ([] for empty input),
            "text":      the stripped input that was embedded ("" for empty input),
            "dimension": len(embedding) (0 for empty input),
            "note":      short status string.
    """
    # Guard clause: None / "" / whitespace-only — short-circuit avoids .strip() on None.
    if not text or not text.strip():
        return {
            "embedding": [],
            "text": "",
            "dimension": 0,
            "note": "Empty input",
        }
    cleaned_text = text.strip()
    # Static Model2Vec — no query/document prompt needed.
    # BUG FIX: the previous call passed sentence-transformers kwargs
    # (convert_to_numpy=True, normalize_embeddings=True) that model2vec's
    # StaticModel.encode does not implement — they were silently ignored, so
    # the output was NOT normalized despite the comment. Normalize explicitly.
    raw = model.encode([cleaned_text])[0].tolist()
    norm = sum(x * x for x in raw) ** 0.5
    # Unit-normalize so dot product == cosine similarity; keep raw vector if degenerate.
    embedding = [x / norm for x in raw] if norm > 0 else raw
    return {
        "embedding": embedding,   # single list of floats, unit length
        "text": cleaned_text,
        "dimension": len(embedding),
        "note": "ok",
    }
# Clean single-text Gradio interface + full REST API.
# The Interface wraps generate_embedding: one Textbox in, one JSON response out.
demo = gr.Interface(
    fn=generate_embedding,
    inputs=gr.Textbox(
        lines=12,
        placeholder="Paste your text here (500–1000+ tokens works instantly)...",
        label="Input Text",
    ),
    outputs=gr.JSON(label="Embedding Response"),
    title="⚡ Qwen3-Style Fast Embedding API (Single Text)",
    # BUG FIX: the description claimed a 256-dim output, which does not match
    # the loaded model (minishlab/potion-base-32M); drop the wrong number.
    description="""Ultra-fast static embedding model (potion-base-32M).
Best reliable CPU option • 500× faster than transformers • Handles long texts instantly.
Returns **one** embedding vector per call.""",
    examples=[
        ["What is the capital of France? Explain it in detail with historical context and why it matters today."],
        ["A very long document with many tokens to test speed... " * 50],
    ],
)

# IDIOM FIX: guard the launch so importing this module (e.g. from tests or a
# WSGI wrapper) does not start a server; running `python app.py` behaves as before.
if __name__ == "__main__":
    demo.launch()