Timmyo1 commited on
Commit
76b352a
·
0 Parent(s):

HF Space: simplified Dockerfile for faster startup

Browse files
Files changed (4) hide show
  1. Dockerfile +22 -0
  2. README.md +84 -0
  3. app.py +83 -0
  4. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# HF Spaces runs containers as UID 1000.
RUN useradd -m -u 1000 user

WORKDIR /app

# Unbuffered stdout/stderr so uvicorn and model-loading logs appear
# immediately in the Space logs instead of being held in a pipe buffer.
ENV PYTHONUNBUFFERED=1

# Install dependencies in their own layer so edits to app.py do not
# re-run pip (sentence-transformers pulls torch automatically).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Model cache must live somewhere UID 1000 can write.
ENV HF_HOME=/home/user/.cache/huggingface

COPY app.py .
RUN chown -R user:user /app /home/user

USER user

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Gemischtes Hack Embeddings
3
+ emoji: 🎙️
4
+ colorFrom: gray
5
+ colorTo: yellow
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Embedding Server for Gemischtes Hack
12
+
13
+ FastAPI server hosting `intfloat/multilingual-e5-small` embeddings model on Hugging Face Spaces.
14
+
15
+ ## Setup
16
+
17
+ 1. Create a new HF Space: https://huggingface.co/new-space
18
+ - Name: `gemischtes-hack-embed`
19
+ - License: MIT
20
+ - SDK: Docker
21
+
22
+ 2. Clone this Space to your machine (or manually upload files)
23
+
24
+ 3. The Docker container will:
25
+ - Install dependencies from `requirements.txt`
26
+ - Load the `multilingual-e5-small` model
27
+ - Expose FastAPI on port 7860
28
+
29
+ 4. Once deployed, the Space URL will be available at:
30
+ `https://{your-username}-gemischtes-hack-embed.hf.space`
31
+
32
+ ## API
33
+
34
+ ### POST /embed
35
+ Generate embeddings for text.
36
+
37
+ ```bash
38
+ curl -X POST https://{your-username}-gemischtes-hack-embed.hf.space/embed \
39
+ -H "Content-Type: application/json" \
40
+ -d '{"text": "Was ist Gemischtes Hack?"}'
41
+ ```
42
+
43
+ Response (the embedding is a 384-dimensional vector):
44
+ ```json
45
+ {
46
+   "embedding": [0.123, -0.456, ..., 0.789]
47
+ }
48
+ ```
49
+
50
+ ### GET /health
51
+ Check server status.
52
+
53
+ ### GET /
54
+ View API info.
55
+
56
+ ## Notes
57
+
58
+ - First request takes ~10-30 seconds (model loading + HF Spaces cold start)
59
+ - Subsequent requests take ~500ms
60
+ - Space auto-sleeps after 48 hours of inactivity
61
+ - Max 2 vCPU / 16 GB RAM (free tier)
62
+
63
+ ## Integration
64
+
65
+ Update `web/src/lib/embed.ts`:
66
+
67
+ ```typescript
68
+ const HF_SPACE_URL = "https://{your-username}-gemischtes-hack-embed.hf.space";
69
+
70
+ async function embedLocal(text: string): Promise<number[]> {
71
+ const response = await fetch(`${HF_SPACE_URL}/embed`, {
72
+ method: "POST",
73
+ headers: { "Content-Type": "application/json" },
74
+ body: JSON.stringify({ text }),
75
+ });
76
+
77
+ if (!response.ok) {
78
+ throw new Error(`Embed error: ${response.status}`);
79
+ }
80
+
81
+ const data = await response.json();
82
+ return data.embedding;
83
+ }
84
+ ```
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Embedding server for multilingual-e5-small on HF Spaces."""
2
+
3
+ from contextlib import asynccontextmanager
4
+ import threading
5
+
6
+ from fastapi import FastAPI, HTTPException
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from pydantic import BaseModel
9
+ from sentence_transformers import SentenceTransformer
10
+ import logging
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ MODEL_NAME = "intfloat/multilingual-e5-small"
16
+ model = None
17
+ model_ready = threading.Event()
18
+
19
+
20
def _load_model():
    """Load the sentence-transformers model (runs in a background thread).

    Sets ``model_ready`` only on success, so /embed keeps returning 503
    rather than touching a half-initialized model. Without the try/except,
    a load failure would die silently in the daemon thread: no log entry,
    and the service would answer 503 forever with no clue why.
    """
    global model
    try:
        logger.info("Loading %s...", MODEL_NAME)
        model = SentenceTransformer(MODEL_NAME)
    except Exception:
        # Surface the failure in the Space logs; model_ready stays unset.
        logger.exception("Failed to load model %s", MODEL_NAME)
        return
    model_ready.set()
    logger.info("Model loaded successfully")
26
+
27
+
28
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start model loading in the background at server startup.

    A daemon thread lets uvicorn bind the port immediately (HF Spaces
    health-checks the port); requests get 503 until ``model_ready`` is set.
    """
    loader = threading.Thread(target=_load_model, daemon=True)
    loader.start()
    yield
33
+
34
+
35
app = FastAPI(title="Embedding Server", lifespan=lifespan)

# Wide-open CORS: this is a public, unauthenticated embedding API.
# NOTE(review): allow_credentials=True was dropped — the CORS spec forbids
# credentialed requests with a wildcard Access-Control-Allow-Origin, so
# browsers reject that combination anyway, and this API uses no cookies
# or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
44
+
45
+
46
class EmbedRequest(BaseModel):
    """Request body for POST /embed."""

    text: str  # raw text to embed; empty string short-circuits to []
48
+
49
+
50
class EmbedResponse(BaseModel):
    """Response body for POST /embed."""

    embedding: list[float]  # L2-normalized vector (384-dim for e5-small), or [] for empty input
52
+
53
+
54
@app.post("/embed", response_model=EmbedResponse)
def embed(request: EmbedRequest) -> EmbedResponse:
    """Embed ``request.text`` with the e5 model.

    Declared as a plain ``def`` (not ``async def``) on purpose: FastAPI
    runs sync endpoints in its threadpool, so the CPU-bound
    ``model.encode`` call (~500 ms) no longer blocks the event loop and
    starves concurrent requests, including /health.

    Raises:
        HTTPException: 503 while the model is still loading in the background.
    """
    if not model_ready.is_set():
        raise HTTPException(status_code=503, detail="Model still loading")
    if not request.text:
        # Preserve the existing contract: empty input -> empty vector.
        return EmbedResponse(embedding=[])
    # e5-family models require a task prefix; "query: " is the one for search queries.
    prefixed = f"query: {request.text}"
    embedding = model.encode([prefixed], normalize_embeddings=True)[0].tolist()
    return EmbedResponse(embedding=embedding)
63
+
64
+
65
@app.get("/health")
async def health():
    """Readiness probe: reports whether the background model load finished."""
    status = "ok" if model_ready.is_set() else "loading"
    return {"status": status, "model": MODEL_NAME}
71
+
72
+
73
@app.get("/")
async def root():
    """Landing endpoint: service metadata plus a directory of endpoints."""
    endpoints = {
        "POST /embed": "Generate embeddings",
        "GET /health": "Health check",
    }
    return {
        "service": "Embedding Server",
        "model": MODEL_NAME,
        "ready": model_ready.is_set(),
        "endpoints": endpoints,
    }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi>=0.104,<1.0
2
+ uvicorn>=0.24,<1.0
3
+ sentence-transformers>=3.0,<4.0
4
+ pydantic>=2.0,<3.0