Timmyo1 commited on
Commit
76b352a
·
0 Parent(s):

HF Space: simplified Dockerfile for faster startup

Browse files
Files changed (4) hide show
  1. Dockerfile +22 -0
  2. README.md +84 -0
  3. app.py +83 -0
  4. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# HF Spaces runs containers as UID 1000.
RUN useradd -m -u 1000 user

WORKDIR /app

# Unbuffered stdout/stderr so uvicorn and model-loading logs appear
# immediately in the Space logs instead of being held in a pipe buffer.
ENV PYTHONUNBUFFERED=1

# Install dependencies in their own layer so edits to app.py do not
# re-run pip (sentence-transformers pulls torch automatically).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Model cache must live somewhere UID 1000 can write.
ENV HF_HOME=/home/user/.cache/huggingface

COPY app.py .
RUN chown -R user:user /app /home/user

USER user

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Gemischtes Hack Embeddings
3
+ emoji: 🎙️
4
+ colorFrom: gray
5
+ colorTo: yellow
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Embedding Server for Gemischtes Hack
12
+
13
+ FastAPI server hosting `intfloat/multilingual-e5-small` embeddings model on Hugging Face Spaces.
14
+
15
+ ## Setup
16
+
17
+ 1. Create a new HF Space: https://huggingface.co/new-space
18
+ - Name: `gemischtes-hack-embed`
19
+ - License: MIT
20
+ - SDK: Docker
21
+
22
+ 2. Clone this Space to your machine (or manually upload files)
23
+
24
+ 3. The Docker container will:
25
+ - Install dependencies from `requirements.txt`
26
+ - Load the `multilingual-e5-small` model
27
+ - Expose FastAPI on port 7860
28
+
29
+ 4. Once deployed, the Space URL will be available at:
30
+ `https://{your-username}-gemischtes-hack-embed.hf.space`
31
+
32
+ ## API
33
+
34
+ ### POST /embed
35
+ Generate embeddings for text.
36
+
37
+ ```bash
38
+ curl -X POST https://{your-username}-gemischtes-hack-embed.hf.space/embed \
39
+ -H "Content-Type: application/json" \
40
+ -d '{"text": "Was ist Gemischtes Hack?"}'
41
+ ```
42
+
43
+ Response (the embedding is a 384-dimensional vector):
44
+ ```json
45
+ {
46
+   "embedding": [0.123, -0.456, ..., 0.789]
47
+ }
48
+ ```
49
+
50
+ ### GET /health
51
+ Check server status.
52
+
53
+ ### GET /
54
+ View API info.
55
+
56
+ ## Notes
57
+
58
+ - First request takes ~10-30 seconds (model loading + HF Spaces cold start)
59
+ - Subsequent requests take ~500ms
60
+ - Space auto-sleeps after 48 hours of inactivity
61
+ - Max 2 vCPU / 16 GB RAM (free tier)
62
+
63
+ ## Integration
64
+
65
+ Update `web/src/lib/embed.ts`:
66
+
67
+ ```typescript
68
+ const HF_SPACE_URL = "https://{your-username}-gemischtes-hack-embed.hf.space";
69
+
70
+ async function embedLocal(text: string): Promise<number[]> {
71
+ const response = await fetch(`${HF_SPACE_URL}/embed`, {
72
+ method: "POST",
73
+ headers: { "Content-Type": "application/json" },
74
+ body: JSON.stringify({ text }),
75
+ });
76
+
77
+ if (!response.ok) {
78
+ throw new Error(`Embed error: ${response.status}`);
79
+ }
80
+
81
+ const data = await response.json();
82
+ return data.embedding;
83
+ }
84
+ ```
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Embedding server for multilingual-e5-small on HF Spaces."""
2
+
3
+ from contextlib import asynccontextmanager
4
+ import threading
5
+
6
+ from fastapi import FastAPI, HTTPException
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from pydantic import BaseModel
9
+ from sentence_transformers import SentenceTransformer
10
+ import logging
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ MODEL_NAME = "intfloat/multilingual-e5-small"
16
+ model = None
17
+ model_ready = threading.Event()
18
+
19
+
20
def _load_model():
    """Load the sentence-transformers model (runs in a background thread).

    Sets ``model_ready`` only on success, so /embed keeps returning 503
    rather than touching a half-initialized model. Without the try/except,
    a load failure would die silently in the daemon thread: no log entry,
    and the service would answer 503 forever with no clue why.
    """
    global model
    try:
        logger.info("Loading %s...", MODEL_NAME)
        model = SentenceTransformer(MODEL_NAME)
    except Exception:
        # Surface the failure in the Space logs; model_ready stays unset.
        logger.exception("Failed to load model %s", MODEL_NAME)
        return
    model_ready.set()
    logger.info("Model loaded successfully")
26
+
27
+
28
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start model loading in the background at server startup.

    A daemon thread lets uvicorn bind the port immediately (HF Spaces
    health-checks the port); requests get 503 until ``model_ready`` is set.
    """
    loader = threading.Thread(target=_load_model, daemon=True)
    loader.start()
    yield
33
+
34
+
35
app = FastAPI(title="Embedding Server", lifespan=lifespan)

# Wide-open CORS: this is a public, unauthenticated embedding API.
# NOTE(review): allow_credentials=True was dropped — the CORS spec forbids
# credentialed requests with a wildcard Access-Control-Allow-Origin, so
# browsers reject that combination anyway, and this API uses no cookies
# or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
44
+
45
+
46
class EmbedRequest(BaseModel):
    """Request body for POST /embed."""

    text: str  # raw text to embed; empty string short-circuits to []
48
+
49
+
50
class EmbedResponse(BaseModel):
    """Response body for POST /embed."""

    embedding: list[float]  # L2-normalized vector (384-dim for e5-small), or [] for empty input
52
+
53
+
54
@app.post("/embed", response_model=EmbedResponse)
def embed(request: EmbedRequest) -> EmbedResponse:
    """Embed ``request.text`` with the e5 model.

    Declared as a plain ``def`` (not ``async def``) on purpose: FastAPI
    runs sync endpoints in its threadpool, so the CPU-bound
    ``model.encode`` call (~500 ms) no longer blocks the event loop and
    starves concurrent requests, including /health.

    Raises:
        HTTPException: 503 while the model is still loading in the background.
    """
    if not model_ready.is_set():
        raise HTTPException(status_code=503, detail="Model still loading")
    if not request.text:
        # Preserve the existing contract: empty input -> empty vector.
        return EmbedResponse(embedding=[])
    # e5-family models require a task prefix; "query: " is the one for search queries.
    prefixed = f"query: {request.text}"
    embedding = model.encode([prefixed], normalize_embeddings=True)[0].tolist()
    return EmbedResponse(embedding=embedding)
63
+
64
+
65
@app.get("/health")
async def health():
    """Readiness probe: reports whether the background model load finished."""
    status = "ok" if model_ready.is_set() else "loading"
    return {"status": status, "model": MODEL_NAME}
71
+
72
+
73
@app.get("/")
async def root():
    """Landing endpoint: service metadata plus a directory of endpoints."""
    endpoints = {
        "POST /embed": "Generate embeddings",
        "GET /health": "Health check",
    }
    return {
        "service": "Embedding Server",
        "model": MODEL_NAME,
        "ready": model_ready.is_set(),
        "endpoints": endpoints,
    }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi>=0.104,<1.0
2
+ uvicorn>=0.24,<1.0
3
+ sentence-transformers>=3.0,<4.0
4
+ pydantic>=2.0,<3.0