Spaces:

chmielvu
/

ollama-code-embed

Running

App Files Files Community

chmielvu committed on Mar 11

Commit

90582e1

verified ·

1 Parent(s): 39178c6

Create Ollama-compatible code embedding space

Browse files

Files changed (5) hide show

Dockerfile +16 -0
README.md +42 -6
__pycache__/app.cpython-312.pyc +0 -0
app.py +171 -0
requirements.txt +6 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+FROM python:3.11-slim
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
+WORKDIR /app
+COPY requirements.txt /app/requirements.txt
+# Dependencies are installed system-wide while still root, before dropping privileges.
+RUN pip install -r /app/requirements.txt
+# Hugging Face Docker Spaces run the container as UID 1000. Give that UID a real
+# account and home directory so the model download triggered by the first request
+# has a writable cache location instead of falling back to an unwritable path.
+RUN useradd -m -u 1000 user
+ENV HOME=/home/user \
+    HF_HOME=/home/user/.cache/huggingface
+COPY --chown=user app.py /app/app.py
+USER user
+# Must match `app_port` in the README front matter.
+EXPOSE 11434
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "11434"]

README.md CHANGED Viewed

@@ -1,10 +1,46 @@
 ---
-title: Ollama Code Embed
-emoji: 📚
-colorFrom: yellow
-colorTo: red
 sdk: docker
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: ollama-code-embed
+emoji: 🧩
+colorFrom: gray
+colorTo: indigo
 sdk: docker
+app_port: 11434
+pinned: true
 ---
+# Ollama-Compatible Code Embeddings
+This Space exposes an Ollama-style embedding API backed by the same code embedding model used in `Code-Embed-Qwen-rerank-sentiment`.
+## Model
+- Embeddings: `jinaai/jina-code-embeddings-0.5b`
+- Served model name: `code-embed`
+## Endpoints
+- `GET /`
+- `GET /api/version`
+- `GET /api/tags`
+- `POST /api/embed`
+- `POST /api/embeddings`
+- `POST /embed`
+- `GET /health`
+## Notes
+- The server accepts Ollama-style request bodies and ignores extra fields such as `api_key`.
+- `/api/embed` accepts `input` as either a string or a list of strings.
+- `/api/embeddings` is included for older Ollama clients that send a single `prompt`.
+## Example
+```bash
+curl -X POST "$SPACE_URL/api/embed" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "code-embed",
+    "input": ["def hello(name): return f\"Hello {name}\""],
+    "truncate": true
+  }'
+```

__pycache__/app.cpython-312.pyc ADDED Viewed

Binary file (8.49 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import threading
+import time
+from typing import Any
+import numpy as np
+import torch
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import HTMLResponse
+from pydantic import BaseModel, ConfigDict
+from sentence_transformers import SentenceTransformer
+torch.set_grad_enabled(False)  # inference-only service: turn autograd off. NOTE(review): grad mode is thread-local in PyTorch, so this may not cover FastAPI worker threads — confirm
+torch.set_num_threads(2)  # cap torch's intra-op CPU threads at 2
+APP_TITLE = "ollama-code-embed"  # FastAPI title and <title> of the landing page
+MODEL_ID = "jinaai/jina-code-embeddings-0.5b"  # identifier handed to SentenceTransformer in get_model()
+MODEL_NAME = "code-embed"  # name listed by /api/tags and accepted in request bodies
+MODEL_CREATED_AT = "2026-03-11T00:00:00Z"  # reported as "modified_at" by /api/tags
+MODEL_DIMENSIONS = 896  # NOTE(review): not referenced in the visible code; presumably the model's output width — confirm
+SERVER_VERSION = "0.11.0"  # returned by /api/version
+app = FastAPI(title=APP_TITLE, version="1.0.0")
+_model: SentenceTransformer | None = None  # created lazily by get_model()
+_loaded_at_ns: int | None = None  # time.time_ns() at model load; written by get_model(), not read in the visible code
+_load_duration_ns: int = 0  # duration of the model load in ns; echoed as "load_duration" in responses
+class CompatibleRequest(BaseModel):  # base class for request bodies that tolerate undeclared fields
+    model_config = ConfigDict(extra="allow")  # keep, rather than reject, fields not declared on the model
+class EmbedRequest(CompatibleRequest):  # body shared by /api/embed, /embed and /api/embeddings
+    model: str = MODEL_NAME  # checked against MODEL_NAME / MODEL_ID in embed_impl
+    input: str | list[str] | None = None  # one text or a batch; takes precedence over `prompt`
+    prompt: str | None = None  # single text, used only when `input` is None
+    truncate: bool = True  # NOTE(review): accepted but never read by the visible handlers
+    dimensions: int | None = None  # optional output width, applied by maybe_truncate
+    options: dict[str, Any] | None = None  # accepted but not read in the visible code
+    keep_alive: str | int | None = None  # accepted but not read in the visible code
+def get_model() -> SentenceTransformer:
+    global _model, _loaded_at_ns, _load_duration_ns
+    if _model is None:
+        started = time.perf_counter_ns()
+        _model = SentenceTransformer(MODEL_ID, trust_remote_code=True, device="cpu")
+        _load_duration_ns = time.perf_counter_ns() - started
+        _loaded_at_ns = time.time_ns()
+    return _model
+def normalize_inputs(request: EmbedRequest) -> list[str]:  # turn `input`/`prompt` into a list of texts; raises HTTP 400 when both are None
+    if request.input is not None:  # `input` wins when both fields are present
+        return request.input if isinstance(request.input, list) else [request.input]  # wrap a lone string in a list
+    if request.prompt is not None:
+        return [request.prompt]
+    raise HTTPException(status_code=400, detail="Request must include 'input' or 'prompt'")
+def maybe_truncate(vector: np.ndarray, dimensions: int | None) -> np.ndarray:  # optionally shorten a vector to its first `dimensions` entries
+    if dimensions is None or dimensions <= 0 or dimensions >= vector.shape[0]:  # nothing to cut: unset, non-positive, or not smaller than the vector
+        return vector  # returned as-is; no re-normalization on this path
+    truncated = vector[:dimensions]
+    norm = np.linalg.norm(truncated)
+    if norm > 0:  # skip the division for an all-zero prefix
+        truncated = truncated / norm  # rescale the shortened vector to unit L2 length
+    return truncated
+def estimate_prompt_eval_count(texts: list[str], model: SentenceTransformer) -> int:  # total token count summed over all texts
+    tokenizer = getattr(model, "tokenizer", None)
+    if tokenizer is None:  # fallback when the model exposes no `tokenizer` attribute
+        return sum(max(1, len(text.split())) for text in texts)  # whitespace word count, at least 1 per text
+    return sum(len(tokenizer.encode(text, add_special_tokens=True)) for text in texts)  # special tokens are included in the count
+@app.get("/", response_class=HTMLResponse)
+def root() -> str:
+    return f"""<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>{APP_TITLE}</title>
+  <style>
+    body {{ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; margin: 32px; line-height: 1.45; }}
+    code {{ background: #f4f4f4; padding: 2px 6px; border-radius: 4px; }}
+  </style>
+</head>
+<body>
+  <h1>Ollama-Compatible Code Embeddings</h1>
+  <p>Model: <code>{MODEL_ID}</code></p>
+  <p>Served name: <code>{MODEL_NAME}</code></p>
+  <ul>
+    <li><code>GET /api/version</code></li>
+    <li><code>GET /api/tags</code></li>
+    <li><code>POST /api/embed</code></li>
+    <li><code>POST /api/embeddings</code></li>
+    <li><code>POST /embed</code></li>
+  </ul>
+</body>
+</html>"""
+@app.get("/health")
+def health() -> dict[str, float]:  # answers without calling get_model(), so it never triggers a model load
+    return {"unix": time.time()}  # current Unix time in seconds
+@app.get("/api/version")
+def api_version() -> dict[str, str]:  # report the version string this server advertises
+    return {"version": SERVER_VERSION}
+@app.get("/api/tags")
+def api_tags() -> dict[str, Any]:  # list the single served model; every value is a constant, the model is not loaded
+    return {
+        "models": [
+            {
+                "name": MODEL_NAME,
+                "model": MODEL_NAME,
+                "modified_at": MODEL_CREATED_AT,
+                "size": 0,  # fixed value, not a measured size
+                "digest": MODEL_ID,  # carries the model identifier string rather than a computed hash
+                "details": {
+                    "format": "sentence-transformers",
+                    "family": "jina",
+                    "families": ["jina", "embedding"],
+                    "parameter_size": "0.5B",
+                    "quantization_level": "F32",  # hard-coded label
+                },
+            }
+        ]
+    }
+def embed_impl(request: EmbedRequest) -> dict[str, Any]:
+    if request.model not in {MODEL_NAME, MODEL_ID}:
+        raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found")
+    texts = normalize_inputs(request)
+    model = get_model()
+    started = time.perf_counter_ns()
+    vectors = np.asarray(model.encode(texts, convert_to_numpy=True))
+    total_duration = time.perf_counter_ns() - started
+    payload = [maybe_truncate(vector, request.dimensions).astype(np.float32).tolist() for vector in vectors]
+    return {
+        "model": MODEL_NAME,
+        "embeddings": payload,
+        "total_duration": total_duration,
+        "load_duration": _load_duration_ns,
+        "prompt_eval_count": estimate_prompt_eval_count(texts, model),
+    }
+@app.post("/api/embed")
+@app.post("/embed")  # same handler registered under a second path
+def api_embed(request: EmbedRequest) -> dict[str, Any]:  # batch endpoint: returns embed_impl's payload unchanged
+    return embed_impl(request)
+@app.post("/api/embeddings")
+def api_embeddings(request: EmbedRequest) -> dict[str, Any]:  # single-vector variant of /api/embed
+    result = embed_impl(request)
+    first = result["embeddings"][0] if result["embeddings"] else []  # only the first vector is returned; [] when no vectors were produced
+    return {
+        "embedding": first,  # singular key, unlike "embeddings" from /api/embed
+        "model": result["model"],
+        "total_duration": result["total_duration"],
+        "load_duration": result["load_duration"],
+        "prompt_eval_count": result["prompt_eval_count"],  # counts every text embedded by embed_impl, not just the first
+    }

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+fastapi>=0.116.0
+numpy>=2.2.0
+sentence-transformers>=5.1.0
+torch>=2.8.0
+transformers>=4.57.0
+uvicorn[standard]>=0.35.0