chmielvu committed on
Commit
90582e1
·
verified ·
1 Parent(s): 39178c6

Create Ollama-compatible code embedding space

Browse files
Files changed (5) hide show
  1. Dockerfile +16 -0
  2. README.md +42 -6
  3. __pycache__/app.cpython-312.pyc +0 -0
  4. app.py +171 -0
  5. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# and give it a writable HOME / model cache so the lazy model download on the
# first request does not fail with a permission error.
RUN useradd --create-home --uid 1000 user
ENV HOME=/home/user \
    HF_HOME=/home/user/.cache/huggingface

WORKDIR /app

# Install dependencies first so this layer is cached across app.py changes.
COPY requirements.txt /app/requirements.txt
RUN pip install -r /app/requirements.txt

COPY --chown=user app.py /app/app.py

USER user

# 11434 is the port the README declares as app_port for this Space.
EXPOSE 11434

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "11434"]
README.md CHANGED
@@ -1,10 +1,46 @@
1
  ---
2
- title: Ollama Code Embed
3
- emoji: 📚
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
7
- pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: ollama-code-embed
3
+ emoji: 🧩
4
+ colorFrom: gray
5
+ colorTo: indigo
6
  sdk: docker
7
+ app_port: 11434
8
+ pinned: true
9
  ---
10
 
11
+ # Ollama-Compatible Code Embeddings
12
+
13
+ This Space exposes an Ollama-style embedding API backed by the same code embedding model used in `Code-Embed-Qwen-rerank-sentiment`.
14
+
15
+ ## Model
16
+
17
+ - Embeddings: `jinaai/jina-code-embeddings-0.5b`
18
+ - Served model name: `code-embed`
19
+
20
+ ## Endpoints
21
+
22
+ - `GET /`
23
+ - `GET /api/version`
24
+ - `GET /api/tags`
25
+ - `POST /api/embed`
26
+ - `POST /api/embeddings`
27
+ - `POST /embed`
28
+ - `GET /health`
29
+
30
+ ## Notes
31
+
32
+ - The server accepts Ollama-style request bodies and ignores extra fields such as `api_key`.
33
+ - `/api/embed` accepts `input` as either a string or a list of strings.
34
+ - `/api/embeddings` is included for older Ollama clients that send a single `prompt`.
35
+
36
+ ## Example
37
+
38
+ ```bash
39
+ curl -X POST "$SPACE_URL/api/embed" \
40
+ -H "Content-Type: application/json" \
41
+ -d '{
42
+ "model": "code-embed",
43
+ "input": ["def hello(name): return f\"Hello {name}\""],
44
+ "truncate": true
45
+ }'
46
+ ```
__pycache__/app.cpython-312.pyc ADDED
Binary file (8.49 kB). View file
 
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import Any
3
+
4
+ import numpy as np
5
+ import torch
6
+ from fastapi import FastAPI, HTTPException
7
+ from fastapi.responses import HTMLResponse
8
+ from pydantic import BaseModel, ConfigDict
9
+ from sentence_transformers import SentenceTransformer
10
+
11
# Inference-only process: cap torch at two threads and switch autograd off.
torch.set_num_threads(2)
torch.set_grad_enabled(False)

# Service identity.
APP_TITLE = "ollama-code-embed"
SERVER_VERSION = "0.11.0"  # returned by GET /api/version

# Served model: MODEL_ID is what gets loaded, MODEL_NAME is the public alias.
MODEL_ID = "jinaai/jina-code-embeddings-0.5b"
MODEL_NAME = "code-embed"
MODEL_DIMENSIONS = 896
MODEL_CREATED_AT = "2026-03-11T00:00:00Z"  # reported as modified_at by GET /api/tags

app = FastAPI(title=APP_TITLE, version="1.0.0")

# Lazy-load state, filled in by get_model() on first use.
_model: SentenceTransformer | None = None
_load_duration_ns: int = 0
_loaded_at_ns: int | None = None
25
+
26
+
27
# Base request model: unknown fields in the request body are kept instead of
# rejected, so clients may send extra keys without triggering a validation error.
class CompatibleRequest(BaseModel):
    model_config = ConfigDict(extra="allow")
29
+
30
+
31
# Request body shared by every embedding route.
class EmbedRequest(CompatibleRequest):
    # Requested model; embed_impl accepts either MODEL_NAME or MODEL_ID.
    model: str = MODEL_NAME
    # Text(s) to embed: one string or a batch of strings.
    input: str | list[str] | None = None
    # Single-text alternative, used only when `input` is absent.
    prompt: str | None = None
    # The fields below are accepted for client compatibility; of these, only
    # `dimensions` is read by the code in this file.
    truncate: bool = True
    dimensions: int | None = None
    options: dict[str, Any] | None = None
    keep_alive: str | int | None = None
39
+
40
+
41
def get_model() -> SentenceTransformer:
    """Return the shared embedding model, loading it on the first call.

    The first call constructs the SentenceTransformer on CPU and records how
    long that took (``_load_duration_ns``) and when it finished
    (``_loaded_at_ns``). Later calls return the cached instance.
    """
    global _model, _loaded_at_ns, _load_duration_ns
    if _model is not None:
        return _model

    load_started = time.perf_counter_ns()
    _model = SentenceTransformer(MODEL_ID, trust_remote_code=True, device="cpu")
    _load_duration_ns = time.perf_counter_ns() - load_started
    _loaded_at_ns = time.time_ns()
    return _model
49
+
50
+
51
def normalize_inputs(request: EmbedRequest) -> list[str]:
    """Return the texts to embed as a list.

    ``input`` takes precedence over ``prompt``. A list ``input`` is returned
    as-is; a single string (from either field) is wrapped in a one-element list.

    Raises:
        HTTPException: 400 when neither ``input`` nor ``prompt`` is present.
    """
    payload = request.input
    if isinstance(payload, list):
        return payload
    if payload is not None:
        return [payload]
    if request.prompt is None:
        raise HTTPException(status_code=400, detail="Request must include 'input' or 'prompt'")
    return [request.prompt]
57
+
58
+
59
+ def maybe_truncate(vector: np.ndarray, dimensions: int | None) -> np.ndarray:
60
+ if dimensions is None or dimensions <= 0 or dimensions >= vector.shape[0]:
61
+ return vector
62
+ truncated = vector[:dimensions]
63
+ norm = np.linalg.norm(truncated)
64
+ if norm > 0:
65
+ truncated = truncated / norm
66
+ return truncated
67
+
68
+
69
def estimate_prompt_eval_count(texts: list[str], model: SentenceTransformer) -> int:
    """Return the total token count of ``texts``.

    Uses the model's ``tokenizer`` (special tokens included) when the model
    exposes one; otherwise falls back to a whitespace word count with a
    minimum of one per text.
    """
    tok = getattr(model, "tokenizer", None)
    if tok is not None:
        return sum(len(tok.encode(item, add_special_tokens=True)) for item in texts)
    return sum(max(1, len(item.split())) for item in texts)
74
+
75
+
76
# Landing page: a small static HTML document naming the backing model, the
# served model alias, and every API route this app registers (including
# /health, which the app serves and the README lists).
@app.get("/", response_class=HTMLResponse)
def root() -> str:
    # Literal CSS braces are doubled because this is an f-string.
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>{APP_TITLE}</title>
<style>
body {{ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; margin: 32px; line-height: 1.45; }}
code {{ background: #f4f4f4; padding: 2px 6px; border-radius: 4px; }}
</style>
</head>
<body>
<h1>Ollama-Compatible Code Embeddings</h1>
<p>Model: <code>{MODEL_ID}</code></p>
<p>Served name: <code>{MODEL_NAME}</code></p>
<ul>
<li><code>GET /api/version</code></li>
<li><code>GET /api/tags</code></li>
<li><code>POST /api/embed</code></li>
<li><code>POST /api/embeddings</code></li>
<li><code>POST /embed</code></li>
<li><code>GET /health</code></li>
</ul>
</body>
</html>"""
102
+
103
+
104
# Liveness probe: answers with the current wall-clock time in Unix seconds.
# It does not touch the model.
@app.get("/health")
def health() -> dict[str, float]:
    now = time.time()
    return {"unix": now}
107
+
108
+
109
# Version route: reports the SERVER_VERSION constant under the "version" key.
@app.get("/api/version")
def api_version() -> dict[str, str]:
    body = {"version": SERVER_VERSION}
    return body
112
+
113
+
114
# Model listing route: advertises the single served model. All values are
# static; "size" is reported as 0 and "digest" carries the backing model id.
@app.get("/api/tags")
def api_tags() -> dict[str, Any]:
    details = {
        "format": "sentence-transformers",
        "family": "jina",
        "families": ["jina", "embedding"],
        "parameter_size": "0.5B",
        "quantization_level": "F32",
    }
    entry = {
        "name": MODEL_NAME,
        "model": MODEL_NAME,
        "modified_at": MODEL_CREATED_AT,
        "size": 0,
        "digest": MODEL_ID,
        "details": details,
    }
    return {"models": [entry]}
134
+
135
+
136
def embed_impl(request: EmbedRequest) -> dict[str, Any]:
    """Embed the request's texts and build the embedding response payload.

    Args:
        request: Parsed request body. ``model`` must be the served alias or
            the backing model id; texts come from ``input`` or ``prompt``;
            ``dimensions`` optionally shortens each vector.

    Returns:
        A dict with ``model``, ``embeddings`` (one float list per text),
        ``total_duration`` and ``load_duration`` in nanoseconds, and
        ``prompt_eval_count``.

    Raises:
        HTTPException: 404 for an unknown model name; 400 (raised by
            ``normalize_inputs``) when no text was supplied.
    """
    if request.model not in {MODEL_NAME, MODEL_ID}:
        raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found")

    texts = normalize_inputs(request)

    # Time the whole request, model acquisition included, so that
    # total_duration is never smaller than load_duration. load_duration is
    # measured per request: large on the cold start, near zero afterwards.
    started = time.perf_counter_ns()
    model = get_model()
    load_duration = time.perf_counter_ns() - started

    vectors = np.asarray(model.encode(texts, convert_to_numpy=True))
    payload = [maybe_truncate(vector, request.dimensions).astype(np.float32).tolist() for vector in vectors]
    total_duration = time.perf_counter_ns() - started

    return {
        "model": MODEL_NAME,
        "embeddings": payload,
        "total_duration": total_duration,
        "load_duration": load_duration,
        "prompt_eval_count": estimate_prompt_eval_count(texts, model),
    }
153
+
154
+
155
# Batch embedding route, registered under both /api/embed and /embed.
# Returns the embed_impl payload unchanged.
@app.post("/api/embed")
@app.post("/embed")
def api_embed(request: EmbedRequest) -> dict[str, Any]:
    response = embed_impl(request)
    return response
159
+
160
+
161
# Single-embedding route: runs the shared implementation and returns only the
# first vector under "embedding" (an empty list when there are none), followed
# by the same metadata fields as the batch route.
@app.post("/api/embeddings")
def api_embeddings(request: EmbedRequest) -> dict[str, Any]:
    result = embed_impl(request)
    vectors = result["embeddings"]
    response: dict[str, Any] = {"embedding": vectors[0] if vectors else []}
    for key in ("model", "total_duration", "load_duration", "prompt_eval_count"):
        response[key] = result[key]
    return response
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi>=0.116.0
2
+ numpy>=2.2.0
3
+ sentence-transformers>=5.1.0
4
+ torch>=2.8.0
5
+ transformers>=4.57.0
6
+ uvicorn[standard]>=0.35.0