Alibrown commited on
Commit
4ee3607
·
verified ·
1 Parent(s): 06b30ae

Upload 7 files

Browse files
Files changed (7) hide show
  1. Dockerfile +16 -0
  2. adi.py +247 -0
  3. main.py +198 -0
  4. model.py +179 -0
  5. requirements.txt +10 -0
  6. smollm.py +88 -0
  7. train.py +152 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim 3.11 base keeps the image small and matches the service's Python runtime.
FROM python:3.11-slim

WORKDIR /app

# build-essential lets pip compile wheels that have no prebuilt binaries;
# removing the apt lists afterwards keeps this layer small.
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached independently
# of source-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# 7860 is the conventional Hugging Face Spaces port (see CMD below).
EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
adi.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =====================================================================================================
2
+ # Anti-Dump Algorithm (ADI)
3
+ # Copyright 2008 - 2025 S. Volkan Kücükbudak
4
+ # Apache License V2 + ESOL 1.1
5
+ # https://github.com/VolkanSah/Anti-Dump-Index
6
+ # =====================================================================================================
7
+
8
+ from dataclasses import dataclass
9
+ from typing import List, Dict, Tuple, Optional
10
+ import re
11
+ import numpy as np
12
+ import json
13
+ from pathlib import Path
14
+
15
@dataclass
class InputMetrics:
    """Raw metric values extracted from one piece of input text."""
    noise: float                      # noise-word ratio (urgency/informal/vague terms per word)
    effort: float                     # 0-5 structural quality (sentences, formatting, punctuation)
    context: float                    # 0-5 background/environment/goal indicators
    details: float                    # 0-5 technical specificity (code terms, dotted names)
    bonus_factors: float              # extra credit: code blocks, links, bullet lists
    penalty_factors: float            # 0-5 penalty: caps abuse, punctuation abuse, very short input
    repetition_penalty: float = 0.0   # 0-3 penalty for heavily repeated words

class DumpindexAnalyzer:
    """Anti-Dump Index (ADI) scorer for free-text input.

    ``analyze_input`` extracts the :class:`InputMetrics` values with regex
    heuristics and folds them into a single ADI score (lower = better input),
    which ``_make_decision`` maps to REJECT / MEDIUM_PRIORITY / HIGH_PRIORITY.
    ``validate_weights`` measures decision accuracy against labeled samples.
    """

    def __init__(self, weights: Dict[str, float] = None, enable_logging: bool = False):
        """
        Args:
            weights: per-metric weights used by ``calculate_adi``; missing
                argument falls back to the defaults below.
            enable_logging: when True, every ``analyze_input`` call appends a
                JSON line to ``adi_logs.jsonl``.
        """
        self.weights = weights or {
            'noise': 1.0,
            'effort': 2.0,
            'context': 1.5,
            'details': 1.5,
            'bonus': 0.5,
            'penalty': 1.0
        }
        self.enable_logging = enable_logging
        self.log_file = Path('adi_logs.jsonl')

        # Each pattern family is matched against the lower-cased text.
        self.noise_patterns = {
            'urgency': r'\b(urgent|asap|emergency|!!+|\?\?+)\b',
            'informal': r'\b(pls|plz|thx|omg|wtf)\b',
            'vague': r'\b(something|somehow|maybe|probably)\b'
        }
        self.detail_patterns = {
            'code_elements': r'\b(function|class|method|variable|array|object|def|return)\b',
            'technical_terms': r'\b(error|exception|bug|issue|crash|fail|traceback|stack)\b',
            'specifics': r'[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*'
        }
        self.context_indicators = {
            'background': r'\b(because|since|as|when|while)\b',
            'environment': r'\b(using|version|environment|platform|system)\b',
            'goal': r'\b(trying to|want to|need to|goal is|attempting to)\b'
        }

    def _has_negation_before(self, text: str, match_pos: int, window_size: int = 50) -> bool:
        """True if a negation word appears within ``window_size`` chars before ``match_pos``."""
        window_start = max(0, match_pos - window_size)
        window = text[window_start:match_pos].lower()
        return bool(re.search(r'\b(no|not|never|without|dont|don\'t|doesnt|doesn\'t)\b', window))

    def calculate_repetition_penalty(self, text: str) -> float:
        """Penalty in [0, 3] combining low unique-word ratio and the most-repeated word."""
        words = text.lower().split()
        if len(words) == 0:
            return 0.0
        unique_ratio = len(set(words)) / len(words)
        word_counts = {}
        for word in words:
            if len(word) > 3:  # ignore short filler words (a, the, and, ...)
                word_counts[word] = word_counts.get(word, 0) + 1
        max_repetition = max(word_counts.values()) if word_counts else 1
        repetition_factor = min(max_repetition / len(words), 0.5)
        penalty = (1 - unique_ratio) * 2 + repetition_factor * 2
        return min(penalty, 3.0)

    def calculate_noise(self, text: str) -> Tuple[float, Dict]:
        """Return (noise-words-per-word ratio, matches grouped by category)."""
        noise_count = 0
        noise_details = {}
        for category, pattern in self.noise_patterns.items():
            matches = re.findall(pattern, text.lower())
            noise_count += len(matches)
            noise_details[category] = matches
        total_words = len(text.split())
        return (noise_count / max(total_words, 1), noise_details)

    def calculate_effort(self, text: str) -> float:
        """Structural-quality score in [0, 5]: sentence count/length, formatting, punctuation."""
        sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]
        if not sentences:
            return 0.0
        avg_sentence_length = np.mean([len(s.split()) for s in sentences])
        has_formatting = bool(re.search(r'```|\*\*|\n\s*\n', text))
        has_punctuation = bool(re.search(r'[.,;:]', text))
        sentence_quality = (
            (len(sentences) >= 3) * 1.0 +
            (20 <= avg_sentence_length <= 50) * 2.0 +
            (avg_sentence_length >= 5) * 0.5
        )
        return min(5.0, sentence_quality + has_formatting * 1.5 + has_punctuation * 1.5)

    def calculate_context(self, text: str) -> float:
        """Score in [0, 5]: +1 per indicator category with a non-negated match."""
        context_score = 0.0
        for category, pattern in self.context_indicators.items():
            for match in re.finditer(pattern, text.lower()):
                if not self._has_negation_before(text, match.start()):
                    context_score += 1.0
                    break  # count each category at most once
        return min(5.0, context_score)

    def calculate_details(self, text: str) -> Tuple[float, Dict]:
        """Return (technical-detail score in [0, 5], matches grouped by category)."""
        detail_score = 0.0
        detail_findings = {}
        for category, pattern in self.detail_patterns.items():
            matches = re.findall(pattern, text.lower())
            score = len(matches) * 0.5
            detail_findings[category] = matches
            detail_score += score
        return (min(5.0, detail_score), detail_findings)

    def calculate_bonus_factors(self, text: str) -> float:
        """Extra credit for fenced code blocks (+1), markdown links (+0.5), bullet lists (+0.5)."""
        bonus_score = 0.0
        if re.search(r'```[\s\S]*?```', text):
            bonus_score += 1.0
        if re.search(r'\[.*?\]\(.*?\)', text):
            bonus_score += 0.5
        if re.search(r'\n\s*[-*+]\s', text):
            bonus_score += 0.5
        return bonus_score

    def calculate_penalty_factors(self, text: str) -> Tuple[float, Dict]:
        """Return (penalty score in [0, 5], dict of triggered penalties)."""
        penalties = {}
        alpha_chars = re.findall(r'[a-zA-Z]', text)
        if alpha_chars:
            caps_ratio = len(re.findall(r'[A-Z]', text)) / len(alpha_chars)
            if caps_ratio > 0.7:  # mostly-uppercase text
                penalties['excessive_caps'] = caps_ratio
        excessive_punctuation = len(re.findall(r'[!?]{2,}', text))
        if excessive_punctuation:
            penalties['excessive_punctuation'] = excessive_punctuation
        if len(text.split()) < 10:
            penalties['too_short'] = 1.0
        penalty_score = sum(penalties.values()) if penalties else 0
        return (min(5.0, penalty_score), penalties)

    def calculate_adi(self, metrics: InputMetrics) -> float:
        """Combine weighted metrics into the ADI score (lower = better input).

        Returns float('inf') if the formula cannot be evaluated (e.g. a
        caller-supplied weights dict is missing a key).
        """
        try:
            numerator = (
                self.weights['noise'] * metrics.noise -
                (self.weights['effort'] * metrics.effort +
                 self.weights['bonus'] * metrics.bonus_factors)
            )
            denominator = (
                self.weights['context'] * metrics.context +
                self.weights['details'] * metrics.details +
                self.weights['penalty'] * metrics.penalty_factors +
                metrics.repetition_penalty
            )
            # Clamp the denominator so an all-zero input cannot divide by zero.
            return numerator / max(denominator, 0.1)
        except Exception:
            return float('inf')  # inf > 1, so _make_decision yields REJECT

    def analyze_input(self, text: str, user_context: Optional[Dict] = None) -> Dict:
        """Full analysis pipeline for one input text.

        Args:
            text: the raw user input to score.
            user_context: optional caller metadata; 'tier' == 'enterprise'
                and a negative 'history_avg' both soften the adjusted score.

        Returns:
            Dict with 'adi', 'adi_adjusted' (None without user_context),
            'metrics', 'decision', 'recommendations' and per-category 'details'.
        """
        noise_value, noise_details = self.calculate_noise(text)
        effort_value = self.calculate_effort(text)
        context_value = self.calculate_context(text)
        details_value, detail_findings = self.calculate_details(text)
        bonus_value = self.calculate_bonus_factors(text)
        penalty_value, penalty_details = self.calculate_penalty_factors(text)
        repetition_value = self.calculate_repetition_penalty(text)

        metrics = InputMetrics(
            noise=noise_value, effort=effort_value, context=context_value,
            details=details_value, bonus_factors=bonus_value,
            penalty_factors=penalty_value, repetition_penalty=repetition_value
        )
        adi = self.calculate_adi(metrics)

        # Fix: honor the enable_logging flag (previously _log_analysis was never called).
        if self.enable_logging:
            self._log_analysis(text, adi, metrics)

        adi_adjusted = adi
        if user_context:
            if user_context.get('tier') == 'enterprise':
                adi_adjusted *= 0.8
            if user_context.get('history_avg', 0) < 0:
                adi_adjusted *= 0.9

        decision = self._make_decision(adi_adjusted)
        recommendations = self._generate_recommendations(
            metrics, noise_details, detail_findings, penalty_details
        )

        return {
            'adi': round(adi, 3),
            'adi_adjusted': round(adi_adjusted, 3) if user_context else None,
            'metrics': {
                'noise': round(noise_value, 3), 'effort': round(effort_value, 3),
                'context': round(context_value, 3), 'details': round(details_value, 3),
                'bonus_factors': round(bonus_value, 3), 'penalty_factors': round(penalty_value, 3),
                'repetition_penalty': round(repetition_value, 3)
            },
            'decision': decision,
            'recommendations': recommendations,
            'details': {
                'noise_findings': noise_details,
                'technical_details': detail_findings,
                'penalties': penalty_details
            }
        }

    def validate_weights(self, labeled_samples: List[Tuple[str, str]]) -> float:
        """Decision accuracy of the current weights on human-labeled samples.

        This method is required by train.py's ``validate`` mode, which calls
        ``analyzer.validate_weights(labeled)``.

        Args:
            labeled_samples: (text, expected_decision) pairs where the expected
                decision is one of REJECT / MEDIUM_PRIORITY / HIGH_PRIORITY.

        Returns:
            Fraction of samples whose computed decision matches the label;
            0.0 for an empty sample list.
        """
        if not labeled_samples:
            return 0.0
        correct = sum(
            1 for sample_text, expected in labeled_samples
            if self.analyze_input(sample_text)['decision'] == expected
        )
        return correct / len(labeled_samples)

    def _make_decision(self, adi: float) -> str:
        """Map an ADI score to a routing decision (lower score = higher priority)."""
        if adi > 1:
            return "REJECT"
        elif 0 <= adi <= 1:
            return "MEDIUM_PRIORITY"
        else:
            return "HIGH_PRIORITY"

    def _generate_recommendations(self, metrics, noise_details, detail_findings, penalty_details):
        """Build human-readable improvement hints from the computed metrics."""
        recommendations = []
        if metrics.noise > 0.3:
            recommendations.append("Reduce informal or urgent expressions.")
        if metrics.context < 1.0:
            recommendations.append("Provide more context (environment, background, goal).")
        if metrics.details < 1.0:
            recommendations.append("Include specific technical details or error messages.")
        if metrics.effort < 2.0:
            recommendations.append("Improve the structure of your input with proper sentences.")
        if metrics.repetition_penalty > 1.0:
            recommendations.append("Avoid repeating the same keywords excessively.")
        if metrics.penalty_factors > 0:
            if 'excessive_caps' in penalty_details:
                recommendations.append("Avoid excessive capitalization.")
            if 'excessive_punctuation' in penalty_details:
                recommendations.append("Reduce excessive punctuation marks.")
            if 'too_short' in penalty_details:
                recommendations.append("Provide a more detailed description (minimum 10 words).")
        if not recommendations:
            recommendations.append("Your input quality is excellent. No improvements needed.")
        return recommendations

    def _log_analysis(self, text: str, adi: float, metrics: InputMetrics):
        """Append one analysis record to ``adi_logs.jsonl``."""
        import hashlib  # local import: only needed when logging is enabled
        log_entry = {
            # Fix: builtin hash() is randomized per process (PYTHONHASHSEED);
            # sha256 gives a stable identifier across runs.
            'text_hash': hashlib.sha256(text.encode('utf-8')).hexdigest()[:16],
            'text_length': len(text), 'adi': round(adi, 3),
            'metrics': {
                'noise': round(metrics.noise, 3), 'effort': round(metrics.effort, 3),
                'context': round(metrics.context, 3), 'details': round(metrics.details, 3),
                'bonus_factors': round(metrics.bonus_factors, 3),
                'penalty_factors': round(metrics.penalty_factors, 3),
                'repetition_penalty': round(metrics.repetition_penalty, 3)
            }
        }
        with open(self.log_file, 'a') as f:
            f.write(json.dumps(log_entry) + '\n')
main.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # main.py
3
+ # FastAPI — OpenAI-compatible /v1/chat/completions endpoint
4
+ # SmolLM2 Service Space
5
+ # Copyright 2026 - Volkan Kücükbudak
6
+ # Apache License V2 + ESOL 1.1
7
+ # =============================================================================
8
+ # Hub connects via:
9
+ # base_url = "https://codey-lab-smollm-service.hf.space/v1"
10
+ # → POST /v1/chat/completions (OpenAI-compatible)
11
+ # → GET /v1/health (status check)
12
+ # =============================================================================
13
+
14
+ import logging
15
+ import time
16
+ import uuid
17
+ from contextlib import asynccontextmanager
18
+
19
+ from fastapi import FastAPI, HTTPException, Request
20
+ from fastapi.responses import JSONResponse
21
+ from pydantic import BaseModel
22
+ from typing import List, Optional
23
+
24
+ import smollm
25
+ import model as model_module
26
+ from adi import DumpindexAnalyzer
27
+
28
+ logging.basicConfig(
29
+ level=logging.INFO,
30
+ format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
31
+ )
32
+ logger = logging.getLogger("main")
33
+
34
+ # ── ADI ───────────────────────────────────────────────────────────────────────
35
+ adi_analyzer = DumpindexAnalyzer(enable_logging=False)
36
+
37
+
38
+ # ── Startup ───────────────────────────────────────────────────────────────────
39
+ @asynccontextmanager
40
+ async def lifespan(app: FastAPI):
41
+ logger.info("=== SmolLM2 Service starting ===")
42
+ logger.info(f"Model config: {model_module.status()}")
43
+ smollm.load() # preload model on startup
44
+ yield
45
+ logger.info("=== SmolLM2 Service stopped ===")
46
+
47
+ app = FastAPI(title="SmolLM2 Service", version="1.0.0", lifespan=lifespan)
48
+
49
+
50
+ # =============================================================================
51
+ # Request / Response Models (OpenAI-compatible)
52
+ # =============================================================================
53
+
54
class Message(BaseModel):
    """Single chat message in OpenAI format."""
    role: str      # "system", "user", ... (only system/user are consumed by the endpoint)
    content: str   # message text
57
+
58
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible request body for /v1/chat/completions."""
    model: Optional[str] = "smollm2-360m"   # echoed back in the response
    messages: List[Message]
    max_tokens: Optional[int] = 150
    temperature: Optional[float] = 0.2      # 0 → greedy decoding (see smollm.complete)
    stream: Optional[bool] = False          # accepted for compatibility; streaming not implemented here
64
+
65
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible response shape (documentation model; the endpoint
    returns a plain dict built by _build_response with the same fields)."""
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[dict]
    adi: Optional[dict] = None  # ADI result attached to response
72
+
73
+
74
+ # =============================================================================
75
+ # Routes
76
+ # =============================================================================
77
+
78
+ @app.get("/")
79
+ async def root():
80
+ return {
81
+ "service": "SmolLM2 Service",
82
+ "model": smollm.device_info(),
83
+ "ready": smollm.is_ready(),
84
+ "docs": "/docs",
85
+ }
86
+
87
+
88
+ @app.get("/v1/health")
89
+ async def health():
90
+ return {
91
+ "status": "ok" if smollm.is_ready() else "loading",
92
+ "device": smollm.device_info(),
93
+ "model": model_module.status(),
94
+ }
95
+
96
+
97
+ @app.post("/v1/chat/completions")
98
+ async def chat_completions(req: ChatCompletionRequest):
99
+ if not req.messages:
100
+ raise HTTPException(status_code=400, detail="messages cannot be empty")
101
+
102
+ # ── Extract prompt + system prompt ────────────────────────────────────────
103
+ system_prompt = ""
104
+ user_prompt = ""
105
+
106
+ for msg in req.messages:
107
+ if msg.role == "system":
108
+ system_prompt = msg.content
109
+ elif msg.role == "user":
110
+ user_prompt = msg.content
111
+
112
+ if not user_prompt:
113
+ raise HTTPException(status_code=400, detail="No user message found")
114
+
115
+ # ── ADI Analysis ──────────────────────────────────────────────────────────
116
+ adi_result = adi_analyzer.analyze_input(user_prompt)
117
+ decision = adi_result["decision"]
118
+ logger.info(f"ADI | decision: {decision} | score: {adi_result['adi']}")
119
+
120
+ # ── Route by ADI decision ─────────────────────────────────────────────────
121
+ if decision == "REJECT":
122
+ logger.info("ADI → REJECT: returning rejection response")
123
+ response_text = (
124
+ "Your request needs more detail before I can help. "
125
+ "Suggestions: " + " | ".join(adi_result["recommendations"])
126
+ )
127
+ # Log to dataset
128
+ model_module.push_log({
129
+ "prompt": user_prompt,
130
+ "system_prompt": system_prompt,
131
+ "adi_score": adi_result["adi"],
132
+ "adi_decision": decision,
133
+ "adi_metrics": adi_result["metrics"],
134
+ "response": None,
135
+ "routed_to": "REJECT",
136
+ "model": req.model,
137
+ })
138
+ return _build_response(req.model, response_text, adi_result)
139
+
140
+ # ── SmolLM2 Inference ─────────────────────────────────────────────────────
141
+ try:
142
+ response_text = await smollm.complete(
143
+ prompt=user_prompt,
144
+ system_prompt=system_prompt,
145
+ max_tokens=req.max_tokens,
146
+ temperature=req.temperature,
147
+ )
148
+ routed_to = "smollm2"
149
+ logger.info(f"SmolLM2 response ok | decision: {decision}")
150
+
151
+ except Exception as e:
152
+ logger.warning(f"SmolLM2 failed: {type(e).__name__} — triggering hub fallback")
153
+ # Return 503 so hub's fallback chain kicks in
154
+ raise HTTPException(
155
+ status_code=503,
156
+ detail={
157
+ "error": "smollm_unavailable",
158
+ "adi_decision": decision,
159
+ "message": "Route to next provider in fallback chain",
160
+ }
161
+ )
162
+
163
+ # ── Log to Dataset ────────────────────────────────────────────────────────
164
+ model_module.push_log({
165
+ "prompt": user_prompt,
166
+ "system_prompt": system_prompt,
167
+ "adi_score": adi_result["adi"],
168
+ "adi_decision": decision,
169
+ "adi_metrics": adi_result["metrics"],
170
+ "response": response_text,
171
+ "routed_to": routed_to,
172
+ "model": req.model,
173
+ })
174
+
175
+ return _build_response(req.model, response_text, adi_result)
176
+
177
+
178
+ # =============================================================================
179
+ # Helpers
180
+ # =============================================================================
181
+
182
+ def _build_response(model: str, content: str, adi_result: dict) -> dict:
183
+ return {
184
+ "id": f"smollm-{uuid.uuid4().hex[:8]}",
185
+ "object": "chat.completion",
186
+ "created": int(time.time()),
187
+ "model": model,
188
+ "choices": [{
189
+ "index": 0,
190
+ "message": {"role": "assistant", "content": content},
191
+ "finish_reason": "stop",
192
+ }],
193
+ "adi": {
194
+ "score": adi_result["adi"],
195
+ "decision": adi_result["decision"],
196
+ "metrics": adi_result["metrics"],
197
+ }
198
+ }
model.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # model.py
3
+ # HuggingFace Model + Dataset Access Layer
4
+ # SmolLM2 Service Space
5
+ # Copyright 2026 - Volkan Kücükbudak
6
+ # Apache License V2 + ESOL 1.1
7
+ # =============================================================================
8
+ # Handles:
9
+ # - Model loading (SmolLM2 from HF or private repo)
10
+ # - Dataset read/write (private HF dataset)
11
+ # - Token resolution (HF_TOKEN → TEST_TOKEN → None)
12
+ # =============================================================================
13
+
14
+ import os
15
+ import logging
16
+ from datetime import datetime
17
+ from typing import Optional
18
+ from huggingface_hub import HfApi, login
19
+ from datasets import load_dataset, Dataset
20
+
21
+ logger = logging.getLogger("model")
22
+
23
# ── Token Resolution ──────────────────────────────────────────────────────────
# First non-empty value wins; all common env-var spellings are accepted so the
# Space authenticates regardless of how the secret was named.
TOKEN = (
    os.environ.get("HF_TOKEN") or
    os.environ.get("TEST_TOKEN") or
    os.environ.get("HUGGINGFACE_TOKEN") or
    os.environ.get("HF_API_TOKEN") or
    None
)

# ── Config from ENV ───────────────────────────────────────────────────────────
MODEL_REPO = os.environ.get("MODEL_REPO", "HuggingFaceTB/SmolLM2-360M-Instruct")  # base model
DATASET_REPO = os.environ.get("DATASET_REPO", "codey-lab/data.universal-mcp-hub")  # request-log dataset
PRIVATE_MODEL = os.environ.get("PRIVATE_MODEL_REPO", "codey-lab/model.universal-mcp-hub")  # fine-tuned repo

# ── HF API ────────────────────────────────────────────────────────────────────
_api: Optional[HfApi] = None  # cached authenticated client; populated lazily by get_api()
39
+
40
def get_api() -> Optional[HfApi]:
    """Returns authenticated HfApi instance or None if no token.

    Logs in lazily on first call and caches the client in module-level ``_api``;
    on auth failure the cache stays None, so the next call retries.
    """
    global _api
    if _api is None and TOKEN:
        try:
            login(token=TOKEN, add_to_git_credential=False)
            _api = HfApi(token=TOKEN)
            logger.info("HF API authenticated")
        except Exception as e:
            logger.warning(f"HF API auth failed: {type(e).__name__} — running unauthenticated")
    return _api
51
+
52
+
53
+ # =============================================================================
54
+ # Model Access
55
+ # =============================================================================
56
+
57
def get_model_id() -> str:
    """
    Returns model ID to load.
    Prefers private fine-tuned model if available, falls back to base model.
    """
    api = get_api()
    if api and PRIVATE_MODEL:
        try:
            # model_info raises if the repo doesn't exist or isn't accessible,
            # which is how "private model not ready" is detected.
            api.model_info(PRIVATE_MODEL, token=TOKEN)
            logger.info(f"Using private model: {PRIVATE_MODEL}")
            return PRIVATE_MODEL
        except Exception:
            logger.info(f"Private model not ready — using base: {MODEL_REPO}")
    return MODEL_REPO
71
+
72
+
73
def get_model_kwargs() -> dict:
    """Keyword arguments for ``from_pretrained()`` calls.

    Includes the HF token when one is configured; empty dict otherwise.
    """
    return {"token": TOKEN} if TOKEN else {}
79
+
80
+
81
+ # =============================================================================
82
+ # Dataset Access
83
+ # =============================================================================
84
+
85
def load_logs() -> list:
    """
    Load existing log entries from HF Dataset.
    Returns empty list if dataset doesn't exist yet.

    Returns an empty list (and logs why) when no token is configured or the
    dataset cannot be loaded — callers never see an exception.
    """
    if not TOKEN:
        logger.warning("No token — dataset read skipped")
        return []
    try:
        ds = load_dataset(DATASET_REPO, split="train", token=TOKEN)
        return ds.to_list()
    except Exception as e:
        # First run (dataset doesn't exist yet) lands here too — start fresh.
        logger.info(f"Dataset load: {type(e).__name__} — starting fresh")
        return []
99
+
100
+
101
def push_log(entry: dict) -> bool:
    """
    Append a log entry to HF Dataset and push.

    Args:
        entry: dict with prompt, adi, response, model, timestamp etc.
            NOTE: mutated in place — a 'timestamp' key is added.

    Returns:
        True on success, False on failure.

    NOTE(review): each call re-downloads the full dataset and re-pushes all of
    it, so cost grows with dataset size and concurrent calls can lose entries —
    consider batching or CommitScheduler for high traffic.
    """
    if not TOKEN:
        logger.warning("No token — dataset push skipped")
        return False
    try:
        existing = load_logs()
        entry["timestamp"] = datetime.utcnow().isoformat()  # naive UTC timestamp
        existing.append(entry)
        ds = Dataset.from_list(existing)
        ds.push_to_hub(DATASET_REPO, token=TOKEN, private=True)
        logger.info(f"Dataset updated — total entries: {len(existing)}")
        return True
    except Exception as e:
        logger.warning(f"Dataset push failed: {type(e).__name__}: {e}")
        return False
125
+
126
+
127
def push_model_card(info: dict) -> bool:
    """
    Update model card / metadata in private model repo.
    Useful for tracking which weights/config is deployed.

    Args:
        info: arbitrary config dict, rendered into the README's Config section.

    Returns:
        True on success, False when unauthenticated or the upload fails.
    """
    api = get_api()
    if not api:
        return False
    try:
        # README with YAML front matter; the f-string body is intentionally
        # unindented so the markdown renders correctly.
        content = f"""---
language: en
license: apache-2.0
base_model: {MODEL_REPO}
---

# SmolLM2 Service

Base: `{MODEL_REPO}`
Dataset: `{DATASET_REPO}`
Last updated: {datetime.utcnow().isoformat()}

## Config
```json
{info}
```
"""
        api.upload_file(
            path_or_fileobj=content.encode(),
            path_in_repo="README.md",
            repo_id=PRIVATE_MODEL,
            repo_type="model",
            token=TOKEN,
        )
        logger.info(f"Model card updated: {PRIVATE_MODEL}")
        return True
    except Exception as e:
        logger.warning(f"Model card update failed: {type(e).__name__}: {e}")
        return False
165
+
166
+
167
+ # =============================================================================
168
+ # Health
169
+ # =============================================================================
170
+
171
def status() -> dict:
    """Returns model/dataset config status for health endpoint."""
    info = {
        "token": "set" if TOKEN else "missing",
        "model_repo": MODEL_REPO,
        "private_model": PRIVATE_MODEL,
        "dataset_repo": DATASET_REPO,
    }
    # get_api() lazily authenticates; None means no token or auth failure.
    info["hf_api"] = "authenticated" if get_api() else "unauthenticated"
    return info
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn==0.30.6
3
+ transformers==4.46.0
4
+ torch==2.4.1
5
+ accelerate==0.34.2
6
+ numpy==1.26.4
7
+ huggingface_hub==0.25.0
8
+ datasets==3.0.1
9
+ pydantic==2.9.2
10
+ httpx==0.27.2
smollm.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # smollm.py
3
+ # SmolLM2 Inference Engine
4
+ # SmolLM2 Service Space
5
+ # Copyright 2026 - Volkan Kücükbudak
6
+ # Apache License V2 + ESOL 1.1
7
+ # =============================================================================
8
+
9
+ import logging
10
+ import torch
11
+ from typing import Optional
12
+ import model as model_module
13
+
14
+ logger = logging.getLogger("smollm")
15
+
16
_tokenizer = None  # AutoTokenizer instance, set by load()
_model = None      # AutoModelForCausalLM instance, set by load()
_device = None     # "cuda" or "cpu", chosen by load()
19
+
20
+
21
def load():
    """Lazy model loader — called on first request.

    Idempotent: returns immediately if the model is already in memory.
    Picks CUDA when available, otherwise CPU, and pushes an updated model
    card recording what was deployed.
    """
    global _tokenizer, _model, _device

    if _model is not None:
        return

    # Deferred import keeps module import cheap until a load is actually needed.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = model_module.get_model_id()
    kwargs = model_module.get_model_kwargs()
    _device = "cuda" if torch.cuda.is_available() else "cpu"

    logger.info(f"Loading {model_id} on {_device}...")
    _tokenizer = AutoTokenizer.from_pretrained(model_id, **kwargs)
    _model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs).to(_device)
    logger.info(f"Model ready [{_device}]")

    # Update model card on startup
    model_module.push_model_card({
        "model_id": model_id,
        "device": _device,
    })
44
+
45
+
46
async def complete(
    prompt: str,
    system_prompt: str = "",
    max_tokens: int = 150,
    temperature: float = 0.2,
) -> str:
    """
    Run SmolLM2 inference.

    temperature <= 0 selects greedy decoding (do_sample=False); otherwise
    nucleus sampling with top_p=0.9 is used.

    NOTE(review): generation runs synchronously inside this coroutine and
    blocks the event loop for its duration — consider run_in_executor.

    Returns:
        Generated text string.
    Raises:
        RuntimeError on inference failure.
    """
    load()  # no-op if the model is already in memory

    # Build the chat in OpenAI message format for the model's chat template.
    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    text = _tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = _tokenizer.encode(text, return_tensors="pt").to(_device)

    with torch.no_grad():
        outputs = _model.generate(
            inputs,
            max_new_tokens=max_tokens,
            temperature=temperature if temperature > 0 else None,
            do_sample=temperature > 0,
            top_p=0.9 if temperature > 0 else None,
            pad_token_id=_tokenizer.eos_token_id,
        )

    # Strip the prompt tokens; decode only the newly generated continuation.
    new_tokens = outputs[0][inputs.shape[-1]:]
    return _tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
82
+
83
+
84
def is_ready() -> bool:
    """True once load() has completed and the model is in memory."""
    return _model is not None
86
+
87
def device_info() -> str:
    """Device the model was loaded on ("cuda"/"cpu"), or "not loaded"."""
    return _device or "not loaded"
train.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # train.py
3
+ # Dataset Preparation + Finetuning Entry Point
4
+ # SmolLM2 Service Space
5
+ # Copyright 2026 - Volkan Kücükbudak
6
+ # Apache License V2 + ESOL 1.1
7
+ # =============================================================================
8
+ # Usage:
9
+ # python train.py --mode export → export HF dataset to training format
10
+ # python train.py --mode validate → validate ADI weights against dataset
11
+ # python train.py --mode finetune → finetune SmolLM2 on collected data (future)
12
+ # =============================================================================
13
+
14
+ import argparse
15
+ import json
16
+ import logging
17
+ from datetime import datetime
18
+ from pathlib import Path
19
+
20
+ import model as model_module
21
+ from adi import DumpindexAnalyzer
22
+
23
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
24
+ logger = logging.getLogger("train")
25
+
26
+
27
+ # =============================================================================
28
+ # Mode 1 — Export dataset to training format
29
+ # =============================================================================
30
+
31
def export_dataset(output_path: str = "train_data.jsonl"):
    """
    Export HF dataset logs to JSONL format for training.
    Filters: only HIGH_PRIORITY and MEDIUM_PRIORITY entries with actual responses.

    Args:
        output_path: destination JSONL file, one instruction-tuning record per line.
    """
    logger.info("Loading dataset from HF...")
    entries = model_module.load_logs()

    if not entries:
        logger.warning("Dataset empty — nothing to export")
        return

    output = Path(output_path)
    count = 0

    with open(output, "w") as f:
        for entry in entries:
            # Only export entries where SmolLM2 actually responded
            if entry.get("adi_decision") == "REJECT":
                continue
            if not entry.get("response"):
                continue

            # Format as instruction tuning pair
            record = {
                "instruction": entry.get("system_prompt", "You are a helpful assistant."),
                "input": entry.get("prompt", ""),
                "output": entry.get("response", ""),
                "adi_score": entry.get("adi_score"),
                "adi_decision": entry.get("adi_decision"),
            }
            f.write(json.dumps(record) + "\n")
            count += 1

    logger.info(f"Exported {count}/{len(entries)} entries → {output}")
66
+
67
+
68
+ # =============================================================================
69
+ # Mode 2 — Validate ADI weights against collected data
70
+ # =============================================================================
71
+
72
def validate_adi():
    """
    Run ADI weight validation against dataset.
    Uses entries that have human_label field (manually labeled).

    Requires DumpindexAnalyzer.validate_weights(list[(text, label)]) -> float
    accuracy — verify the adi module in this deployment provides it.
    Writes the accuracy, sample count and weights to validation_results.json.
    """
    logger.info("Loading dataset for ADI validation...")
    entries = model_module.load_logs()

    labeled = [(e["prompt"], e["human_label"]) for e in entries if e.get("human_label")]

    if not labeled:
        logger.warning("No labeled entries found — add 'human_label' field to dataset entries")
        logger.info("Expected labels: REJECT | MEDIUM_PRIORITY | HIGH_PRIORITY")
        return

    analyzer = DumpindexAnalyzer()
    accuracy = analyzer.validate_weights(labeled)
    logger.info(f"ADI Validation accuracy: {accuracy:.1%} on {len(labeled)} samples")

    # Save results
    result = {
        "timestamp": datetime.utcnow().isoformat(),
        "accuracy": accuracy,
        "samples": len(labeled),
        "weights": analyzer.weights,
    }
    Path("validation_results.json").write_text(json.dumps(result, indent=2))
    logger.info("Results saved → validation_results.json")
100
+
101
+
102
+ # =============================================================================
103
+ # Mode 3 — Finetune placeholder
104
+ # =============================================================================
105
+
106
def finetune():
    """
    Finetune SmolLM2 on collected dataset.
    Placeholder — requires export first + enough data (>500 samples recommended).

    Currently only counts the available samples and logs the plan; no training
    is performed yet.
    """
    train_file = Path("train_data.jsonl")
    if not train_file.exists():
        logger.error("train_data.jsonl not found — run: python train.py --mode export first")
        return

    lines = train_file.read_text().strip().splitlines()
    logger.info(f"Training samples available: {len(lines)}")

    if len(lines) < 100:
        logger.warning(f"Only {len(lines)} samples — recommend 500+ for meaningful finetuning")

    # TODO: implement finetuning with transformers Trainer
    # Rough plan:
    #   1. Load base model via model.get_model_id()
    #   2. Tokenize train_data.jsonl
    #   3. TrainingArguments + Trainer
    #   4. Save to PRIVATE_MODEL repo via model.push_model_card()
    logger.info("Finetune placeholder — not yet implemented")
    logger.info("Next step: implement with transformers.Trainer or TRL SFTTrainer")
130
+
131
+
132
+ # =============================================================================
133
+ # CLI
134
+ # =============================================================================
135
+
136
+ if __name__ == "__main__":
137
+ parser = argparse.ArgumentParser(description="SmolLM2 Training Utilities")
138
+ parser.add_argument(
139
+ "--mode",
140
+ choices=["export", "validate", "finetune"],
141
+ required=True,
142
+ help="export: dump dataset to JSONL | validate: test ADI weights | finetune: train model"
143
+ )
144
+ parser.add_argument("--output", default="train_data.jsonl", help="Output file for export mode")
145
+ args = parser.parse_args()
146
+
147
+ if args.mode == "export":
148
+ export_dataset(args.output)
149
+ elif args.mode == "validate":
150
+ validate_adi()
151
+ elif args.mode == "finetune":
152
+ finetune()