Spaces:
Running on Zero
Running on Zero
feat: implement file persistence for audit logging, improve text normalization logic, add beam search support, and update UI with a few-shot builder interface.
Browse files
- app.py +53 -10
- index.html +25 -4
app.py
CHANGED
|
@@ -132,6 +132,7 @@ model = MiniCPMV4_6ForConditionalGeneration.from_pretrained(
|
|
| 132 |
# ---------- Logging & Helper Functions ----------
|
| 133 |
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
|
| 134 |
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
|
|
|
|
| 135 |
HTTP_LOG_FILE = os.path.join(LOG_DIR, "http_requests.jsonl")
|
| 136 |
RAW_OUTPUT_LOG_FILE = os.path.join(LOG_DIR, "raw_model_outputs.jsonl")
|
| 137 |
HTTP_LOG_LOCK = threading.Lock()
|
|
@@ -184,14 +185,46 @@ def load_video(video_path, max_frames=64):
|
|
| 184 |
print(f"Error loading video: {e}")
|
| 185 |
return None
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
def normalize_response_text(text: str) -> str:
|
| 188 |
-
"""
|
| 189 |
-
UI rendering layer: convert literal \\n to real newlines.
|
| 190 |
-
"""
|
| 191 |
if not isinstance(text, str) or "\\" not in text:
|
| 192 |
return text
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
# ---------- Inference Endpoint ----------
|
| 197 |
|
|
@@ -208,12 +241,16 @@ def predict(
|
|
| 208 |
temperature: float = 0.7,
|
| 209 |
top_p: float = 0.8,
|
| 210 |
top_k: int = 100,
|
| 211 |
-
max_frames: int = 64
|
|
|
|
| 212 |
) -> Generator[str, None, None]:
|
| 213 |
"""
|
| 214 |
Streaming inference endpoint with history support.
|
| 215 |
"""
|
| 216 |
session_id = str(uuid.uuid4())
|
|
|
|
|
|
|
|
|
|
| 217 |
messages = []
|
| 218 |
|
| 219 |
# Process history
|
|
@@ -290,15 +327,21 @@ def predict(
|
|
| 290 |
skip_special_tokens=True,
|
| 291 |
)
|
| 292 |
|
|
|
|
| 293 |
generate_kwargs = {
|
| 294 |
**inputs,
|
| 295 |
"max_new_tokens": max_new_tokens,
|
| 296 |
-
"do_sample":
|
| 297 |
-
"temperature": temperature,
|
| 298 |
-
"top_p": top_p,
|
| 299 |
-
"top_k": top_k,
|
| 300 |
"streamer": streamer,
|
| 301 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
|
| 304 |
thread.start()
|
|
|
|
| 132 |
# ---------- Logging & Helper Functions ----------
|
| 133 |
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
|
| 134 |
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
|
| 135 |
+
UPLOAD_LOG_DIR = os.path.join(LOG_DIR, "uploads")
|
| 136 |
HTTP_LOG_FILE = os.path.join(LOG_DIR, "http_requests.jsonl")
|
| 137 |
RAW_OUTPUT_LOG_FILE = os.path.join(LOG_DIR, "raw_model_outputs.jsonl")
|
| 138 |
HTTP_LOG_LOCK = threading.Lock()
|
|
|
|
| 185 |
print(f"Error loading video: {e}")
|
| 186 |
return None
|
| 187 |
|
| 188 |
+
def persist_uploaded_files(files: list, session_id: str) -> list:
    """Copy uploaded files into a per-session folder under ``UPLOAD_LOG_DIR``.

    Args:
        files: Upload entries. Each entry is either a filesystem path or a
            dict whose ``"path"`` key holds the path. Falsy input (``None``,
            empty list) is accepted and yields an empty result.
        session_id: Name of the sub-folder created under ``UPLOAD_LOG_DIR``.
            A falsy value falls back to the literal folder name ``"session"``.

    Returns:
        A list with one item per input entry, in the same order: the
        destination path for every entry that was copied; the source path
        for entries that do not point at an existing regular file; and the
        entry itself when no usable path could be extracted from it.

    Raises:
        OSError: Propagated from ``os.makedirs`` / ``shutil.copy2`` when the
            destination cannot be created or written.
    """
    if not files:
        return []

    dest_dir = os.path.join(UPLOAD_LOG_DIR, session_id or "session")
    os.makedirs(dest_dir, exist_ok=True)

    persisted = []
    for entry in files:
        # .get() instead of ["path"] so a dict without the key does not
        # abort the whole request with a KeyError.
        src = entry.get("path") if isinstance(entry, dict) else entry

        if not isinstance(src, (str, os.PathLike)):
            # None or another unexpected shape: os.path.isfile() would raise
            # TypeError on it, so hand the entry back untouched instead.
            persisted.append(entry)
            continue

        if not os.path.isfile(src):
            # Not a local regular file (missing, directory, remote
            # reference, ...): nothing to copy, keep the reference as-is.
            persisted.append(src)
            continue

        base = os.path.basename(src)
        stamp = time.strftime("%Y%m%dT%H%M%SZ", time.gmtime())
        # UTC timestamp plus a short random suffix keeps names unique even
        # when the same file is uploaded several times within one second.
        dest = os.path.join(dest_dir, f"{stamp}-{uuid.uuid4().hex[:8]}-{base}")
        shutil.copy2(src, dest)
        persisted.append(dest)

    return persisted
|
| 205 |
+
|
| 206 |
def normalize_response_text(text: str) -> str:
    r"""Turn literal ``\n`` / ``\r\n`` escape sequences into real newlines.

    Rules applied to the text:

    * A run of two or more literal ``\r\n`` / ``\n`` / ``\r`` sequences
      becomes one newline per sequence; a ``\r\n`` pair counts once.
    * A single literal ``\n`` becomes a newline unless it is directly
      followed by an ASCII letter (so commands such as ``\nabla`` survive).
    * A sequence preceded by another backslash (``\\n``) is left alone.
    * Inline code spans (single backticks) are kept verbatim.
    * Fenced blocks (triple backticks) are converted with the same rules
      once and then shielded so they are not processed a second time.

    Args:
        text: The text to normalize. Non-string values and strings without
            any backslash are returned unchanged.

    Returns:
        The normalized text, or ``text`` itself when nothing had to be done.
    """
    if not isinstance(text, str) or "\\" not in text:
        return text

    stash = {}

    def _convert(chunk):
        # Runs of escapes. A lone literal "\r\n" also matches this pattern
        # (as "\r" followed by "\n"), so tokens are counted with the CRLF
        # pair tried first: CRLF yields one newline, not two.
        chunk = re.sub(
            r"(?<!\\)(?:\\r\\n|\\n|\\r){2,}",
            lambda m: "\n" * len(re.findall(r"\\r\\n|\\n|\\r", m.group(0))),
            chunk,
        )
        # Remaining single "\n"; skipped when a letter follows.
        return re.sub(r"(?<!\\)\\n(?![a-zA-Z])", "\n", chunk)

    def _protect(segment):
        # NUL-delimited placeholder: contains neither backslashes nor
        # backticks, so the later passes cannot touch it.
        key = f"\x00P{len(stash)}\x00"
        stash[key] = segment
        return key

    # Fenced blocks first: convert their content once, then shield it.
    res = re.sub(r"```[\s\S]*?```", lambda m: _protect(_convert(m.group(0))), text)
    # Inline code is shielded verbatim.
    res = re.sub(r"`[^`]+`", lambda m: _protect(m.group(0)), res)
    res = _convert(res)

    # Restore newest-first: an inline span may have swallowed an earlier
    # fenced-block placeholder, which only reappears in the text once the
    # enclosing span has been put back.
    for key, original in reversed(list(stash.items())):
        res = res.replace(key, original)
    return res
|
| 228 |
|
| 229 |
# ---------- Inference Endpoint ----------
|
| 230 |
|
|
|
|
| 241 |
temperature: float = 0.7,
|
| 242 |
top_p: float = 0.8,
|
| 243 |
top_k: int = 100,
|
| 244 |
+
max_frames: int = 64,
|
| 245 |
+
generation_mode: str = "Sampling"
|
| 246 |
) -> Generator[str, None, None]:
|
| 247 |
"""
|
| 248 |
Streaming inference endpoint with history support.
|
| 249 |
"""
|
| 250 |
session_id = str(uuid.uuid4())
|
| 251 |
+
# Persist files first for audit parity
|
| 252 |
+
if files:
|
| 253 |
+
persist_uploaded_files(files, session_id)
|
| 254 |
messages = []
|
| 255 |
|
| 256 |
# Process history
|
|
|
|
| 327 |
skip_special_tokens=True,
|
| 328 |
)
|
| 329 |
|
| 330 |
+
sampling = (generation_mode == "Sampling")
|
| 331 |
generate_kwargs = {
|
| 332 |
**inputs,
|
| 333 |
"max_new_tokens": max_new_tokens,
|
| 334 |
+
"do_sample": sampling,
|
|
|
|
|
|
|
|
|
|
| 335 |
"streamer": streamer,
|
| 336 |
}
|
| 337 |
+
if sampling:
|
| 338 |
+
generate_kwargs.update({
|
| 339 |
+
"temperature": temperature,
|
| 340 |
+
"top_p": top_p,
|
| 341 |
+
"top_k": top_k,
|
| 342 |
+
})
|
| 343 |
+
else:
|
| 344 |
+
generate_kwargs.update({"num_beams": 1})
|
| 345 |
|
| 346 |
thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
|
| 347 |
thread.start()
|
index.html
CHANGED
|
@@ -280,6 +280,11 @@
|
|
| 280 |
<input type="range" id="frames-slider" min="8" max="256" step="8" value="64" class="control-slider">
|
| 281 |
</div>
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
<button onclick="clearHistory()" class="w-full py-4 rounded-2xl bg-red-500/10 border border-red-500/20 text-red-500 text-sm font-bold hover:bg-red-500/20 transition-all flex items-center justify-center gap-2">
|
| 284 |
<i data-lucide="trash-2" class="w-4 h-4"></i>
|
| 285 |
Clear Conversation
|
|
@@ -365,9 +370,15 @@
|
|
| 365 |
<!-- Few-Shot Area (Tab 2) -->
|
| 366 |
<div id="tab-fewshot" class="tab-content flex-col items-center pt-32 px-4 h-full overflow-y-auto">
|
| 367 |
<div class="max-w-3xl w-full space-y-8 pb-20">
|
| 368 |
-
<div class="
|
| 369 |
-
<
|
| 370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
</div>
|
| 372 |
|
| 373 |
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
|
@@ -488,6 +499,15 @@
|
|
| 488 |
}
|
| 489 |
}
|
| 490 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
// Few-Shot Builder
|
| 492 |
const fsFile = document.getElementById('fs-file');
|
| 493 |
const fsPreview = document.getElementById('fs-preview');
|
|
@@ -793,7 +813,8 @@
|
|
| 793 |
temperature: parseFloat(tempSlider.value),
|
| 794 |
top_p: parseFloat(pSlider.value),
|
| 795 |
top_k: parseInt(kSlider.value),
|
| 796 |
-
max_frames: parseInt(framesSlider.value)
|
|
|
|
| 797 |
});
|
| 798 |
|
| 799 |
let finalAnswer = "";
|
|
|
|
| 280 |
<input type="range" id="frames-slider" min="8" max="256" step="8" value="64" class="control-slider">
|
| 281 |
</div>
|
| 282 |
|
| 283 |
+
<button id="open-fewshot" class="w-full py-4 rounded-2xl bg-white/5 hover:bg-white/10 border border-white/5 transition-all flex items-center justify-center gap-2 group mb-2">
|
| 284 |
+
<i data-lucide="sparkles" class="w-4 h-4 text-[#27D4EA] group-hover:scale-110 transition-transform"></i>
|
| 285 |
+
<span class="text-sm font-bold">Few-Shot Builder</span>
|
| 286 |
+
</button>
|
| 287 |
+
|
| 288 |
<button onclick="clearHistory()" class="w-full py-4 rounded-2xl bg-red-500/10 border border-red-500/20 text-red-500 text-sm font-bold hover:bg-red-500/20 transition-all flex items-center justify-center gap-2">
|
| 289 |
<i data-lucide="trash-2" class="w-4 h-4"></i>
|
| 290 |
Clear Conversation
|
|
|
|
| 370 |
<!-- Few-Shot Area (Tab 2) -->
|
| 371 |
<div id="tab-fewshot" class="tab-content flex-col items-center pt-32 px-4 h-full overflow-y-auto">
|
| 372 |
<div class="max-w-3xl w-full space-y-8 pb-20">
|
| 373 |
+
<div class="flex items-center justify-between">
|
| 374 |
+
<div class="space-y-2">
|
| 375 |
+
<h2 class="text-2xl font-bold tracking-tight">Few-Shot Builder</h2>
|
| 376 |
+
<p class="text-white/40 text-sm">Add custom examples to guide the model's behavior.</p>
|
| 377 |
+
</div>
|
| 378 |
+
<button id="return-chat" class="px-6 py-2 rounded-full bg-white/5 hover:bg-white/10 border border-white/10 transition-all flex items-center gap-2 text-xs font-bold uppercase tracking-widest">
|
| 379 |
+
<i data-lucide="arrow-left" class="w-4 h-4"></i>
|
| 380 |
+
Back to Chat
|
| 381 |
+
</button>
|
| 382 |
</div>
|
| 383 |
|
| 384 |
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
|
|
|
| 499 |
}
|
| 500 |
}
|
| 501 |
|
| 502 |
+
const openFewShot = document.getElementById('open-fewshot');
|
| 503 |
+
const returnChat = document.getElementById('return-chat');
|
| 504 |
+
|
| 505 |
+
openFewShot.onclick = () => {
|
| 506 |
+
toggleSettingsSidebar(false);
|
| 507 |
+
switchTab('fewshot');
|
| 508 |
+
};
|
| 509 |
+
returnChat.onclick = () => switchTab('chat');
|
| 510 |
+
|
| 511 |
// Few-Shot Builder
|
| 512 |
const fsFile = document.getElementById('fs-file');
|
| 513 |
const fsPreview = document.getElementById('fs-preview');
|
|
|
|
| 813 |
temperature: parseFloat(tempSlider.value),
|
| 814 |
top_p: parseFloat(pSlider.value),
|
| 815 |
top_k: parseInt(kSlider.value),
|
| 816 |
+
max_frames: parseInt(framesSlider.value),
|
| 817 |
+
generation_mode: generationMode
|
| 818 |
});
|
| 819 |
|
| 820 |
let finalAnswer = "";
|