Spaces:

akhaliq
/

MiniCPM-V-4.6

Running on Zero

App Files Files Community

akhaliq HF Staff committed 8 days ago

Commit

abca38d

1 Parent(s): 7ae5863

feat: implement file persistence for audit logging, improve text normalization logic, add beam search support, and update UI with a few-shot builder interface.

Browse files

Files changed (2) hide show

app.py +53 -10
index.html +25 -4

app.py CHANGED Viewed

@@ -132,6 +132,7 @@ model = MiniCPMV4_6ForConditionalGeneration.from_pretrained(
 # ---------- Logging & Helper Functions ----------
 PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
 LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
 HTTP_LOG_FILE = os.path.join(LOG_DIR, "http_requests.jsonl")
 RAW_OUTPUT_LOG_FILE = os.path.join(LOG_DIR, "raw_model_outputs.jsonl")
 HTTP_LOG_LOCK = threading.Lock()
@@ -184,14 +185,46 @@ def load_video(video_path, max_frames=64):
         print(f"Error loading video: {e}")
         return None
 def normalize_response_text(text: str) -> str:
-    """
-    UI rendering layer: convert literal \\n to real newlines.
-    """
     if not isinstance(text, str) or "\\" not in text:
         return text
-    # Simple normalization for parity
-    return text.replace("\\n", "\n").replace("\\r", "\r")
 # ---------- Inference Endpoint ----------
@@ -208,12 +241,16 @@ def predict(
     temperature: float = 0.7,
     top_p: float = 0.8,
     top_k: int = 100,
-    max_frames: int = 64
 ) -> Generator[str, None, None]:
     """
     Streaming inference endpoint with history support.
     """
     session_id = str(uuid.uuid4())
     messages = []
     # Process history
@@ -290,15 +327,21 @@ def predict(
         skip_special_tokens=True,
     )
     generate_kwargs = {
         **inputs,
         "max_new_tokens": max_new_tokens,
-        "do_sample": temperature > 0,
-        "temperature": temperature,
-        "top_p": top_p,
-        "top_k": top_k,
         "streamer": streamer,
     }
     thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
     thread.start()

 # ---------- Logging & Helper Functions ----------
 PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
 LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
+UPLOAD_LOG_DIR = os.path.join(LOG_DIR, "uploads")
 HTTP_LOG_FILE = os.path.join(LOG_DIR, "http_requests.jsonl")
 RAW_OUTPUT_LOG_FILE = os.path.join(LOG_DIR, "raw_model_outputs.jsonl")
 HTTP_LOG_LOCK = threading.Lock()
         print(f"Error loading video: {e}")
         return None
def persist_uploaded_files(files: list, session_id: str) -> list:
    """Copy Gradio temp uploads into the project log directory.

    Files are copied into ``UPLOAD_LOG_DIR/<session_id>`` (the directory is
    created on demand) under a unique name made of a UTC timestamp, a short
    random hex suffix and the original base name.

    Args:
        files: Upload entries. Each entry is either a filesystem path or a
            dict carrying the path under the ``"path"`` key. ``None`` or an
            empty list yields ``[]`` without touching the filesystem.
        session_id: Name of the per-session sub-directory; ``"session"`` is
            used when it is empty.

    Returns:
        A list, in input order, holding the destination path of every file
        that was copied. An entry whose path is not an existing regular file
        is passed through unchanged. Entries with no usable path (``None``,
        a dict without ``"path"``, or any non-path value) are skipped.

    Raises:
        OSError: If the destination directory cannot be created or a copy
            fails.
    """
    if not files:
        return []

    dest_dir = os.path.join(UPLOAD_LOG_DIR, session_id or "session")
    os.makedirs(dest_dir, exist_ok=True)

    persisted = []
    for entry in files:
        src = entry.get("path") if isinstance(entry, dict) else entry
        if not isinstance(src, (str, os.PathLike)):
            # Optional upload slots can be empty; there is nothing to copy or
            # report for them, and os.path.isfile() would raise on None.
            continue
        if not os.path.isfile(src):
            persisted.append(src)
            continue
        base = os.path.basename(src)
        stamp = time.strftime("%Y%m%dT%H%M%SZ", time.gmtime())
        # The random suffix keeps same-named uploads within one second apart.
        dest = os.path.join(dest_dir, f"{stamp}-{uuid.uuid4().hex[:8]}-{base}")
        shutil.copy2(src, dest)
        persisted.append(dest)
    return persisted
 def normalize_response_text(text: str) -> str:
     r"""Convert literal ``\n`` escape sequences in *text* to real newlines.

     Conversion rules (a sequence preceded by another backslash is never
     touched):

     * a run of two or more literal ``\r\n`` / ``\n`` / ``\r`` sequences
       becomes one newline per line break, with ``\r\n`` counted once;
     * a single literal ``\r\n`` becomes one newline;
     * a single literal ``\n`` becomes a newline unless an ASCII letter
       follows it, which keeps commands such as ``\nabla`` or ``\neq``
       intact.

     Fenced code blocks get the same conversion applied to their contents and
     are then shielded from the second pass. Inline code spans are kept
     verbatim.

     Args:
         text: Text to normalize. Non-string values and strings without a
             backslash are returned unchanged.

     Returns:
         The normalized text.
     """
     if not isinstance(text, str) or "\\" not in text:
         return text

     stash = []  # (placeholder, original) pairs in creation order

     def _convert(value):
         # Count "\r\n" as ONE line break so a run of N breaks yields N
         # newlines, matching what the single-sequence rules below produce.
         value = re.sub(
             r"(?<!\\)(?:\\r\\n|\\n|\\r){2,}",
             lambda m: "\n" * len(re.findall(r"\\r\\n|\\n|\\r", m.group(0))),
             value,
         )
         value = re.sub(r"(?<!\\)\\r\\n", "\n", value)
         value = re.sub(r"(?<!\\)\\n(?![a-zA-Z])", "\n", value)
         return value

     def _stash(fragment):
         # NUL-delimited keys cannot be altered by _convert.
         key = f"\x00P{len(stash)}\x00"
         stash.append((key, fragment))
         return key

     res = re.sub(r"```[\s\S]*?```", lambda m: _stash(_convert(m.group(0))), text)
     res = re.sub(r"`[^`]+`", lambda m: _stash(m.group(0)), res)
     res = _convert(res)

     # Restore newest-first: an inline span stashed later may itself contain
     # the placeholder of an earlier fenced block.
     for key, fragment in reversed(stash):
         res = res.replace(key, fragment)
     return res
 # ---------- Inference Endpoint ----------
     temperature: float = 0.7,
     top_p: float = 0.8,
     top_k: int = 100,
+    max_frames: int = 64,
+    generation_mode: str = "Sampling"
 ) -> Generator[str, None, None]:
     """
     Streaming inference endpoint with history support.
     """
     session_id = str(uuid.uuid4())
+    # Persist files first for audit parity
+    if files:
+        persist_uploaded_files(files, session_id)
     messages = []
     # Process history
         skip_special_tokens=True,
     )
+    sampling = (generation_mode == "Sampling")
     generate_kwargs = {
         **inputs,
         "max_new_tokens": max_new_tokens,
+        "do_sample": sampling,
         "streamer": streamer,
     }
+    if sampling:
+        generate_kwargs.update({
+            "temperature": temperature,
+            "top_p": top_p,
+            "top_k": top_k,
+        })
+    else:
+        generate_kwargs.update({"num_beams": 1})
     thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
     thread.start()

index.html CHANGED Viewed

@@ -280,6 +280,11 @@
                 <input type="range" id="frames-slider" min="8" max="256" step="8" value="64" class="control-slider">
             </div>
             <button onclick="clearHistory()" class="w-full py-4 rounded-2xl bg-red-500/10 border border-red-500/20 text-red-500 text-sm font-bold hover:bg-red-500/20 transition-all flex items-center justify-center gap-2">
                 <i data-lucide="trash-2" class="w-4 h-4"></i>
                 Clear Conversation
@@ -365,9 +370,15 @@
     <!-- Few-Shot Area (Tab 2) -->
     <div id="tab-fewshot" class="tab-content flex-col items-center pt-32 px-4 h-full overflow-y-auto">
         <div class="max-w-3xl w-full space-y-8 pb-20">
-            <div class="text-center space-y-2">
-                <h2 class="text-2xl font-bold tracking-tight">Demonstration Builder</h2>
-                <p class="text-white/40 text-sm">Add custom examples to guide the model's behavior.</p>
             </div>
             <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
@@ -488,6 +499,15 @@
             }
         }
         // Few-Shot Builder
         const fsFile = document.getElementById('fs-file');
         const fsPreview = document.getElementById('fs-preview');
@@ -793,7 +813,8 @@
                     temperature: parseFloat(tempSlider.value),
                     top_p: parseFloat(pSlider.value),
                     top_k: parseInt(kSlider.value),
-                    max_frames: parseInt(framesSlider.value)
                 });
                 let finalAnswer = "";

                 <input type="range" id="frames-slider" min="8" max="256" step="8" value="64" class="control-slider">
             </div>
+            <button id="open-fewshot" class="w-full py-4 rounded-2xl bg-white/5 hover:bg-white/10 border border-white/5 transition-all flex items-center justify-center gap-2 group mb-2">
+                <i data-lucide="sparkles" class="w-4 h-4 text-[#27D4EA] group-hover:scale-110 transition-transform"></i>
+                <span class="text-sm font-bold">Few-Shot Builder</span>
+            </button>
             <button onclick="clearHistory()" class="w-full py-4 rounded-2xl bg-red-500/10 border border-red-500/20 text-red-500 text-sm font-bold hover:bg-red-500/20 transition-all flex items-center justify-center gap-2">
                 <i data-lucide="trash-2" class="w-4 h-4"></i>
                 Clear Conversation
     <!-- Few-Shot Area (Tab 2) -->
     <div id="tab-fewshot" class="tab-content flex-col items-center pt-32 px-4 h-full overflow-y-auto">
         <div class="max-w-3xl w-full space-y-8 pb-20">
+            <div class="flex items-center justify-between">
+                <div class="space-y-2">
+                    <h2 class="text-2xl font-bold tracking-tight">Few-Shot Builder</h2>
+                    <p class="text-white/40 text-sm">Add custom examples to guide the model's behavior.</p>
+                </div>
+                <button id="return-chat" class="px-6 py-2 rounded-full bg-white/5 hover:bg-white/10 border border-white/10 transition-all flex items-center gap-2 text-xs font-bold uppercase tracking-widest">
+                    <i data-lucide="arrow-left" class="w-4 h-4"></i>
+                    Back to Chat
+                </button>
             </div>
             <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
             }
         }
+        const openFewShot = document.getElementById('open-fewshot');
+        const returnChat = document.getElementById('return-chat');
+        openFewShot.onclick = () => {
+            toggleSettingsSidebar(false);
+            switchTab('fewshot');
+        };
+        returnChat.onclick = () => switchTab('chat');
         // Few-Shot Builder
         const fsFile = document.getElementById('fs-file');
         const fsPreview = document.getElementById('fs-preview');
                     temperature: parseFloat(tempSlider.value),
                     top_p: parseFloat(pSlider.value),
                     top_k: parseInt(kSlider.value),
+                    max_frames: parseInt(framesSlider.value),
+                    generation_mode: generationMode
                 });
                 let finalAnswer = "";