Spaces:

anky2002
/

FORENSIQ

Running

anky2002 committed on 14 days ago

Commit

6845e7a

verified ·

1 Parent(s): 8a218d4

Upload agents/semantic_agent.py with huggingface_hub

Files changed (1) hide show

agents/semantic_agent.py CHANGED Viewed

@@ -18,7 +18,7 @@ def _b64(img, mx=1024):
     buf=io.BytesIO(); img.convert("RGB").save(buf,"JPEG",quality=90); return base64.b64encode(buf.getvalue()).decode()
 def _vlm(img, sys_prompt, user_prompt):
-    """Call VLM with deterministic timeout and single retry."""
     try:
         from openai import OpenAI
     except ImportError: return None
@@ -28,7 +28,7 @@ def _vlm(img, sys_prompt, user_prompt):
     client=OpenAI(
         base_url="https://router.huggingface.co/v1",
         api_key=token,
-        timeout=60.0,  # Deterministic 60s timeout
     )
     b64=_b64(img)
     messages=[
@@ -39,8 +39,9 @@ def _vlm(img, sys_prompt, user_prompt):
         ]}
     ]
-    # Try up to 2 times for deterministic behavior
-    for attempt in range(2):
         try:
             resp=client.chat.completions.create(
                 model="Qwen/Qwen2.5-VL-72B-Instruct",
@@ -50,10 +51,13 @@ def _vlm(img, sys_prompt, user_prompt):
             )
             return resp.choices[0].message.content
         except Exception as e:
-            if attempt == 0:
-                import time; time.sleep(2)  # Brief pause before retry
                 continue
-            return f"VLM_ERROR: {e}"
     return "VLM_ERROR: exhausted retries"
 def _parse(text):

     buf=io.BytesIO(); img.convert("RGB").save(buf,"JPEG",quality=90); return base64.b64encode(buf.getvalue()).decode()
 def _vlm(img, sys_prompt, user_prompt):
+    """Call VLM with generous timeout and retry for cold-start."""
     try:
         from openai import OpenAI
     except ImportError: return None
     client=OpenAI(
         base_url="https://router.huggingface.co/v1",
         api_key=token,
+        timeout=90.0,  # 90s — 72B model needs time for cold start
     )
     b64=_b64(img)
     messages=[
         ]}
     ]
+    # Try up to 3 times with linearly increasing backoff (cold start can take 30s+)
+    last_error = None
+    for attempt in range(3):
         try:
             resp=client.chat.completions.create(
                 model="Qwen/Qwen2.5-VL-72B-Instruct",
             )
             return resp.choices[0].message.content
         except Exception as e:
+            last_error = e
+            if attempt < 2:
+                import time
+                wait = 3 * (attempt + 1)  # 3s, 6s backoff
+                time.sleep(wait)
                 continue
+    return f"VLM_ERROR: {last_error}"
     return "VLM_ERROR: exhausted retries"
 def _parse(text):