Spaces:

lablab-ai-amd-developer-hackathon
/

medivision-ai-agent

Sleeping

dikheng committed on 17 days ago

Commit

f89a5cf

1 Parent(s): 432592c

refactor: system/user prompt split + robust JSON extraction

- generate_response now accepts system_prompt + user_prompt separately
(proper roles in chat completions); exposes max_tokens, temperature,
force_json params per agent call
- _extract_json helper: strips markdown fences, falls back to scanning
balanced braces — eliminates JSON parse failures in production
- vision/clinical/format/chat agents updated to use new signature with
per-step token and temperature budgets
- prompts rewritten for cleaner system instructions and stricter JSON
schema enforcement

Files changed (5) hide show

app.py +101 -12
src/agents.py +130 -30
src/inference.py +12 -8
src/model_loader.py +46 -19
src/prompts.py +129 -57

app.py CHANGED Viewed

@@ -659,11 +659,102 @@ def _empty_soap_html(lang: str) -> str:
     return _build_soap_html("", lang)
 def _build_result_html(result: dict, lang: str) -> str:
     t             = _I18N.get(lang, _I18N["en"])
     triage        = result.get("triage_level", "Low")
     patient_msg   = result.get("patient_message", "")
     conditions    = result.get("possible_conditions", [])
     metrics       = result.get("_metrics", {})
     backend_tag = (
@@ -672,7 +763,6 @@ def _build_result_html(result: dict, lang: str) -> str:
         "border:1px solid #16a34a;'>AMD Cloud</span>"
     )
-    # Triage color
     triage_colors = {
         "High":   ("#ef4444", "#7f1d1d"),
         "Medium": ("#f97316", "#431407"),
@@ -680,7 +770,6 @@ def _build_result_html(result: dict, lang: str) -> str:
     }
     t_color, t_bg = triage_colors.get(triage, ("#22c55e", "#052e16"))
-    # Red-flag flashing banner
     critical_banner = ""
     if triage == "High":
         critical_banner = f"""
@@ -692,19 +781,14 @@ def _build_result_html(result: dict, lang: str) -> str:
     </span>
   </div>"""
-    # Possible conditions chips
-    cond_chips = "".join(
-        f"<span style='background:#1e3a5f; color:#93c5fd; font-size:0.72rem; "
-        f"padding:3px 10px; border-radius:999px; border:1px solid #2563eb;'>{c}</span>"
-        for c in conditions
-    ) if conditions else "<span style='color:#6b7280;'>—</span>"
-    # Patient message paragraphs
     msg_html = "".join(
         f"<p style='margin:0 0 8px; color:#d1d5db; line-height:1.6;'>{line}</p>"
         for line in patient_msg.split("\n") if line.strip()
     ) if patient_msg else "<p style='color:#6b7280;'>—</p>"
     return f"""
 <div style='background:#111827; border:1px solid #ED1C24; border-radius:12px;
             padding:clamp(14px,4vw,20px); font-family:Arial,sans-serif; color:#f9fafb;
@@ -737,10 +821,12 @@ def _build_result_html(result: dict, lang: str) -> str:
   <div style='background:#1f2937; border-radius:8px; padding:14px; margin-bottom:12px;'>
     <div style='font-size:0.72rem; text-transform:uppercase; letter-spacing:.05em;
-                color:#9ca3af; margin-bottom:8px;'>{t['conditions_label']}</div>
-    <div style='display:flex; flex-wrap:wrap; gap:6px;'>{cond_chips}</div>
   </div>
   <div style='background:#1f2937; border-radius:8px; padding:14px; margin-bottom:12px;'>
     <div style='font-size:0.72rem; text-transform:uppercase; letter-spacing:.05em;
                 color:#9ca3af; margin-bottom:8px;'>
@@ -944,6 +1030,9 @@ def predict(image_1, image_2, symptoms: str, lang_choice: str, selected_regions)
             "visual_description":  result.get("visual_description", ""),
             "possible_conditions": result.get("possible_conditions", []),
             "triage_level":        result.get("triage_level", "Low"),
             "patient_message":     patient_msg,
         }
         return (

     return _build_soap_html("", lang)
+def _condition_probability_bars(conditions: list, t: dict) -> str:
+    """Render probability bars for each possible condition."""
+    if not conditions:
+        return "<span style='color:#6b7280;'>—</span>"
+    bars = []
+    for c in conditions:
+        if isinstance(c, dict):
+            name  = c.get("name", "Unknown")
+            prob  = int(c.get("probability", 50))
+            icd10 = c.get("icd10", "")
+        else:
+            name, prob, icd10 = str(c), 50, ""
+        fill = "#ef4444" if prob >= 70 else "#f97316" if prob >= 45 else "#eab308"
+        icd_badge = (
+            f"<span style='font-size:0.6rem; color:#6b7280; background:#0f172a; "
+            f"padding:1px 5px; border-radius:3px; margin-left:4px; font-family:monospace;'>"
+            f"{icd10}</span>"
+        ) if icd10 else ""
+        bars.append(
+            f"<div style='margin-bottom:10px;'>"
+            f"  <div style='display:flex; align-items:center; justify-content:space-between; margin-bottom:3px;'>"
+            f"    <span style='font-size:0.8rem; color:#e2e8f0; font-weight:600;'>{name}{icd_badge}</span>"
+            f"    <span style='font-size:0.75rem; color:{fill}; font-weight:700;'>{prob}%</span>"
+            f"  </div>"
+            f"  <div style='background:#374151; border-radius:9999px; height:7px; overflow:hidden;'>"
+            f"    <div style='background:{fill}; width:{prob}%; height:100%; border-radius:9999px; "
+            f"         transition:width 0.7s ease;'></div>"
+            f"  </div>"
+            f"</div>"
+        )
+    return "".join(bars)
+def _red_flags_panel(red_flags: list, watch_symptoms: list, urgency_reason: str) -> str:
+    """Render red flags and watch symptoms warning panel. Returns empty string if nothing to show."""
+    has_flags   = bool(red_flags)
+    has_watch   = bool(watch_symptoms)
+    has_urgency = bool(urgency_reason)
+    if not has_flags and not has_watch and not has_urgency:
+        return ""
+    flags_html = ""
+    if has_flags:
+        items = "".join(
+            f"<li style='margin:3px 0; color:#fca5a5;'>&#9888; {f}</li>"
+            for f in red_flags
+        )
+        flags_html = (
+            f"<div style='font-size:0.72rem; color:#ef4444; font-weight:700; "
+            f"text-transform:uppercase; letter-spacing:.04em; margin-bottom:6px;'>Red Flags</div>"
+            f"<ul style='margin:0 0 10px; padding-left:18px; list-style:none;'>{items}</ul>"
+        )
+    watch_html = ""
+    if has_watch:
+        items = "".join(
+            f"<li style='margin:3px 0; color:#fde68a;'>&#128065; {w}</li>"
+            for w in watch_symptoms
+        )
+        watch_html = (
+            f"<div style='font-size:0.72rem; color:#f59e0b; font-weight:700; "
+            f"text-transform:uppercase; letter-spacing:.04em; margin-bottom:6px;'>Watch For</div>"
+            f"<ul style='margin:0; padding-left:18px; list-style:none;'>{items}</ul>"
+        )
+    urgency_html = ""
+    if has_urgency:
+        urgency_html = (
+            f"<div style='font-size:0.75rem; color:#9ca3af; font-style:italic; "
+            f"border-top:1px solid #374151; padding-top:8px; margin-top:8px;'>"
+            f"&#9432; {urgency_reason}</div>"
+        )
+    border_color = "#ef4444" if has_flags else "#f59e0b"
+    bg_color     = "#1c0a0a" if has_flags else "#1c1000"
+    return (
+        f"<div style='background:{bg_color}; border:1px solid {border_color}; "
+        f"border-left:4px solid {border_color}; border-radius:8px; "
+        f"padding:12px 14px; margin-bottom:12px;'>"
+        f"{flags_html}{watch_html}{urgency_html}"
+        f"</div>"
+    )
 def _build_result_html(result: dict, lang: str) -> str:
     t             = _I18N.get(lang, _I18N["en"])
     triage        = result.get("triage_level", "Low")
     patient_msg   = result.get("patient_message", "")
     conditions    = result.get("possible_conditions", [])
+    red_flags     = result.get("red_flags", [])
+    watch_symptoms = result.get("watch_symptoms", [])
+    urgency_reason = result.get("urgency_reason", "")
     metrics       = result.get("_metrics", {})
     backend_tag = (
         "border:1px solid #16a34a;'>AMD Cloud</span>"
     )
     triage_colors = {
         "High":   ("#ef4444", "#7f1d1d"),
         "Medium": ("#f97316", "#431407"),
     }
     t_color, t_bg = triage_colors.get(triage, ("#22c55e", "#052e16"))
     critical_banner = ""
     if triage == "High":
         critical_banner = f"""
     </span>
   </div>"""
     msg_html = "".join(
         f"<p style='margin:0 0 8px; color:#d1d5db; line-height:1.6;'>{line}</p>"
         for line in patient_msg.split("\n") if line.strip()
     ) if patient_msg else "<p style='color:#6b7280;'>—</p>"
+    cond_bars      = _condition_probability_bars(conditions, t)
+    alert_panel    = _red_flags_panel(red_flags, watch_symptoms, urgency_reason)
     return f"""
 <div style='background:#111827; border:1px solid #ED1C24; border-radius:12px;
             padding:clamp(14px,4vw,20px); font-family:Arial,sans-serif; color:#f9fafb;
   <div style='background:#1f2937; border-radius:8px; padding:14px; margin-bottom:12px;'>
     <div style='font-size:0.72rem; text-transform:uppercase; letter-spacing:.05em;
+                color:#9ca3af; margin-bottom:10px;'>{t['conditions_label']}</div>
+    {cond_bars}
   </div>
+  {alert_panel}
   <div style='background:#1f2937; border-radius:8px; padding:14px; margin-bottom:12px;'>
     <div style='font-size:0.72rem; text-transform:uppercase; letter-spacing:.05em;
                 color:#9ca3af; margin-bottom:8px;'>
             "visual_description":  result.get("visual_description", ""),
             "possible_conditions": result.get("possible_conditions", []),
             "triage_level":        result.get("triage_level", "Low"),
+            "urgency_reason":      result.get("urgency_reason", ""),
+            "red_flags":           result.get("red_flags", []),
+            "watch_symptoms":      result.get("watch_symptoms", []),
             "patient_message":     patient_msg,
         }
         return (

src/agents.py CHANGED Viewed

@@ -2,7 +2,13 @@ import json
 import re
 from src.model_loader import generate_response, generate_text
-from src.prompts import VISION_AGENT_SYSTEM, CLINICAL_AGENT_SYSTEM, PATIENT_AGENT_SYSTEM, SOAP_AGENT_SYSTEM, CHAT_AGENT_SYSTEM
 _LANG_NAMES = {
     "en": "English",
@@ -13,73 +19,153 @@ _LANG_NAMES = {
     "ja": "Japanese",
 }
 _NO_IMAGE_DESC = "(No image provided — assessment based on patient symptom text only.)"
 _ZERO_METRICS = {"latency_ms": 0, "total_tokens": 0, "tokens_per_sec": 0}
 def vision_agent(image_path_1, image_path_2, symptoms: str) -> tuple[str, dict]:
-    """Step 1: strictly objective visual description. Returns (description_text, metrics)."""
     if not image_path_1 and not image_path_2:
         return _NO_IMAGE_DESC, _ZERO_METRICS.copy()
     two_images = bool(image_path_2)
-    user_msg = VISION_AGENT_SYSTEM + "\n\n"
     if two_images:
-        user_msg += "TWO images are provided: the first image is Day 1, the second image is Day X.\n\n"
-    user_msg += f"Patient symptom text: {symptoms or '(none provided)'}"
-    return generate_response(user_msg, image_path=image_path_1 or None,
-                             image_path_2=image_path_2 or None)
 def clinical_agent(visual_description: str, symptoms: str, lang: str = "en") -> tuple[dict, dict]:
-    """Step 2: clinical reasoning → strict JSON. Returns (parsed_dict, metrics)."""
     lang_name = _LANG_NAMES.get(lang, "English")
-    prompt = (
-        CLINICAL_AGENT_SYSTEM + "\n\n"
-        f"TARGET LANGUAGE FOR CONDITIONS: {lang_name}\n\n"
         f"VISUAL DESCRIPTION:\n{visual_description}\n\n"
         f"PATIENT SYMPTOMS:\n{symptoms or '(none provided)'}"
     )
-    raw, metrics = generate_text(prompt)
-    match = re.search(r'\{.*\}', raw, re.DOTALL)
-    if not match:
-        raise ValueError(f"Clinical agent did not return JSON: {raw[:300]}")
-    data = json.loads(match.group())
     return {
         "triage_level":        data.get("triage_level", "Low"),
-        "possible_conditions": data.get("possible_conditions", []),
         "clinical_assessment": data.get("clinical_assessment", ""),
         "recommendation":      data.get("recommendation", ""),
     }, metrics
 def chat_agent(question: str, context: dict, history: list, lang: str) -> tuple[str, dict]:
-    """Follow-up Q&A. Returns (answer_text, metrics)."""
     lang_name = _LANG_NAMES.get(lang, "English")
     ctx_block = (
         f"ANALYSIS CONTEXT:\n"
         f"- Visual description: {context.get('visual_description', '(none)')}\n"
-        f"- Possible conditions: {', '.join(context.get('possible_conditions', []))}\n"
         f"- Triage level: {context.get('triage_level', 'Low')}\n"
-        f"- Patient message given: {context.get('patient_message', '(none)')}"
     )
     history_block = ""
     for user_msg, bot_msg in (history or []):
         history_block += f"\nPatient: {user_msg}\nAssistant: {bot_msg}"
-    prompt = (
-        CHAT_AGENT_SYSTEM + "\n\n"
         f"TARGET LANGUAGE: {lang_name}\n\n"
         f"{ctx_block}\n"
         f"{history_block}\n\n"
         f"Patient: {question}\nAssistant:"
     )
-    answer, metrics = generate_text(prompt)
     return answer.strip(), metrics
 def format_agent(clinical_json: dict, visual_description: str,
                  symptoms: str, lang: str) -> tuple[str, str, dict]:
-    """Step 3a+3b: patient message and SOAP note as two separate LLM calls."""
     lang_name = _LANG_NAMES.get(lang, "English")
     context = (
         f"TARGET LANGUAGE: {lang_name}\n\n"
@@ -87,11 +173,25 @@ def format_agent(clinical_json: dict, visual_description: str,
         f"VISUAL DESCRIPTION (Objective):\n{visual_description}\n\n"
         f"CLINICAL JSON:\n{json.dumps(clinical_json, ensure_ascii=False, indent=2)}"
     )
-    patient_msg, m3a = generate_text(PATIENT_AGENT_SYSTEM + "\n\n" + context)
-    soap,        m3b = generate_text(SOAP_AGENT_SYSTEM    + "\n\n" + context)
     metrics = {
-        "latency_ms":    m3a["latency_ms"] + m3b["latency_ms"],
-        "total_tokens":  m3a["total_tokens"] + m3b["total_tokens"],
-        "tokens_per_sec": round((m3a.get("tokens_per_sec", 0) + m3b.get("tokens_per_sec", 0)) / 2, 1),
     }
     return patient_msg.strip(), soap.strip(), metrics

 import re
 from src.model_loader import generate_response, generate_text
+from src.prompts import (
+    VISION_AGENT_SYSTEM,
+    CLINICAL_AGENT_SYSTEM,
+    PATIENT_AGENT_SYSTEM,
+    SOAP_AGENT_SYSTEM,
+    CHAT_AGENT_SYSTEM,
+)
 _LANG_NAMES = {
     "en": "English",
     "ja": "Japanese",
 }
 _NO_IMAGE_DESC = "(No image provided — assessment based on patient symptom text only.)"
 _ZERO_METRICS = {"latency_ms": 0, "total_tokens": 0, "tokens_per_sec": 0}
+def _extract_json(raw: str) -> dict:
+    """Robustly extract first JSON object from LLM output, stripping markdown fences."""
+    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw.strip(), flags=re.MULTILINE)
+    try:
+        return json.loads(cleaned)
+    except json.JSONDecodeError:
+        pass
+    # Scan for first balanced {...} block
+    depth = 0
+    start = None
+    for i, ch in enumerate(cleaned):
+        if ch == "{":
+            if depth == 0:
+                start = i
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0 and start is not None:
+                try:
+                    return json.loads(cleaned[start:i + 1])
+                except json.JSONDecodeError:
+                    continue
+    raise ValueError(f"No valid JSON object found in response: {raw[:300]}")
 def vision_agent(image_path_1, image_path_2, symptoms: str) -> tuple[str, dict]:
+    """
+    Step 1: objective visual description.
+    Returns (description_text, metrics).
+    """
     if not image_path_1 and not image_path_2:
         return _NO_IMAGE_DESC, _ZERO_METRICS.copy()
     two_images = bool(image_path_2)
+    user_prompt = ""
     if two_images:
+        user_prompt += "TWO images provided: first image is Day 1 (baseline), second image is Day X (follow-up).\n\n"
+    user_prompt += f"Patient symptom report: {symptoms or '(none provided)'}\n\nAnalyze the image(s) as instructed."
+    return generate_response(
+        system_prompt=VISION_AGENT_SYSTEM,
+        user_prompt=user_prompt,
+        image_path=image_path_1 or None,
+        image_path_2=image_path_2 or None,
+        max_tokens=600,
+        temperature=0.0,
+    )
 def clinical_agent(visual_description: str, symptoms: str, lang: str = "en") -> tuple[dict, dict]:
+    """
+    Step 2: clinical reasoning → structured JSON with richer schema.
+    Returns (parsed_dict, metrics).
+    """
     lang_name = _LANG_NAMES.get(lang, "English")
+    user_prompt = (
+        f"TARGET LANGUAGE: {lang_name}\n\n"
         f"VISUAL DESCRIPTION:\n{visual_description}\n\n"
         f"PATIENT SYMPTOMS:\n{symptoms or '(none provided)'}"
     )
+    raw, metrics = generate_text(
+        system_prompt=CLINICAL_AGENT_SYSTEM,
+        user_prompt=user_prompt,
+        max_tokens=800,
+        temperature=0.0,
+        force_json=True,
+    )
+    data = _extract_json(raw)
+    # Normalise possible_conditions — support new {name, probability, icd10} schema
+    # and gracefully handle plain-string fallback from older model outputs
+    raw_conditions = data.get("possible_conditions", [])
+    conditions = []
+    for item in raw_conditions:
+        if isinstance(item, dict):
+            conditions.append({
+                "name":        str(item.get("name", item.get("condition", "Unknown"))),
+                "probability": int(item.get("probability", item.get("match_probability", 50))),
+                "icd10":       str(item.get("icd10", item.get("icd10_code", ""))),
+            })
+        elif isinstance(item, str):
+            conditions.append({"name": item, "probability": 50, "icd10": ""})
     return {
         "triage_level":        data.get("triage_level", "Low"),
+        "urgency_reason":      data.get("urgency_reason", ""),
+        "possible_conditions": conditions,
+        "red_flags":           data.get("red_flags", []),
+        "watch_symptoms":      data.get("watch_symptoms", []),
         "clinical_assessment": data.get("clinical_assessment", ""),
         "recommendation":      data.get("recommendation", ""),
     }, metrics
 def chat_agent(question: str, context: dict, history: list, lang: str) -> tuple[str, dict]:
+    """
+    Follow-up Q&A. Returns (answer_text, metrics).
+    """
     lang_name = _LANG_NAMES.get(lang, "English")
+    conditions_text = ", ".join(
+        c["name"] if isinstance(c, dict) else c
+        for c in context.get("possible_conditions", [])
+    )
     ctx_block = (
         f"ANALYSIS CONTEXT:\n"
         f"- Visual description: {context.get('visual_description', '(none)')}\n"
+        f"- Possible conditions: {conditions_text}\n"
         f"- Triage level: {context.get('triage_level', 'Low')}\n"
+        f"- Urgency reason: {context.get('urgency_reason', '')}\n"
+        f"- Red flags: {'; '.join(context.get('red_flags', [])) or 'none'}\n"
+        f"- Patient message: {context.get('patient_message', '(none)')}"
     )
     history_block = ""
     for user_msg, bot_msg in (history or []):
         history_block += f"\nPatient: {user_msg}\nAssistant: {bot_msg}"
+    user_prompt = (
         f"TARGET LANGUAGE: {lang_name}\n\n"
         f"{ctx_block}\n"
         f"{history_block}\n\n"
         f"Patient: {question}\nAssistant:"
     )
+    answer, metrics = generate_text(
+        system_prompt=CHAT_AGENT_SYSTEM,
+        user_prompt=user_prompt,
+        max_tokens=300,
+        temperature=0.3,
+    )
     return answer.strip(), metrics
 def format_agent(clinical_json: dict, visual_description: str,
                  symptoms: str, lang: str) -> tuple[str, str, dict]:
+    """
+    Step 3a + 3b: patient message and SOAP note as two separate LLM calls.
+    Returns (patient_message, soap_note, combined_metrics).
+    """
     lang_name = _LANG_NAMES.get(lang, "English")
     context = (
         f"TARGET LANGUAGE: {lang_name}\n\n"
         f"VISUAL DESCRIPTION (Objective):\n{visual_description}\n\n"
         f"CLINICAL JSON:\n{json.dumps(clinical_json, ensure_ascii=False, indent=2)}"
     )
+    patient_msg, m3a = generate_text(
+        system_prompt=PATIENT_AGENT_SYSTEM,
+        user_prompt=context,
+        max_tokens=500,
+        temperature=0.4,
+    )
+    soap, m3b = generate_text(
+        system_prompt=SOAP_AGENT_SYSTEM,
+        user_prompt=context,
+        max_tokens=600,
+        temperature=0.0,
+    )
     metrics = {
+        "latency_ms":     m3a["latency_ms"] + m3b["latency_ms"],
+        "total_tokens":   m3a["total_tokens"] + m3b["total_tokens"],
+        "tokens_per_sec": round(
+            (m3a.get("tokens_per_sec", 0) + m3b.get("tokens_per_sec", 0)) / 2, 1
+        ),
     }
     return patient_msg.strip(), soap.strip(), metrics

src/inference.py CHANGED Viewed

@@ -6,13 +6,14 @@ class MediVisionPipeline:
                 lang: str = "en", region: str = "") -> dict:
         """
         Run the 3-step agentic pipeline:
-          Step 1 — Vision Agent: objective visual description
-          Step 2 — Clinical Agent: triage JSON
-          Step 3 — Format Agent: patient message + SOAP note
         Returns dict with keys:
-            triage_level, possible_conditions, patient_message,
-            soap_note, visual_description, _metrics
         """
         symptoms_full = f"{'Region: ' + region + '. ' if region else ''}{symptoms}"
@@ -21,19 +22,22 @@ class MediVisionPipeline:
         patient_msg, soap, m3 = format_agent(clinical, visual_desc, symptoms_full, lang)
         metrics = {
-            "latency_ms":    m1["latency_ms"] + m2["latency_ms"] + m3["latency_ms"],
-            "total_tokens":  m1["total_tokens"] + m2["total_tokens"] + m3["total_tokens"],
             "tokens_per_sec": round(
                 (m1.get("tokens_per_sec", 0) + m2.get("tokens_per_sec", 0) + m3.get("tokens_per_sec", 0)) / 3, 1
             ),
         }
         return {
             "triage_level":        clinical["triage_level"],
             "possible_conditions": clinical["possible_conditions"],
             "patient_message":     patient_msg,
             "soap_note":           soap,
             "visual_description":  visual_desc,
             "_metrics":            metrics,
-            # kept for follow-up chat context
             "_clinical":           clinical,
         }

                 lang: str = "en", region: str = "") -> dict:
         """
         Run the 3-step agentic pipeline:
+          Step 1 — Vision Agent:   objective visual description
+          Step 2 — Clinical Agent: structured triage JSON
+          Step 3 — Format Agent:   patient message + SOAP note
         Returns dict with keys:
+            triage_level, urgency_reason, possible_conditions,
+            red_flags, watch_symptoms, clinical_assessment,
+            patient_message, soap_note, visual_description, _metrics
         """
         symptoms_full = f"{'Region: ' + region + '. ' if region else ''}{symptoms}"
         patient_msg, soap, m3 = format_agent(clinical, visual_desc, symptoms_full, lang)
         metrics = {
+            "latency_ms":     m1["latency_ms"] + m2["latency_ms"] + m3["latency_ms"],
+            "total_tokens":   m1["total_tokens"] + m2["total_tokens"] + m3["total_tokens"],
             "tokens_per_sec": round(
                 (m1.get("tokens_per_sec", 0) + m2.get("tokens_per_sec", 0) + m3.get("tokens_per_sec", 0)) / 3, 1
             ),
         }
         return {
             "triage_level":        clinical["triage_level"],
+            "urgency_reason":      clinical["urgency_reason"],
             "possible_conditions": clinical["possible_conditions"],
+            "red_flags":           clinical["red_flags"],
+            "watch_symptoms":      clinical["watch_symptoms"],
+            "clinical_assessment": clinical["clinical_assessment"],
             "patient_message":     patient_msg,
             "soap_note":           soap,
             "visual_description":  visual_desc,
             "_metrics":            metrics,
             "_clinical":           clinical,
         }

src/model_loader.py CHANGED Viewed

@@ -62,21 +62,32 @@ def check_connection() -> tuple[bool, str]:
         return False, f"{type(exc).__name__}: {exc}"
-def generate_response(prompt: str, image_path: str = None,
-                      image_path_2: str = None) -> tuple[str, dict]:
     """
-    Send a request to the vLLM endpoint and return (text_output, metrics).
-    Supports 0, 1, or 2 images (image_path_2 for A/B comparison).
-    metrics keys:
-        latency_ms  – wall-clock time for the API call in milliseconds
-        total_tokens – total tokens used (prompt + completion), or 0 if unavailable
-        tokens_per_sec – completion tokens / latency, or 0 if unavailable
-    Raises RuntimeError if the backend is unreachable or returns an error.
     """
     try:
         client = _get_client()
         if image_path or image_path_2:
             content = []
@@ -88,18 +99,22 @@ def generate_response(prompt: str, image_path: str = None,
                 b64, mime = _encode_image(image_path_2)
                 content.append({"type": "image_url",
                                  "image_url": {"url": f"data:{mime};base64,{b64}"}})
-            content.append({"type": "text", "text": prompt})
-            messages = [{"role": "user", "content": content}]
         else:
-            messages = [{"role": "user", "content": prompt}]
-        t0 = time.perf_counter()
-        response = client.chat.completions.create(
             model=config.MODEL_NAME,
             messages=messages,
-            max_tokens=config.MAX_NEW_TOKENS,
-            temperature=config.TEMPERATURE,
         )
         latency_ms = (time.perf_counter() - t0) * 1000
         usage = getattr(response, "usage", None)
@@ -118,6 +133,18 @@ def generate_response(prompt: str, image_path: str = None,
         raise RuntimeError(f"AMD Cloud backend unreachable: {exc}") from exc
-def generate_text(prompt: str) -> tuple[str, dict]:
-    """Text-only call — same endpoint as generate_response(), no image encoding."""
-    return generate_response(prompt, image_path=None)

         return False, f"{type(exc).__name__}: {exc}"
+def generate_response(
+    system_prompt: str,
+    user_prompt: str,
+    image_path: str = None,
+    image_path_2: str = None,
+    max_tokens: int = None,
+    temperature: float = None,
+    force_json: bool = False,
+) -> tuple[str, dict]:
     """
+    Send a chat completion to the vLLM endpoint with proper system/user separation.
+    system_prompt → role: system
+    user_prompt   → role: user (may include 0, 1, or 2 images)
+    Returns (text_output, metrics).
+    metrics keys: latency_ms, total_tokens, tokens_per_sec
     """
     try:
         client = _get_client()
+        _max_tokens = max_tokens if max_tokens is not None else config.MAX_NEW_TOKENS
+        _temperature = temperature if temperature is not None else config.TEMPERATURE
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
         if image_path or image_path_2:
             content = []
                 b64, mime = _encode_image(image_path_2)
                 content.append({"type": "image_url",
                                  "image_url": {"url": f"data:{mime};base64,{b64}"}})
+            content.append({"type": "text", "text": user_prompt})
+            messages.append({"role": "user", "content": content})
         else:
+            messages.append({"role": "user", "content": user_prompt})
+        kwargs = dict(
             model=config.MODEL_NAME,
             messages=messages,
+            max_tokens=_max_tokens,
+            temperature=_temperature,
         )
+        if force_json:
+            kwargs["response_format"] = {"type": "json_object"}
+        t0 = time.perf_counter()
+        response = client.chat.completions.create(**kwargs)
         latency_ms = (time.perf_counter() - t0) * 1000
         usage = getattr(response, "usage", None)
         raise RuntimeError(f"AMD Cloud backend unreachable: {exc}") from exc
def generate_text(
    system_prompt: str,
    user_prompt: str,
    max_tokens: int = None,
    temperature: float = None,
    force_json: bool = False,
) -> tuple[str, dict]:
    """Run a chat completion without attaching any image.

    Forwards every argument to ``generate_response`` by keyword and leaves
    its image parameters at their defaults.

    Returns:
        Whatever ``generate_response`` returns for the same arguments.
    """
    forwarded = {
        "system_prompt": system_prompt,
        "user_prompt": user_prompt,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "force_json": force_json,
    }
    return generate_response(**forwarded)

src/prompts.py CHANGED Viewed

@@ -1,74 +1,146 @@
-VISION_AGENT_SYSTEM = """You are a medical imaging assistant performing STRICTLY OBJECTIVE visual analysis.
-Do NOT diagnose. Do NOT give medical advice. Do NOT speculate on conditions.
-Your ONLY job: describe exactly what you see in the image(s) using clinical descriptive language.
-If ONE image is provided, describe:
-- Lesion size (estimated), shape, border characteristics
-- Color(s), texture, surface features (scaling, crusting, ulceration, exudate)
-- Surrounding skin condition
-- Any signs of inflammation, swelling, or structural abnormality
-If TWO images are provided (Day 1 vs Day X), describe BOTH images separately, then compare:
-- Changes in size (larger / smaller / same)
-- Changes in color or border definition
-- Changes in surface features (scaling, crusting, exudate)
-- Overall progression verdict: IMPROVED / UNCHANGED / WORSENED
-Output: plain text only. No JSON. No diagnosis. No recommendations."""
-CLINICAL_AGENT_SYSTEM = """You are a clinical reasoning engine for a dermatology triage system.
-You receive: (1) an objective visual description and (2) the patient's symptom text.
-You perform clinical reasoning and output ONLY a JSON object — no extra text, no markdown fences.
-JSON schema (strict):
-{
-  "triage_level": "High" | "Medium" | "Low",
-  "possible_conditions": ["condition 1 in TARGET LANGUAGE", "condition 2 in TARGET LANGUAGE"],
-  "clinical_assessment": "brief medical reasoning (2-3 sentences max)",
-  "recommendation": "immediate actions or home care advice (2-4 sentences)"
-}
-triage_level rules:
-- "High": suspected melanoma, necrosis, severe cellulitis, rapidly spreading infection, deep burn
-- "Medium": moderate infection signs, non-healing wound >2 weeks, significant inflammation
-- "Low": minor abrasion, mild rash, superficial wound with no infection signs
-IMPORTANT: Write the condition names in possible_conditions in the TARGET LANGUAGE specified.
-Return ONLY the JSON object. No explanation before or after."""
-CHAT_AGENT_SYSTEM = """You are a medical assistant continuing a consultation with a patient.
-You have already completed an analysis of their condition. Use the provided analysis context to answer follow-up questions.
 RULES:
-- Answer in the TARGET LANGUAGE specified
-- Be concise, empathetic, and helpful
-- Reference the analysis context when relevant
-- Always recommend consulting a doctor for anything serious or worsening
-- Never diagnose — only provide general guidance based on the existing analysis
-- Do not repeat the full analysis; focus on answering the specific question asked"""
-PATIENT_AGENT_SYSTEM = """You are a medical communication specialist writing a patient-friendly message.
-Write ONLY the patient message — plain prose, no headings, no labels, no separators.
-Language: write entirely in the TARGET LANGUAGE specified in the input.
-Your message MUST cover all of the following in flowing sentences (minimum 5 sentences):
-1. An empathetic opening acknowledging the patient's concern
-2. If an image was provided: plain-language description of what was visually observed. If VISUAL DESCRIPTION starts with "(No image provided", skip this point entirely.
-3. The possible conditions explained in simple everyday terms (no medical jargon)
-4. Clear, actionable steps the patient should take
-5. A reassuring closing line encouraging them to consult a doctor for serious symptoms
-Output only the message text. No bullet points. No markdown. No extra commentary."""
-SOAP_AGENT_SYSTEM = """You are a clinical documentation specialist writing a SOAP note.
-Write ONLY the SOAP note in professional clinical English. No introduction, no commentary.
-Format exactly as:
-S (Subjective): [patient complaint paraphrased in English — translate if original is in another language]
-O (Objective): [visual findings summary — write "No image provided" if no image was given]
-A (Assessment): [possible conditions and clinical reasoning]
-P (Plan): [recommended clinical actions]
-Output only the four SOAP lines. Nothing before S, nothing after P."""

+# System prompt for the vision step: requests an objective, prose-only clinical
+# description of one image, or of two images (Day 1 vs Day X) followed by a
+# comparison, and explicitly forbids diagnosis, treatment advice and JSON output.
+VISION_AGENT_SYSTEM = """You are a clinical dermatologist and wound-care specialist performing OBJECTIVE visual analysis.
+Your task: describe exactly what you observe in the provided image(s) using precise clinical terminology.
+Do NOT diagnose. Do NOT speculate on internal conditions. Do NOT give treatment advice.
+SINGLE IMAGE — describe all of the following that are visible:
+Lesion type (macule, papule, plaque, vesicle, bulla, pustule, nodule, ulcer, erosion, crust, scar, wound).
+Size: estimate in centimeters.
+Shape: round, oval, irregular, linear, annular, serpiginous.
+Border: well-defined or ill-defined, regular or irregular, raised or flat.
+Color: all present colors (erythema, hyperpigmentation, pallor, violaceous, brown, black, yellow).
+Surface: smooth, scaling, crusting, exudate type (serous/purulent/hemorrhagic), ulceration depth.
+Surrounding skin: erythema halo, edema, warmth signs, satellite lesions.
+Distribution: localized, diffuse, grouped, linear, dermatomal.
+Structural abnormalities: tissue necrosis, exposed structures, foreign body.
+TWO IMAGES (Day 1 vs Day X) — describe EACH image separately using the criteria above, then add a COMPARISON:
+Size change: larger, smaller, or unchanged with estimated percentage.
+Color change: improved erythema, increased discoloration, new colors.
+Border change: more defined or more irregular.
+Surface change: re-epithelialization, new crusting, increased exudate, reduced scaling.
+Overall healing trajectory: IMPROVING, STABLE, or DETERIORATING.
+Any notable new findings since Day 1.
+OUTPUT: plain clinical prose. No bullet points. No headers. No JSON. No diagnosis.
+If image quality is poor, state "Image quality is limited; the following observations may be incomplete:" then proceed.
+If no abnormality is visible, state "No visible cutaneous abnormality detected on the provided image."
+Maximum 200 words per image."""
+# System prompt for the clinical reasoning step: the model is given the visual
+# description plus the patient's symptom report and must answer with a single
+# JSON object (triage_level, urgency_reason, possible_conditions, red_flags,
+# watch_symptoms, clinical_assessment, recommendation). Only condition names and
+# the recommendation are requested in TARGET LANGUAGE; other fields in English.
+CLINICAL_AGENT_SYSTEM = """You are an experienced dermatology triage physician with wound-care expertise.
+You receive: (1) an objective visual description from a vision specialist, and (2) the patient's own symptom report.
+Perform clinical reasoning and output ONLY a single JSON object. No text before or after. No markdown fences.
+Required schema:
+{
+  "triage_level": "High" or "Medium" or "Low",
+  "urgency_reason": "one sentence in English explaining WHY this triage level was assigned",
+  "possible_conditions": [
+    {"name": "condition name in TARGET LANGUAGE", "probability": integer 5 to 95, "icd10": "X00.0"}
+  ],
+  "red_flags": ["specific alarming sign from visual or symptom data — English only"],
+  "watch_symptoms": ["symptom that should prompt immediate re-evaluation — English only"],
+  "clinical_assessment": "2-3 sentences in English explaining pathophysiology connection between findings and symptoms",
+  "recommendation": "2-4 sentence action plan in TARGET LANGUAGE, ranked by urgency"
+}
+TRIAGE RULES:
+"High": suspected melanoma (asymmetry + irregular border + multiple colors + >6mm), necrotic tissue, deep ulceration,
+  rapidly spreading cellulitis (>2 cm/day), sepsis signs (fever + spreading erythema + systemic symptoms),
+  severe burn (full-thickness or >10% BSA), necrotizing fasciitis signs, exposed bone/tendon/joint,
+  bite wounds with high infection risk.
+"Medium": localized infection signs (purulent exudate + erythema + warmth, contained),
+  non-healing wound >2 weeks, inflammatory lesion with moderate systemic symptoms,
+  suspected fungal infection needing prescription antifungal,
+  pigmented lesion with 1-2 atypical features, partial-thickness burn.
+"Low": minor abrasion or superficial laceration with clean wound bed,
+  mild inflammatory rash without infection signs,
+  stable dry scaling lesion (likely eczema or psoriasis),
+  insect bite without secondary infection.
+PROBABILITY RULES:
+List 1-4 conditions maximum, ranked highest first. Probabilities may sum to more than 100 (conditions can co-exist).
+Never assign 0% or 100%. Minimum 5%, maximum 95%.
+Include the most dangerous condition on the differential even at low probability if visual evidence supports it.
+RED FLAGS: only include if actual evidence exists in description or symptoms. Empty array if none.
+Each flag must reference a specific observable finding, not a generic statement.
+LANGUAGE: condition names and recommendation in TARGET LANGUAGE. All other fields in English.
+Return ONLY the JSON object."""
+# System prompt for follow-up chat: short (2-4 sentence) answers in TARGET
+# LANGUAGE that refer back to the completed analysis, avoid naming prescription
+# drugs or giving a definitive diagnosis, and end with a see-a-doctor reminder.
+CHAT_AGENT_SYSTEM = """You are a compassionate medical assistant continuing a consultation.
+You have access to a completed dermatology and wound-care analysis. Answer the patient's follow-up question.
 RULES:
+Answer entirely in TARGET LANGUAGE.
+Be concise (2-4 sentences), empathetic, and specific to what the analysis found.
+Reference specific findings from the context (e.g., "the redness we identified...").
+Never name a specific prescription drug — say "your doctor may prescribe medication".
+Never give a definitive diagnosis — say "the analysis suggests" or "signs are consistent with".
+If the question is outside dermatology or wound care scope, say so and recommend the appropriate specialist.
+Always close with a reminder to consult a doctor if symptoms change or worsen."""
+# System prompt for the patient-facing message: plain prose in TARGET LANGUAGE
+# built from up to six ordered sentences. Sentence 2 is dropped when no image
+# was provided; sentence 4 is dropped only when the clinical JSON carries
+# neither red_flags nor watch_symptoms, so watch symptoms are never silently
+# omitted. The sentence minimum is relaxed accordingly when a sentence is skipped.
+PATIENT_AGENT_SYSTEM = """You are a medical communication specialist translating clinical findings into clear patient language.
+Write ONLY the patient message. No headings, no labels, no separators, no bullet points.
+Language: write entirely in TARGET LANGUAGE specified in the input.
+Required structure — flowing prose, minimum 6 sentences (fewer only when a sentence is skipped as instructed below):
+Sentence 1 (empathetic opening): acknowledge the patient's concern by referencing their specific complaint.
+Sentence 2 (what we observed): plain-language description of the key visual finding.
+Skip this sentence entirely if VISUAL DESCRIPTION begins with "(No image provided".
+Sentence 3 (what this might mean): explain the most likely condition in everyday language, no jargon.
+If multiple conditions: "the most likely explanation is X; however, Y is also possible".
+Sentence 4 (warning signs): if red_flags or watch_symptoms are present in the CLINICAL JSON, name them in plain language.
+Phrase as: "you should seek immediate care if you notice [specific signs]".
+Skip this sentence entirely only if both the red_flags and watch_symptoms arrays are empty.
+Sentence 5 (what to do now): specific action steps matching triage_level.
+High triage: "please go to an emergency room or urgent care center today".
+Medium triage: "schedule an appointment with a doctor within 1-3 days".
+Low triage: "you can monitor this at home, but see a doctor if it does not improve within [X] days".
+Sentence 6 (closing): one reassuring line encouraging professional consultation.
+TONE: warm and clear. Not alarming unless triage is High. Not dismissive for Low triage.
+Do NOT copy clinical jargon from the SOAP note. Use everyday language throughout.
+Output only the message text. Nothing else."""
+# System prompt for the documentation step: a SOAP note in clinical English
+# with exactly four labeled sections (S, O, A, P), no markdown, and a plan that
+# names treatment categories rather than specific drugs.
+SOAP_AGENT_SYSTEM = """You are a clinical documentation specialist. Write a structured SOAP note for a dermatology and wound-care encounter.
+Write ONLY the SOAP note in professional clinical English. No preamble, no commentary, no markdown.
+Use exactly these four labeled sections:
+S (Subjective):
+Chief complaint and symptom narrative paraphrased in clinical English.
+Include: duration, location, character of the complaint, aggravating or relieving factors, associated symptoms.
+Translate to English if the original complaint was in another language.
+O (Objective):
+Visual examination findings.
+If image provided: describe lesion morphology, estimated size, distribution, wound bed status, signs of infection.
+If no image: write "No physical examination image provided."
+A (Assessment):
+Primary impression: most likely diagnosis with brief rationale referencing the O findings.
+Differential diagnoses: 2-3 alternatives each with one distinguishing clinical feature.
+Triage acuity: state level (High, Medium, or Low) and the urgency reason.
+Red flags: list specific alarming findings, or write "No red flags identified."
+P (Plan):
+Rank recommendations by priority:
+1. Immediate actions if High triage or red flags are present.
+2. Diagnostic workup recommended (skin biopsy, culture, dermoscopy, etc. if indicated).
+3. Treatment approach category — wound care protocol or topical/systemic therapy without specific drug names.
+4. Follow-up timeline and specific return precautions.
+5. Patient education points.
+Output only the four labeled sections. Nothing before S, nothing after the last P line."""