# mediagent/agents/advisor.py
"""
Clinical Advisor Agent for MediAgent: post-report interactive Q&A.

Answers follow-up clinical questions from radiologists/clinicians in the full
context of the generated report. Acts as a 24/7 senior radiology consultant.
"""

import logging
import re
from typing import Optional

from core.llm import LLMClient
from core.models import FinalReport

logger = logging.getLogger(__name__)


class ClinicalAdvisorAgent:
    """
    Interactive clinical consultation agent activated after report generation.

    Answers follow-up questions with access to all pipeline outputs. Scope is
    limited to radiological interpretation; no treatment prescriptions.
    """

    SYSTEM_PROMPT = """You are a senior radiologist consultant.
Answer the clinician's question directly and concisely, in 2-4 sentences maximum.
No preamble, no thinking out loud, no reasoning steps. Just the answer.

Rules: reference report findings; no fabrication; no medications/dosages;
formal radiological tone. If the question concerns a management decision,
end with "Clinical correlation recommended."
"""

    def __init__(self, llm_client: Optional[LLMClient] = None):
        self.llm = llm_client or LLMClient()

    def answer(self, question: str, report: FinalReport) -> str:
        """
        Answer a follow-up clinical question in the context of the generated report.

        Args:
            question: Free-text clinical question from the user
            report: The FinalReport from the pipeline

        Returns:
            str: Clinical answer text
        """
        logger.info("💬 Clinical Advisor processing question: %.80s", question)

        sections = report.sections
        severity = (
            report.overall_severity.value
            if hasattr(report.overall_severity, "value")
            else str(report.overall_severity)
        )

        # Send only the most relevant report fields; fewer tokens = faster response
        context = (
            f"Severity: {severity} | Impression: {sections.impression} | "
            f"Findings: {sections.findings[:600]} | "
            f"Recommendations: {sections.recommendations[:300]}"
        )
        prompt = f"Report: {context}\n\nQuestion: {question}\n\nAnswer:"

        result = self.llm.generate_text(
            prompt=prompt,
            system_prompt=self.SYSTEM_PROMPT,
            temperature=0.0,
            max_tokens=200,
            # Disable Qwen3 thinking/reasoning mode entirely
            extra_body={"chat_template_kwargs": {"enable_thinking": False}},
        )

        if result.get("success") and result.get("content"):
            answer = self._strip_thinking(result["content"].strip())
            logger.info(
                "✅ Clinical Advisor answered | tokens=%s",
                result.get("usage", {}).get("total_tokens", 0),
            )
            return answer

        logger.warning("⚠️ Clinical Advisor LLM call failed")
        return (
            "Unable to process this question. Please review the report directly "
            "and consult a licensed radiologist."
        )

    @staticmethod
    def _strip_thinking(text: str) -> str:
        """
        Remove the thinking/reasoning output that Qwen and similar models emit.

        Handles both structured tags and plain-text reasoning patterns.
        """
        # Remove <think> ... </think> XML blocks
        text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
        # Remove ```think ... ``` markdown blocks
        text = re.sub(r"```think.*?```", "", text, flags=re.DOTALL)
        # Remove plain-text reasoning preambles Qwen3 emits without tags:
        # "Here's a thinking process: 1. ..." or "Let me think: ..." etc.
        text = re.sub(
            r"(?i)^(here'?s?\s+(a\s+)?thinking\s+process:?|let me (think|analyze|consider):?|thinking:?).*?(\n\n|\Z)",
            "",
            text,
            flags=re.DOTALL,
        )
        # Remove runs of numbered reasoning lines (e.g. "1. **Title**: ...")
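        # (re.MULTILINE anchors ^ at every line start, so consecutive runs of
        # such lines are removed wherever they appear, not only at the top)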
text = re.sub(r"^(\s*\d+\.\s+\*\*[^*]+\*\*:.*\n?)+", "", text, flags=re.MULTILINE) # If after stripping we have a clear section break, take only what's after it if "\n\n" in text: parts = [p.strip() for p in text.split("\n\n") if p.strip()] # Take the last substantial chunk (the actual answer) for part in reversed(parts): if len(part) > 20: return part return text.strip()