| import datetime |
| import streamlit as st |
| import base64 |
| import uuid |
| import time |
| from langchain_core.messages import HumanMessage, ToolMessage |
| from services.ai_service import get_response_stream |
| from services.mcp_service import run_agent |
| from services.chat_service import get_current_chat, _append_message_to_session |
| from services.export_service import export_chat_to_markdown, export_chat_to_json |
| from services.logging_service import get_logger |
| from services.task_monitor import get_task_monitor |
| from utils.async_helpers import run_async |
| from utils.ai_prompts import make_system_prompt, make_main_prompt |
| import ui_components.sidebar_components as sd_compents |
| from ui_components.main_components import display_tool_executions |
| from config import DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE |
| import traceback |
|
|
|
|
def replace_citation(match, citation_to_doc, doc_id_to_info):
    """Replace a ``[bio-rag-citation:N]`` regex match with a markdown link.

    Args:
        match: ``re.Match`` whose group 1 is the citation number.
        citation_to_doc: Mapping of citation number -> document id.
        doc_id_to_info: Mapping of document id -> document info dict
            (optional ``title`` and ``url`` keys are used).

    Returns:
        A markdown link of the form ``([N](url "title"))`` when the citation
        resolves to a known document, otherwise the matched text unchanged.
    """
    citation_num = int(match.group(1))
    if citation_num in citation_to_doc:
        doc_id = citation_to_doc[citation_num]
        if doc_id in doc_id_to_info:
            doc_info = doc_id_to_info[doc_id]
            # Escape double quotes so a title cannot break the markdown
            # link-title syntax: [text](url "title").
            title = str(doc_info.get('title', 'N/A')).replace('"', '\\"')
            return f"([{citation_num}]({doc_info.get('url', '#')} \"{title}\"))"
    # Unresolvable citation: leave the original marker in place.
    return match.group(0)
|
|
|
|
def replace_footnote_citation(match, citation_to_doc, doc_id_to_info):
    """Replace a ``[^N]`` footnote-citation regex match with a markdown link.

    Args:
        match: ``re.Match`` whose group 1 is the citation number.
        citation_to_doc: Mapping of citation number -> document id.
        doc_id_to_info: Mapping of document id -> document info dict
            (optional ``title`` and ``url`` keys are used).

    Returns:
        A markdown link of the form ``([N](url "title"))`` when the citation
        resolves to a known document, otherwise the matched text unchanged.
    """
    citation_num = int(match.group(1))
    if citation_num in citation_to_doc:
        doc_id = citation_to_doc[citation_num]
        if doc_id in doc_id_to_info:
            doc_info = doc_id_to_info[doc_id]
            # Escape double quotes so a title cannot break the markdown
            # link-title syntax: [text](url "title").
            title = str(doc_info.get('title', 'N/A')).replace('"', '\\"')
            return f"([{citation_num}]({doc_info.get('url', '#')} \"{title}\"))"
    # Unresolvable citation: leave the original marker in place.
    return match.group(0)
|
|
|
|
def replace_document_citation(match, citation_to_doc, doc_id_to_info):
    """Replace a ``[document N]`` citation regex match with a markdown link.

    Args:
        match: ``re.Match`` whose group 1 is the citation number.
        citation_to_doc: Mapping of citation number -> document id.
        doc_id_to_info: Mapping of document id -> document info dict
            (optional ``title`` and ``url`` keys are used).

    Returns:
        A markdown link of the form ``([N](url "title"))`` when the citation
        resolves to a known document, otherwise the matched text unchanged.
    """
    citation_num = int(match.group(1))
    if citation_num in citation_to_doc:
        doc_id = citation_to_doc[citation_num]
        if doc_id in doc_id_to_info:
            doc_info = doc_id_to_info[doc_id]
            # Escape double quotes so a title cannot break the markdown
            # link-title syntax: [text](url "title").
            title = str(doc_info.get('title', 'N/A')).replace('"', '\\"')
            return f"([{citation_num}]({doc_info.get('url', '#')} \"{title}\"))"
    # Unresolvable citation: leave the original marker in place.
    return match.group(0)
|
|
|
|
| def extract_bio_final_answer(raw: str) -> str | None: |
| """ |
| Extract the final answer from bio_qa_stream_chat ToolMessage text marked with |
| 'Bio-QA-final-Answer:' (note the Chinese full-width colon). |
| Compatible with two scenarios: |
| A) SSE stream: Multiple lines containing 'data: {...}' JSON |
| B) Plain text/code blocks: First appears ```bio-...``` code block, final answer appears at the end |
| Returns plain text answer; returns None if not found. |
| """ |
| if not raw: |
| return None |
|
|
| marker = "Bio-QA-final-Answer:" |
|
|
| |
| if "data:" in raw: |
| final = [] |
| for line in raw.splitlines(): |
| line = line.strip() |
| if not line.startswith("data: "): |
| continue |
| |
| try: |
| import json |
| data = json.loads(line[6:]) |
| except Exception: |
| continue |
| if data.get("type") == "result": |
| content = str(data.get("content", "")) |
| if content.startswith(marker): |
| |
| final_text = content[len(marker):].strip() |
| final.append(final_text) |
| elif data.get("type") == "done": |
| |
| break |
| if final: |
| |
| return final[-1].strip() |
|
|
| |
| idx = raw.rfind(marker) |
| if idx != -1: |
| final_text = raw[idx + len(marker):].strip() |
| |
| if final_text.startswith("```"): |
| |
| final_text = final_text.lstrip("`") |
| |
| final_text = final_text.rstrip("`").strip() |
| return final_text or None |
|
|
| return None |
|
|
|
|
| def extract_review_final_report(raw: str) -> str | None: |
| """ |
| Extract the final report content from review_generate ToolMessage text marked with |
| 'Final_report\n'. |
| Compatible with two scenarios: |
| A) SSE stream: Multiple lines containing 'data: {...}' JSON |
| B) Plain text: Directly find content after Final_report\n marker |
| Returns plain text report; returns None if not found. |
| """ |
| if not raw: |
| return None |
|
|
| marker = "Final_report\n" |
|
|
| |
| if "data:" in raw: |
| final_content = [] |
| found_marker = False |
| for line in raw.splitlines(): |
| line = line.strip() |
| if not line.startswith("data: "): |
| continue |
| |
| try: |
| import json |
| data = json.loads(line[6:]) |
| except Exception: |
| continue |
| if data.get("type") == "result": |
| content = str(data.get("content", "")) |
| if content == marker: |
| found_marker = True |
| continue |
| elif found_marker: |
| |
| final_content.append(content) |
| elif data.get("type") == "done": |
| |
| break |
| if final_content: |
| return "".join(final_content).strip() |
|
|
| |
| idx = raw.find(marker) |
| if idx != -1: |
| final_text = raw[idx + len(marker):].strip() |
| |
| if final_text.startswith("```"): |
| |
| final_text = final_text.lstrip("`") |
| |
| final_text = final_text.rstrip("`").strip() |
| return final_text or None |
|
|
| return None |
|
|
|
|
def _markdown_to_pdf_bytes(content: str, title: str) -> bytes:
    """Render markdown ``content`` into a simple PDF (with ``title`` on top)
    and return the PDF bytes.

    The markdown is converted to HTML, then the HTML tree is walked and
    headings, paragraphs, code, blockquotes and lists are mapped onto
    reportlab flowables.

    Raises:
        ImportError: when reportlab, markdown or beautifulsoup4 is missing.
    """
    # Third-party imports stay local so the markdown-only download path
    # keeps working when the PDF dependencies are not installed.
    from io import BytesIO
    from reportlab.lib.pagesizes import A4
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT
    import markdown
    from bs4 import BeautifulSoup, NavigableString

    html_content = markdown.markdown(content, extensions=['tables', 'fenced_code'])

    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)

    styles = getSampleStyleSheet()

    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=16,
        spaceAfter=30,
        alignment=TA_LEFT
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=14,
        spaceAfter=12,
        spaceBefore=20,
        alignment=TA_LEFT
    )
    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['Normal'],
        fontSize=11,
        spaceAfter=6,
        alignment=TA_JUSTIFY
    )

    story = [Paragraph(title, title_style), Spacer(1, 12)]

    soup = BeautifulSoup(html_content, 'html.parser')

    def element_text_with_links(element) -> str:
        # Flatten an element's children to text, turning <a> tags into
        # reportlab <link> markup so hyperlinks survive in the PDF.
        parts = []
        for child in element.children:
            if isinstance(child, NavigableString):
                parts.append(str(child))
            elif getattr(child, 'name', None) == 'a':
                href = child.get('href', '#')
                text = child.get_text(strip=True)
                parts.append(f'<link href="{href}">{text}</link>')
            else:
                parts.append(child.get_text(strip=False))
        return ''.join(parts).strip()

    for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'code', 'pre', 'blockquote', 'ul', 'ol', 'li']):
        if element.name in ['h1', 'h2', 'h3']:
            heading_text = element_text_with_links(element)
            story.append(Paragraph(heading_text or element.get_text(), heading_style))
            story.append(Spacer(1, 6))
        elif element.name == 'p':
            text = element_text_with_links(element)
            if text.strip():
                story.append(Paragraph(text, body_style))
        elif element.name == 'code':
            code_style = ParagraphStyle(
                'CodeText',
                parent=body_style,
                fontName='Courier',
                fontSize=10,
                backColor='#f8f9fa'
            )
            story.append(Paragraph(element.get_text(), code_style))
        elif element.name == 'pre':
            pre_style = ParagraphStyle(
                'PreText',
                parent=body_style,
                fontName='Courier',
                fontSize=10,
                backColor='#f8f9fa',
                leftIndent=20
            )
            story.append(Paragraph(element.get_text(), pre_style))
            story.append(Spacer(1, 6))
        elif element.name == 'blockquote':
            quote_style = ParagraphStyle(
                'QuoteText',
                parent=body_style,
                leftIndent=20,
                leftPadding=10,
                borderWidth=1,
                borderColor='#3498db',
                borderPadding=5
            )
            quote_text = element_text_with_links(element)
            story.append(Paragraph(quote_text or element.get_text(), quote_style))
            story.append(Spacer(1, 6))
        elif element.name in ['ul', 'ol']:
            for index, li in enumerate(element.find_all('li', recursive=False), start=1):
                li_text = element_text_with_links(li)
                bullet = '• ' if element.name == 'ul' else f'{index}. '
                story.append(Paragraph(f'{bullet}{li_text}', body_style))
            story.append(Spacer(1, 6))
        # NOTE: <h4>-<h6> and bare <li> matches intentionally fall through —
        # list items are rendered via their parent <ul>/<ol> branch above.

    doc.build(story)
    pdf_bytes = buffer.getvalue()
    buffer.close()
    return pdf_bytes


def create_download_button(content: str, filename: str, file_type: str = "md", tool_type: str = "literature_review"):
    """
    Create a download button that supports downloading as Markdown or PDF format

    Args:
        content: Content to download
        filename: Fallback filename (without extension) used for unknown tool types
        file_type: File type, 'md' or 'pdf'
        tool_type: Tool type for appropriate filename and report-title generation
    """
    # Per-render counter keeps Streamlit widget keys unique when several
    # download buttons are rendered on the same page.
    counter = st.session_state.get("download_btn_counter", 0)
    st.session_state["download_btn_counter"] = counter + 1
    base_key = f"download_{tool_type}_{file_type}_{counter}"

    from datetime import datetime
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Tool-specific base name; fall back to the caller-supplied filename.
    if tool_type == "bio_qa_stream_chat":
        base_filename = "bio_qa_report"
    elif tool_type == "review_generate":
        base_filename = "literature_review"
    else:
        base_filename = filename

    filename_with_timestamp = f"{base_filename}_{timestamp}"

    if file_type == "md":
        st.download_button(
            label="📥 Download as Markdown",
            data=content,
            file_name=f"{filename_with_timestamp}.md",
            mime="text/markdown",
            help="Click to download report as Markdown format",
            key=f"{base_key}_md"
        )
    elif file_type == "pdf":
        # Tool-specific PDF document title.
        if tool_type == "bio_qa_stream_chat":
            title = "Biological Q&A Report"
        elif tool_type == "review_generate":
            title = "Literature Review Report"
        else:
            title = "Report"
        try:
            pdf_bytes = _markdown_to_pdf_bytes(content, title)
            st.download_button(
                label="📥 Download as PDF",
                data=pdf_bytes,
                file_name=f"{filename_with_timestamp}.pdf",
                mime="application/pdf",
                help="Click to download report as PDF format",
                key=f"{base_key}_pdf"
            )
        except ImportError as e:
            st.warning(f"⚠️ Cannot generate PDF: Missing required libraries. Please install reportlab and beautifulsoup4. Error: {str(e)}")
        except Exception as e:
            st.error(f"❌ Error generating PDF: {str(e)}")
|
|
|
|
| def main(): |
| |
| logger = get_logger() |
| task_monitor = get_task_monitor() |
| |
| with st.sidebar: |
| st.link_button("🚀 Parameter Extraction", "https://huggingface.co/spaces/jackkuo/Automated-Enzyme-Kinetics-Extractor", type="primary") |
| st.subheader("Chat History") |
| sd_compents.create_history_chat_container() |
|
|
| |
| |
| st.header("Chat with Agent") |
| |
| messages_container = st.container(border=True, height=600) |
| |
| |
| if st.session_state.get('current_chat_id'): |
| st.session_state["messages"] = get_current_chat(st.session_state['current_chat_id']) |
| tool_count = 0 |
| |
| |
| logger.log_system_status(f"Re-rendering {len(st.session_state['messages'])} messages for chat {st.session_state['current_chat_id']}") |
| |
| |
| chat_id = st.session_state['current_chat_id'] |
| bio_data_key = f"bio_data_{chat_id}" |
| bio_data = st.session_state.get(bio_data_key, {}) |
| |
| for m in st.session_state["messages"]: |
| |
| has_tool = "tool" in m and m["tool"] |
| has_content = "content" in m and m["content"] |
| logger.log_system_status(f"Message: role={m.get('role')}, has_tool={has_tool}, has_content={has_content}") |
| |
| with messages_container.chat_message(m["role"]): |
| |
| if "tool" in m and m["tool"]: |
| tool_count += 1 |
| |
| with st.expander(f"🔧 ToolMessage - {tool_count}", expanded=False): |
| st.code(m["tool"], language='yaml') |
| |
| |
| if "content" in m and m["content"]: |
| content_text = str(m["content"]) |
| |
| |
| if (m["role"] == "assistant" and |
| bio_data.get('has_bio_final_answer') and |
| bio_data.get('bio_final_answer_content') == content_text): |
| |
| |
| bio_search_data = bio_data.get('bio_search_data', []) |
| bio_citation_data = bio_data.get('bio_citation_data', []) |
| web_search_data = bio_data.get('web_search_data', []) |
| |
| |
| if bio_search_data or web_search_data: |
| total_bio_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in bio_search_data) |
| total_web_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in web_search_data) |
| if total_bio_docs > 0 and total_web_docs > 0: |
| st.markdown(f"### 📚 Analysis based on {total_bio_docs} scientific papers and {total_web_docs} web pages") |
| elif total_bio_docs > 0: |
| st.markdown(f"### 📚 Analysis based on {total_bio_docs} scientific papers") |
| else: |
| st.markdown(f"### 🌐 Analysis based on {total_web_docs} web pages") |
| |
| st.markdown("### 🎯 Final Answer") |
| |
| |
| processed_answer = content_text |
| if bio_citation_data and (bio_search_data or web_search_data): |
| |
| doc_id_to_info = {} |
| |
| for search_data in bio_search_data: |
| bio_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in bio_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| for search_data in web_search_data: |
| web_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in web_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| |
| citation_to_doc = {} |
| for citation in bio_citation_data: |
| citation_num = citation.get('citation') |
| doc_id = citation.get('docId') |
| citation_to_doc[citation_num] = doc_id |
| |
| |
| import re |
| |
| |
| def replace_citation_local(match): |
| return replace_citation(match, citation_to_doc, doc_id_to_info) |
| processed_answer = re.sub(r'\[bio-rag-citation:(\d+)\]', replace_citation_local, processed_answer) |
| |
| def replace_footnote_citation_local(match): |
| return replace_footnote_citation(match, citation_to_doc, doc_id_to_info) |
| processed_answer = re.sub(r'\[\^(\d+)\]', replace_footnote_citation_local, processed_answer) |
| |
| def replace_document_citation_local(match): |
| return replace_document_citation(match, citation_to_doc, doc_id_to_info) |
| processed_answer = re.sub(r'\[document (\d+)\]', replace_document_citation_local, processed_answer) |
| |
| |
| processed_answer = re.sub(r'\n\nReferences:.*$', '', processed_answer, flags=re.DOTALL) |
| |
| |
| processed_answer = re.sub(r'\](\[)', r'], \1', processed_answer) |
| |
| st.markdown(processed_answer) |
| |
| |
| if bio_citation_data: |
| st.markdown(f"### 📖 References ({len(bio_citation_data)} citations)") |
| |
| |
| doc_id_to_info = {} |
| |
| for search_data in bio_search_data: |
| bio_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in bio_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| for search_data in web_search_data: |
| web_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in web_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| |
| for citation in bio_citation_data: |
| doc_id = citation.get('docId') |
| citation_num = citation.get('citation') |
| source = citation.get('source', '') |
| |
| if doc_id in doc_id_to_info: |
| doc_info = doc_id_to_info[doc_id] |
| title = doc_info.get('title', 'N/A') |
| url = doc_info.get('url', '#') |
| |
| if source == 'webSearch': |
| st.markdown(f"[{citation_num}] {title}. [Link]({url})") |
| else: |
| author = doc_info.get('author', 'N/A') |
| journal = doc_info.get('JournalInfo', 'N/A') |
| |
| authors = author.split(', ') |
| if len(authors) > 3: |
| display_author = ', '.join(authors[:3]) + ' et al.' |
| else: |
| display_author = author |
| |
| st.markdown(f"[{citation_num}] {display_author}. {title}. {journal}. [Link]({url})") |
| else: |
| st.markdown(f"[{citation_num}] Document ID: {doc_id}") |
| else: |
| |
| st.markdown(content_text) |
| |
| |
| if m["role"] == "assistant" and m["content"]: |
| |
| content_text = str(m["content"]) |
| if ("Literature Review Report" in content_text or |
| "📚 Literature Review Report" in content_text or |
| len(content_text) > 500): |
| |
| st.markdown("---") |
| st.markdown("### 📥 Download Options") |
| col1, col2 = st.columns(2) |
| with col1: |
| create_download_button(content_text, "literature_review", "md", "bio_qa_stream_chat") |
| with col2: |
| create_download_button(content_text, "literature_review", "pdf", "bio_qa_stream_chat") |
|
|
| |
| user_text = st.chat_input("Ask a question or explore available MCP tools") |
|
|
| |
| |
| sd_compents.create_sidebar_chat_buttons() |
| sd_compents.create_provider_select_widget() |
| sd_compents.create_advanced_configuration_widget() |
| sd_compents.create_mcp_connection_widget() |
| sd_compents.create_mcp_tools_widget() |
|
|
| |
| if user_text is None: |
| st.stop() |
| |
| params = st.session_state.get('params') |
| if not ( |
| params.get('api_key') or |
| ( params.get('model_id') == 'Bedrock' and |
| params.get('region_name') and |
| params.get('aws_access_key') and |
| params.get('aws_secret_key') |
| ) |
| ): |
| err_mesg = "❌ Missing credentials: provide either an API key or complete AWS credentials." |
| _append_message_to_session({"role": "assistant", "content": err_mesg}) |
| with messages_container.chat_message("assistant"): |
| st.markdown(err_mesg) |
| st.rerun() |
|
|
| |
| if user_text: |
| |
| logger.log_chat_message("user", user_text, st.session_state.get('current_chat_id')) |
| |
| user_text_dct = {"role": "user", "content": user_text} |
| _append_message_to_session(user_text_dct) |
| with messages_container.chat_message("user"): |
| st.markdown(user_text) |
|
|
| with st.spinner("Thinking…", show_time=True): |
| |
| task_id = str(uuid.uuid4()) |
| task_monitor.start_monitoring( |
| task_id, |
| f"MCP_Agent_Response_{st.session_state.get('current_chat_id', 'unknown')}", |
| st.session_state.get('current_chat_id') |
| ) |
| |
| start_time = time.time() |
| system_prompt = make_system_prompt() |
| main_prompt = make_main_prompt(user_text) |
| try: |
| |
| if st.session_state.agent: |
| logger.log_system_status("Using MCP agent for response") |
| |
| |
| available_tools = [tool.name for tool in st.session_state.tools] |
| logger.log_mcp_agent_usage("ReactAgent", available_tools, st.session_state.get('current_chat_id')) |
| |
| response = run_async(run_agent(st.session_state.agent, user_text)) |
| tool_output = None |
| tools_used_in_response = [] |
| |
| |
| if "messages" in response: |
| logger.log_system_status(f"Processing {len(response['messages'])} messages from agent response") |
| for msg in response["messages"]: |
| |
| msg_type = type(msg).__name__ |
| logger.log_system_status(f"Processing message type: {msg_type}") |
| |
| |
| if hasattr(msg, 'tool_calls') and msg.tool_calls: |
| logger.log_system_status(f"Found tool calls: {msg.tool_calls}") |
| for tool_call in msg.tool_calls: |
| tools_used_in_response.append(tool_call['name']) |
| |
| |
| logger.log_mcp_tool_call( |
| tool_call['name'], |
| tool_call['args'], |
| st.session_state.get('current_chat_id') |
| ) |
| |
| |
| tool_output = next( |
| (m.content for m in response["messages"] |
| if isinstance(m, ToolMessage) and |
| m.tool_call_id == tool_call['id']), |
| None |
| ) |
| if tool_output: |
| |
| logger.log_mcp_tool_response( |
| tool_call['name'], |
| tool_output, |
| st.session_state.get('current_chat_id') |
| ) |
| |
| st.session_state.tool_executions.append({ |
| "tool_name": tool_call['name'], |
| "input": tool_call['args'], |
| "output": tool_output, |
| "timestamp": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') |
| }) |
| elif hasattr(msg, 'name') and msg.name: |
| logger.log_system_status(f"Found ToolMessage: {msg.name}") |
| else: |
| logger.log_system_status(f"Message has no tool calls or name: {msg}") |
| |
| |
| if tools_used_in_response: |
| logger.log_mcp_agent_usage("Response", tools_used_in_response, st.session_state.get('current_chat_id')) |
| else: |
| logger.log_system_status("No MCP tools used in this response") |
| |
| output = "" |
| tool_count = 0 |
| |
| chat_id = st.session_state.get('current_chat_id') |
| bio_data_key = f"bio_data_{chat_id}" if chat_id else "bio_data_default" |
| |
| if bio_data_key not in st.session_state: |
| st.session_state[bio_data_key] = { |
| 'bio_final_answer_content': "", |
| 'has_bio_final_answer': False, |
| 'review_final_report_content': "", |
| 'has_review_final_report': False, |
| 'bio_search_data': [], |
| 'bio_citation_data': [], |
| 'web_search_data': [] |
| } |
| |
| |
| bio_data = st.session_state[bio_data_key] |
| bio_final_answer_content = bio_data['bio_final_answer_content'] |
| has_bio_final_answer = bio_data['has_bio_final_answer'] |
| review_final_report_content = bio_data['review_final_report_content'] |
| has_review_final_report = bio_data['has_review_final_report'] |
| bio_search_data = bio_data['bio_search_data'] |
| bio_citation_data = bio_data['bio_citation_data'] |
| web_search_data = bio_data['web_search_data'] |
| |
| if "messages" in response: |
| for msg in response["messages"]: |
| if isinstance(msg, HumanMessage): |
| continue |
| elif hasattr(msg, 'name') and msg.name: |
| tool_count += 1 |
| with messages_container.chat_message("assistant"): |
| |
| if (msg.name == "bio_qa_stream_chat" or msg.name == "review_generate" or msg.name == "health_check") and "data:" in msg.content: |
| if msg.name == "bio_qa_stream_chat": |
| st.write("**🔬 Biological Q&A Results:**") |
| elif msg.name == "review_generate": |
| st.write("**📚 Literature Review Generation:**") |
| elif msg.name == "health_check": |
| st.write("**🏥 Health Check Results:**") |
| |
| |
| lines = msg.content.split('\n') |
| handled_final_answer = False |
| handled_final_report = False |
| final_report_content = [] |
| for line in lines: |
| if line.startswith('data: '): |
| try: |
| import json |
| data = json.loads(line[6:]) |
| if data.get('type') == 'result': |
| content = data.get('content', '') |
| |
| if content.startswith("Bio-QA-final-Answer:") and not handled_final_answer: |
| |
| bio_final_answer_content = content.replace("Bio-QA-final-Answer:", "").strip() |
| |
| bio_data['bio_final_answer_content'] = bio_final_answer_content |
| bio_data['has_bio_final_answer'] = True |
| st.session_state[bio_data_key] = bio_data |
| |
| |
| output = bio_final_answer_content |
| |
| has_bio_final_answer = True |
| |
| st.markdown("---") |
| |
| if bio_search_data or web_search_data: |
| total_bio_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in bio_search_data) |
| total_web_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in web_search_data) |
| |
| if total_bio_docs > 0 and total_web_docs > 0: |
| st.markdown(f"### 📚 Analysis based on {total_bio_docs} scientific papers and {total_web_docs} web pages") |
| elif total_bio_docs > 0: |
| st.markdown(f"### 📚 Analysis based on {total_bio_docs} scientific papers") |
| else: |
| st.markdown(f"### 🌐 Analysis based on {total_web_docs} web pages") |
| |
|
|
| |
| st.markdown("### 🎯 Final Answer") |
| |
| |
| processed_answer = bio_final_answer_content |
| if bio_citation_data and (bio_search_data or web_search_data): |
| |
| doc_id_to_info = {} |
| |
| for search_data in bio_search_data: |
| bio_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in bio_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| for search_data in web_search_data: |
| web_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in web_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| |
| citation_to_doc = {} |
| for citation in bio_citation_data: |
| citation_num = citation.get('citation') |
| doc_id = citation.get('docId') |
| citation_to_doc[citation_num] = doc_id |
| |
| |
| import re |
| |
| processed_answer = re.sub(r'\[bio-rag-citation:(\d+)\]', replace_citation, processed_answer) |
| |
| processed_answer = re.sub(r'\[\^(\d+)\]', replace_footnote_citation, processed_answer) |
| |
| processed_answer = re.sub(r'\[document (\d+)\]', replace_document_citation, processed_answer) |
| |
| |
| processed_answer = re.sub(r'\n\nReferences:.*$', '', processed_answer, flags=re.DOTALL) |
| |
| |
| processed_answer = re.sub(r'\](\[)', r'], \1', processed_answer) |
| |
| st.markdown(processed_answer) |
| |
| |
| if bio_citation_data: |
| st.markdown(f"### 📖 References ({len(bio_citation_data)} citations)") |
| |
| |
| doc_id_to_info = {} |
| |
| for search_data in bio_search_data: |
| bio_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in bio_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| for search_data in web_search_data: |
| web_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in web_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| |
| for citation in bio_citation_data: |
| doc_id = citation.get('docId') |
| citation_num = citation.get('citation') |
| source = citation.get('source', '') |
| |
| if doc_id in doc_id_to_info: |
| doc_info = doc_id_to_info[doc_id] |
| title = doc_info.get('title', 'N/A') |
| url = doc_info.get('url', '#') |
| |
| if source == 'webSearch': |
| |
| st.markdown(f"[{citation_num}] {title}. [Link]({url})") |
| else: |
| |
| author = doc_info.get('author', 'N/A') |
| journal = doc_info.get('JournalInfo', 'N/A') |
| |
| |
| authors = author.split(', ') |
| if len(authors) > 3: |
| display_author = ', '.join(authors[:3]) + ' et al.' |
| else: |
| display_author = author |
| |
| st.markdown(f"[{citation_num}] {display_author}. {title}. {journal}. [Link]({url})") |
| else: |
| st.markdown(f"[{citation_num}] Document ID: {doc_id}") |
| |
| |
| complete_content = "" |
| |
| |
| if bio_search_data or web_search_data: |
| total_bio_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in bio_search_data) |
| total_web_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in web_search_data) |
| if total_bio_docs > 0 and total_web_docs > 0: |
| complete_content += f"### 📚 Analysis based on {total_bio_docs} scientific papers and {total_web_docs} web pages\n\n" |
| elif total_bio_docs > 0: |
| complete_content += f"### 📚 Analysis based on {total_bio_docs} scientific papers\n\n" |
| else: |
| complete_content += f"### 🌐 Analysis based on {total_web_docs} web pages\n\n" |
| |
| |
| complete_content += "### 🎯 Final Answer\n\n" |
| complete_content += processed_answer + "\n\n" |
| |
| |
| if bio_citation_data: |
| complete_content += f"### 📖 References ({len(bio_citation_data)} citations)\n\n" |
| |
| |
| doc_id_to_info = {} |
| |
| for search_data in bio_search_data: |
| bio_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in bio_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| for search_data in web_search_data: |
| web_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in web_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| |
| for citation in bio_citation_data: |
| doc_id = citation.get('docId') |
| citation_num = citation.get('citation') |
| source = citation.get('source', '') |
| |
| if doc_id in doc_id_to_info: |
| doc_info = doc_id_to_info[doc_id] |
| title = doc_info.get('title', 'N/A') |
| url = doc_info.get('url', '#') |
| |
| if source == 'webSearch': |
| complete_content += f"[{citation_num}] {title}. [Link]({url})\n\n" |
| else: |
| author = doc_info.get('author', 'N/A') |
| journal = doc_info.get('JournalInfo', 'N/A') |
| |
| authors = author.split(', ') |
| if len(authors) > 3: |
| display_author = ', '.join(authors[:3]) + ' et al.' |
| else: |
| display_author = author |
| |
| complete_content += f"[{citation_num}] {display_author}. {title}. {journal}. [Link]({url})\n\n" |
| else: |
| complete_content += f"[{citation_num}] Document ID: {doc_id}\n\n" |
| |
| |
| st.markdown("---") |
| st.markdown("### 📥 Download Options") |
| col1, col2 = st.columns(2) |
| with col1: |
| create_download_button(complete_content, "bio_qa_report", "md", "bio_qa_stream_chat") |
| with col2: |
| create_download_button(complete_content, "bio_qa_report", "pdf", "bio_qa_stream_chat") |
| |
| |
| _append_message_to_session({'role': 'assistant', 'content': complete_content}) |
| |
| |
| st.rerun() |
|
|
| handled_final_answer = True |
| |
| elif content == "Final_report\n" and not handled_final_report: |
| handled_final_report = True |
| |
| continue |
| elif handled_final_report: |
| |
| final_report_content.append(content) |
| else: |
| |
| try: |
| import json |
| json_data = json.loads(content) |
| if json_data.get("type") == "search" and json_data.get("handler") == "QASearch": |
| handler_param = json_data.get('handlerParam', {}) |
| source = handler_param.get('source', '') |
| if source == 'pubmed': |
| bio_search_data.append(json_data) |
| |
| bio_data['bio_search_data'] = bio_search_data |
| st.session_state[bio_data_key] = bio_data |
| st.write(f"🔍 Found {len(handler_param.get('bioDocs', []))} relevant papers") |
| elif source == 'webSearch': |
| web_search_data.append(json_data) |
| |
| bio_data['web_search_data'] = web_search_data |
| st.session_state[bio_data_key] = bio_data |
| st.write(f"🌐 Found {len(handler_param.get('bioDocs', []))} relevant web pages") |
| elif isinstance(json_data, list) and len(json_data) > 0 and "source" in json_data[0] and "citation" in json_data[0]: |
| |
| bio_citation_data.extend(json_data) |
| |
| bio_data['bio_citation_data'] = bio_citation_data |
| st.session_state[bio_data_key] = bio_data |
| st.write(f"📝 Generated citation information, {len(json_data)} citations total") |
| else: |
| st.write(content) |
| except json.JSONDecodeError: |
| |
| st.write(content) |
| elif data.get('type') == 'done': |
| st.success("✅ Answer completed") |
| except json.JSONDecodeError: |
| continue |
| |
| |
| if handled_final_report and final_report_content: |
| review_final_report_content = "".join(final_report_content).strip() |
| |
| |
| with st.expander(f"🔧 ToolMessage - {tool_count} ({msg.name})", expanded=False): |
| st.code(msg.content, language='yaml') |
| |
| |
| with messages_container.chat_message("assistant"): |
| st.markdown("---") |
| st.markdown("### 📚 Literature Review Report") |
| st.markdown(review_final_report_content) |
| |
| |
| st.markdown("---") |
| st.markdown("### 📥 Download Options") |
| col1, col2 = st.columns(2) |
| with col1: |
| create_download_button(review_final_report_content, "literature_review", "md", "review_generate") |
| with col2: |
| create_download_button(review_final_report_content, "literature_review", "pdf", "review_generate") |
| |
| |
| has_review_final_report = True |
| output = review_final_report_content |
| |
| |
| _append_message_to_session({'role': 'assistant', 'content': review_final_report_content}) |
| |
| _append_message_to_session({'role': 'assistant', 'content': '', 'tool': msg.content}) |
| |
| |
| st.rerun() |
| else: |
| |
| with st.expander(f"🔧 ToolMessage - {tool_count} ({msg.name})", expanded=False): |
| st.code(msg.content, language='yaml') |
| _append_message_to_session({'role': 'assistant', 'content': '', 'tool': msg.content}) |
else:
# ToolMessage from a specific tool: special-case the bio QA streaming tool.
if msg.name == "bio_qa_stream_chat":
# Best-effort extraction of structured payloads embedded in the tool
# output as fenced code blocks; any failure leaves the data lists as-is.
try:
import json
import re

# Fenced ```bio-chat-agent-task``` blocks carry per-search JSON tasks.
json_matches = re.findall(r'```bio-chat-agent-task\n(.*?)\n```', msg.content, re.DOTALL)
for json_str in json_matches:
try:
json_data = json.loads(json_str)
# Only QASearch "search" tasks are of interest here.
if json_data.get("type") == "search" and json_data.get("handler") == "QASearch":
handler_param = json_data.get('handlerParam', {})
source = handler_param.get('source', '')
if source == 'pubmed':
bio_search_data.append(json_data)
# Persist incrementally so state survives Streamlit reruns.
bio_data['bio_search_data'] = bio_search_data
st.session_state[bio_data_key] = bio_data
elif source == 'webSearch':
web_search_data.append(json_data)

bio_data['web_search_data'] = web_search_data
st.session_state[bio_data_key] = bio_data
except json.JSONDecodeError:
# Skip malformed task blocks; keep scanning the rest.
continue


# Fenced ```bio-resource-lookup``` blocks carry citation→document mappings.
citation_matches = re.findall(r'```bio-resource-lookup\n(.*?)\n```', msg.content, re.DOTALL)
for citation_str in citation_matches:
try:
citation_data = json.loads(citation_str)
# Expect a non-empty list of {"source": …, "citation": …, "docId": …} dicts;
# the shape check only inspects the first element.
if isinstance(citation_data, list) and len(citation_data) > 0 and "source" in citation_data[0] and "citation" in citation_data[0]:
bio_citation_data.extend(citation_data)

bio_data['bio_citation_data'] = bio_citation_data
st.session_state[bio_data_key] = bio_data
except json.JSONDecodeError:
continue
except Exception:
# NOTE(review): deliberate best-effort swallow — extraction is optional
# enrichment; consider at least logging the exception for debuggability.
pass
| |
| extracted = extract_bio_final_answer(msg.content) |
| if extracted: |
| |
| with st.expander(f"🔧 ToolMessage - {tool_count} ({msg.name})", expanded=False): |
| st.code(msg.content, language='yaml') |
| |
| |
| with messages_container.chat_message("assistant"): |
| |
| if bio_search_data or web_search_data: |
| total_bio_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in bio_search_data) |
| total_web_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in web_search_data) |
| total_docs = total_bio_docs + total_web_docs |
| if total_bio_docs > 0 and total_web_docs > 0: |
| st.markdown(f"### 📚 Analysis based on {total_bio_docs} scientific papers and {total_web_docs} web pages") |
| elif total_bio_docs > 0: |
| st.markdown(f"### 📚 Analysis based on {total_bio_docs} scientific papers") |
| else: |
| st.markdown(f"### 🌐 Analysis based on {total_web_docs} web pages") |
| |
|
|
| |
| st.markdown("### 🎯 Final Answer") |
| |
| |
| processed_answer = extracted |
| if bio_citation_data and (bio_search_data or web_search_data): |
| |
| doc_id_to_info = {} |
| |
| for search_data in bio_search_data: |
| bio_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in bio_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| for search_data in web_search_data: |
| web_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in web_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| |
| citation_to_doc = {} |
| for citation in bio_citation_data: |
| citation_num = citation.get('citation') |
| doc_id = citation.get('docId') |
| citation_to_doc[citation_num] = doc_id |
| |
| |
| import re |
| |
| def replace_citation_local2(match): |
| return replace_citation(match, citation_to_doc, doc_id_to_info) |
| processed_answer = re.sub(r'\[bio-rag-citation:(\d+)\]', replace_citation_local2, processed_answer) |
| |
| def replace_footnote_citation_local2(match): |
| return replace_footnote_citation(match, citation_to_doc, doc_id_to_info) |
| processed_answer = re.sub(r'\[\^(\d+)\]', replace_footnote_citation_local2, processed_answer) |
| |
| def replace_document_citation_local2(match): |
| return replace_document_citation(match, citation_to_doc, doc_id_to_info) |
| processed_answer = re.sub(r'\[document (\d+)\]', replace_document_citation_local2, processed_answer) |
| |
| |
| processed_answer = re.sub(r'\n\nReferences:.*$', '', processed_answer, flags=re.DOTALL) |
| |
| |
| processed_answer = re.sub(r'\](\[)', r'], \1', processed_answer) |
| |
| st.markdown(processed_answer) |
| |
| |
| if bio_citation_data: |
| st.markdown(f"### 📖 References ({len(bio_citation_data)} citations)") |
| |
| |
| doc_id_to_info = {} |
| |
| for search_data in bio_search_data: |
| bio_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in bio_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| for search_data in web_search_data: |
| web_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in web_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| |
| for citation in bio_citation_data: |
| doc_id = citation.get('docId') |
| citation_num = citation.get('citation') |
| source = citation.get('source', '') |
| |
| if doc_id in doc_id_to_info: |
| doc_info = doc_id_to_info[doc_id] |
| title = doc_info.get('title', 'N/A') |
| url = doc_info.get('url', '#') |
| |
| if source == 'webSearch': |
| |
| st.markdown(f"[{citation_num}] {title}. [Link]({url})") |
| else: |
| |
| author = doc_info.get('author', 'N/A') |
| journal = doc_info.get('JournalInfo', 'N/A') |
| |
| |
| authors = author.split(', ') |
| if len(authors) > 3: |
| display_author = ', '.join(authors[:3]) + ' et al.' |
| else: |
| display_author = author |
| |
| st.markdown(f"[{citation_num}] {display_author}. {title}. {journal}. [Link]({url})") |
| else: |
| st.markdown(f"[{citation_num}] Document ID: {doc_id}") |
| |
| |
| complete_content = "" |
| |
| |
| if bio_search_data or web_search_data: |
| total_bio_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in bio_search_data) |
| total_web_docs = sum(len(data.get('handlerParam', {}).get('bioDocs', [])) for data in web_search_data) |
| if total_bio_docs > 0 and total_web_docs > 0: |
| complete_content += f"### 📚 Analysis based on {total_bio_docs} scientific papers and {total_web_docs} web pages\n\n" |
| elif total_bio_docs > 0: |
| complete_content += f"### 📚 Analysis based on {total_bio_docs} scientific papers\n\n" |
| else: |
| complete_content += f"### 🌐 Analysis based on {total_web_docs} web pages\n\n" |
| |
| |
| complete_content += "### 🎯 Final Answer\n\n" |
| complete_content += processed_answer + "\n\n" |
| |
| |
| if bio_citation_data: |
| complete_content += f"### 📖 References ({len(bio_citation_data)} citations)\n\n" |
| |
| |
| doc_id_to_info = {} |
| |
| for search_data in bio_search_data: |
| bio_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in bio_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| for search_data in web_search_data: |
| web_docs = search_data.get('handlerParam', {}).get('bioDocs', []) |
| for doc in web_docs: |
| doc_id_to_info[doc.get('docId')] = doc |
| |
| |
| for citation in bio_citation_data: |
| doc_id = citation.get('docId') |
| citation_num = citation.get('citation') |
| source = citation.get('source', '') |
| |
| if doc_id in doc_id_to_info: |
| doc_info = doc_id_to_info[doc_id] |
| title = doc_info.get('title', 'N/A') |
| url = doc_info.get('url', '#') |
| |
| if source == 'webSearch': |
| complete_content += f"[{citation_num}] {title}. [Link]({url})\n\n" |
| else: |
| author = doc_info.get('author', 'N/A') |
| journal = doc_info.get('JournalInfo', 'N/A') |
| |
| authors = author.split(', ') |
| if len(authors) > 3: |
| display_author = ', '.join(authors[:3]) + ' et al.' |
| else: |
| display_author = author |
| |
| complete_content += f"[{citation_num}] {display_author}. {title}. {journal}. [Link]({url})\n\n" |
| else: |
| complete_content += f"[{citation_num}] Document ID: {doc_id}\n\n" |
| |
| |
| output = complete_content |
| bio_final_answer_content = complete_content |
| |
| has_bio_final_answer = True |
|
|
| |
| st.markdown("---") |
| st.markdown("### 📥 Download Options") |
| col1, col2 = st.columns(2) |
| with col1: |
| create_download_button(complete_content, "bio_qa_report", "md", "bio_qa_stream_chat") |
| with col2: |
| create_download_button(complete_content, "bio_qa_report", "pdf", "bio_qa_stream_chat") |
|
|
| |
| _append_message_to_session({'role': 'assistant', 'content': '', 'tool': msg.content}) |
| _append_message_to_session({'role': 'assistant', 'content': complete_content}) |
| |
| |
| st.rerun() |
|
|
| |
| logger.log_system_status(f"Saved ToolMessage for bio_qa_stream_chat: {len(msg.content)} characters") |
| logger.log_system_status(f"Current chat has {len(st.session_state.get('messages', []))} messages") |
| else: |
| |
| with st.expander(f"🔧 ToolMessage - {tool_count} ({msg.name})", expanded=False): |
| st.code(msg.content, language='yaml') |
| _append_message_to_session({'role': 'assistant', 'content': '', 'tool': msg.content}) |
elif msg.name == "review_generate":
# Literature-review tool: extract the finished report from the payload.
extracted_report = extract_review_final_report(msg.content)
if extracted_report:
# Raw payload stays available in a collapsed expander.
with st.expander(f"🔧 ToolMessage - {tool_count} ({msg.name})", expanded=False):
st.code(msg.content, language='yaml')


# Render the report as an assistant chat message.
with messages_container.chat_message("assistant"):
st.markdown("---")
st.markdown("### 📚 Literature Review Report")
st.markdown(extracted_report)


# Download options (markdown and PDF).
st.markdown("---")
st.markdown("### 📥 Download Options")
col1, col2 = st.columns(2)
with col1:
create_download_button(extracted_report, "literature_review", "md", "review_generate")
with col2:
create_download_button(extracted_report, "literature_review", "pdf", "review_generate")


# Flags/output consumed after the loop to build response_dct.
output = extracted_report
review_final_report_content = extracted_report

has_review_final_report = True


# Persist the report, then the raw tool payload.
_append_message_to_session({'role': 'assistant', 'content': extracted_report})

_append_message_to_session({'role': 'assistant', 'content': '', 'tool': msg.content})


# Rerun so persisted messages render from session state.
st.rerun()
else:
# No report extracted: show and persist raw tool output only.
with st.expander(f"🔧 ToolMessage - {tool_count} ({msg.name})", expanded=False):
st.code(msg.content, language='yaml')
_append_message_to_session({'role': 'assistant', 'content': '', 'tool': msg.content})
else:
# Any other tool: generic collapsed display + persistence.
with st.expander(f"🔧 ToolMessage - {tool_count} ({msg.name})", expanded=False):
st.code(msg.content, language='yaml')
_append_message_to_session({'role': 'assistant', 'content': '', 'tool': msg.content})
else:
# Not a ToolMessage (branch condition above this view): show any plain
# assistant content, unless a final answer/report was already rendered.
if not has_bio_final_answer and not has_review_final_report and hasattr(msg, "content") and msg.content:
with messages_container.chat_message("assistant"):
output = str(msg.content)
st.markdown(output)


# Fall back to the captured final contents if nothing else set `output`.
if not output and bio_final_answer_content:
output = bio_final_answer_content
if not output and review_final_report_content:
output = review_final_report_content


# response_dct is the assistant message persisted in the finally block below.
response_dct = None

# Final-answer branches were already appended to the session inside the
# loop; here we only build response_dct for logging/guarding.
if has_bio_final_answer or has_review_final_report:

if has_bio_final_answer:
response_dct = {"role": "assistant", "content": bio_final_answer_content}
logger.log_chat_message("assistant", bio_final_answer_content, st.session_state.get('current_chat_id'), has_tool=True)
elif has_review_final_report:
response_dct = {"role": "assistant", "content": review_final_report_content}
logger.log_chat_message("assistant", review_final_report_content, st.session_state.get('current_chat_id'), has_tool=True)
else:
response_dct = {"role": "assistant", "content": output}

logger.log_chat_message("assistant", output, st.session_state.get('current_chat_id'))

else:
# No MCP connection (outer condition above this view): warn and fall
# back to a plain LLM streaming response.
# NOTE(review): user-facing typo — should read "not connected"; fix in a code change.
st.warning("You are not connect to MCP servers!")
response_stream = get_response_stream(
main_prompt,
llm_provider=st.session_state['params']['model_id'],
system=system_prompt,
temperature=st.session_state['params'].get('temperature', DEFAULT_TEMPERATURE),
max_tokens=st.session_state['params'].get('max_tokens', DEFAULT_MAX_TOKENS),
)
with messages_container.chat_message("assistant"):
response = st.write_stream(response_stream)
response_dct = {"role": "assistant", "content": response}
except Exception as e:
# Stop task monitoring on failure and compute elapsed time for logging.
# NOTE(review): stop_monitoring is also called in the finally block —
# confirm it is idempotent, otherwise this call is redundant.
task_monitor.stop_monitoring(task_id)
duration = time.time() - start_time

response = f"⚠️ Something went wrong: {str(e)}"
logger.log_error(
"MCP_Agent_Error",
str(e),
{
'chat_id': st.session_state.get('current_chat_id'),
'duration_seconds': duration,
'user_text': user_text
}
)

# Surface the error and full traceback to the user, then halt the script.
st.error(response)
st.code(traceback.format_exc(), language="python")
st.stop()
finally:
# Always stop monitoring, whether the agent succeeded or failed.
task_monitor.stop_monitoring(task_id)

if response_dct is not None:
# Final-answer branches already appended their content inside the
# loop — skip the append to avoid duplicating the message.
if (has_bio_final_answer or has_review_final_report) and response_dct.get('content'):

pass
else:
_append_message_to_session(response_dct)

# Render the sidebar/expander showing past tool executions.
display_tool_executions()