azlaan428 committed on
Commit
bcceea4
·
1 Parent(s): fe7e528

feat: multi-agent ARIA pipeline with Groq

Browse files
Files changed (2) hide show
  1. STATUS.md +32 -0
  2. agent/agent.py +126 -30
STATUS.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ARIA Project Status
2
+
3
+ ## What Was Built
4
+ - Multi-agent pipeline in agent/agent.py with 4 stages:
5
+ 1. Query Architect: generates 5 MeSH-optimised PubMed queries via Groq
6
+ 2. Literature Scout: fetches all queries in parallel (ThreadPoolExecutor)
7
+ 3. Evidence Synthesiser: structured synthesis with Background, Key Findings, Level of Evidence, Conflicting Evidence, Research Gaps, Clinical Implications
8
+ 4. Citation Builder: formatted references with PMID
9
+ - Groq API (llama-3.1-8b-instant) replacing Ollama - zero local CPU load
10
+ - Flask backend (app.py) with /query endpoint
11
+ - HTML/CSS/JS frontend with dark theme
12
+
13
+ ## Groq API Key
14
+ Stored in ~/.bashrc as GROQ_API_KEY environment variable
15
+
16
+ ## What Remains
17
+ 1. Update app.py to call run_pipeline() instead of build_agent()
18
+ 2. Update frontend to display structured synthesis sections properly
19
+ 3. PDF export (reportlab library)
20
+ 4. New dashboard UI
21
+ 5. AMD MI300X swap (Mistral 7B) when credits arrive
22
+ 6. Concept document
23
+ 7. Demo video
24
+ 8. LinkedIn post with #AMDDevHackathon
25
+ 9. Submit on lablab.ai before May 10
26
+
27
+ ## Next Session Starting Point
28
+ Run: cd ~/glitch-squad-biomedical-assistant && source venv/bin/activate
29
+ Then tell Claude: "Continue ARIA hackathon project, read STATUS.md"
30
+
31
+ ## GitHub
32
+ github.com/azlaan428/glitch-squad-biomedical-assistant
agent/agent.py CHANGED
@@ -1,45 +1,141 @@
1
- import sys, os
 
2
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
3
 
4
- from langchain_core.tools import Tool
5
- from langchain_ollama import ChatOllama
6
  from langgraph.prebuilt import create_react_agent
7
- from langchain_core.messages import SystemMessage
8
  from retrieval.pubmed import fetch_pubmed
9
 
10
 
11
- def pubmed_tool_fn(query: str) -> str:
 
 
 
 
 
 
 
 
 
 
12
  results = fetch_pubmed(query, max_results=5)
13
  if not results:
14
  return "No abstracts found for this query."
15
- return "\n\n".join([f"[PMID {r['pmid']}]\n{r['abstract']}" for r in results])
16
-
17
- pubmed_tool = Tool(
18
- name="PubMedSearch",
19
- func=pubmed_tool_fn,
20
- description=(
21
- "Searches PubMed for biomedical literature. "
22
- "Input should be a clinical or scientific query string. "
23
- "Returns abstracts relevant to the query."
 
 
 
 
 
24
  )
25
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- SYSTEM_PROMPT = """You are a biomedical research assistant. When given a question:
28
- 1. Use the PubMedSearch tool to retrieve relevant literature
29
- 2. Read the retrieved abstracts carefully
30
- 3. Answer the user's specific question directly based on what the abstracts say
31
- 4. Cite the PMID numbers of the papers you reference
32
- 5. Do not summarise unrelated papers — only answer what was asked"""
33
 
34
  def build_agent():
35
- llm = ChatOllama(model="llama3.2", temperature=0)
36
- return create_react_agent(llm, [pubmed_tool], prompt=SYSTEM_PROMPT)
 
37
 
38
  if __name__ == "__main__":
39
- print("Connecting to Ollama...")
40
- agent = build_agent()
41
- query = "What ML methods are used for epilepsy seizure detection?"
42
- print(f"\nQuery: {query}\n")
43
- result = agent.invoke({"messages": [{"role": "user", "content": query}]})
44
- print("\n=== Final Response ===")
45
- print(result["messages"][-1].content)
 
1
+ import sys, os, re
2
+ from concurrent.futures import ThreadPoolExecutor, as_completed
3
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
4
 
5
+ from langchain_groq import ChatGroq
6
+ from langchain_core.tools import tool
7
  from langgraph.prebuilt import create_react_agent
 
8
  from retrieval.pubmed import fetch_pubmed
9
 
10
 
11
def get_llm(model="llama-3.1-8b-instant", temperature=0):
    """Build the Groq-hosted chat model used by the pipeline stages.

    Args:
        model: Groq model identifier. Defaults to the model this module has
            always used, so existing ``get_llm()`` calls behave as before.
        temperature: Sampling temperature passed straight to ``ChatGroq``.

    Returns:
        A ``ChatGroq`` instance configured with the given model and
        temperature.
    """
    # The key is read at call time rather than import time, so it can be
    # exported after this module has been imported. If the variable is unset,
    # None is passed through unchanged and ChatGroq decides how to react.
    return ChatGroq(
        model=model,
        temperature=temperature,
        api_key=os.environ.get("GROQ_API_KEY"),
    )
17
+
18
+
19
@tool
def PubMedSearch(query: str) -> str:
    """Searches PubMed for biomedical literature abstracts."""
    # NOTE: the docstring above is deliberately left verbatim. The @tool
    # decorator publishes it as the tool description the agent's LLM reads,
    # so rewording it would change runtime behaviour.
    #
    # Returns up to five abstracts, each headed by "[PMID <id>]" and separated
    # by blank lines, or a fixed message when the search comes back empty.
    results = fetch_pubmed(query, max_results=5)
    if not results:
        return "No abstracts found for this query."
    # f-string formatting accepts a PMID of any type; building the entry with
    # "+" raised TypeError whenever the PMID was not already a str.
    return "\n\n".join(
        f"[PMID {r['pmid']}]\n{r['abstract']}" for r in results
    )
29
+
30
+
31
def run_query_architect(user_question):
    """Ask the LLM for PubMed search queries covering a clinical question.

    Args:
        user_question: The clinical question as free text.

    Returns:
        A list of at most five distinct query strings with any leading list
        numbering removed. The list may be shorter (or empty) if the model
        returned fewer usable lines.
    """
    llm = get_llm()
    prompt = (
        "You are a biomedical librarian expert in PubMed search strategy.\n"
        "Given this clinical question, generate exactly 5 distinct PubMed search queries using "
        "MeSH terminology and clinical keywords to maximise literature coverage.\n"
        "Return ONLY a numbered list 1-5, one query per line, no explanations.\n\n"
        "Question: " + user_question
    )
    response = llm.invoke(prompt)
    return _extract_queries(response.content)


def _extract_queries(text, limit=5):
    """Pull up to *limit* distinct queries out of the model's raw reply."""
    numbering = re.compile(r"^[\d]+[\.)\s]+")
    numbered = []   # lines that really carried "1." / "2)" style numbering
    every_line = []  # all non-empty lines, numbering stripped where present
    for raw_line in text.strip().split("\n"):
        cleaned, stripped_count = numbering.subn("", raw_line.strip())
        if not cleaned:
            continue
        every_line.append(cleaned)
        if stripped_count:
            numbered.append(cleaned)
    # The prompt asks for a numbered list, so when numbered lines exist any
    # other line is chatter ("Here are your queries:") and must not be sent
    # to PubMed as a search. If the model ignored the numbering instruction
    # entirely, fall back to treating every non-empty line as a query.
    chosen = numbered or every_line
    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(chosen))[:limit]
48
+
49
+
50
def run_literature_scout(queries):
    """Fetch PubMed results for every query concurrently and merge by PMID.

    Args:
        queries: Iterable of PubMed query strings.

    Returns:
        A dict mapping PMID to the paper record returned by ``fetch_pubmed``.
        When several queries return the same PMID, the record from the
        earliest query is kept. Entry order follows query order, then result
        order within each query, so it is the same on every run.

    Raises:
        Whatever ``fetch_pubmed`` raises; a failed fetch is not swallowed.
    """
    # Skip repeated queries (first-seen order kept) so the same search is not
    # sent to PubMed twice.
    unique_queries = list(dict.fromkeys(queries))
    all_papers = {}
    if not unique_queries:
        return all_papers

    def fetch_one(q):
        return fetch_pubmed(q, max_results=5)

    with ThreadPoolExecutor(max_workers=5) as executor:
        # executor.map runs the fetches concurrently but yields results in
        # submission order. Iterating with as_completed instead made the
        # merged dict's order depend on network timing.
        for results in executor.map(fetch_one, unique_queries):
            # "or ()" tolerates a fetch that returns None instead of a list.
            for r in results or ():
                all_papers.setdefault(r["pmid"], r)
    return all_papers
63
+
64
+
65
def run_evidence_synthesiser(user_question, papers, max_papers=20):
    """Ask the LLM for a structured evidence synthesis of retrieved papers.

    Args:
        user_question: The clinical question being answered.
        papers: Dict mapping PMID to a paper record (a dict; the "title" and
            "abstract" keys are read here).
        max_papers: Upper bound on how many papers, in dict order, are put
            into the prompt. Defaults to 20, the limit previously hard-coded.

    Returns:
        The model's reply text, or a fixed explanatory message when *papers*
        is empty.
    """
    if not papers:
        # The prompt instructs the model to cite PMIDs throughout. With no
        # literature to show it, calling the model could only yield citations
        # it was never given, so skip the call and say so plainly.
        return (
            "No PubMed literature was retrieved for this question, "
            "so no evidence synthesis could be produced."
        )
    llm = get_llm()
    parts = []
    for pmid, p in list(papers.items())[:max_papers]:
        # "or" also covers records where the key exists but holds None/"".
        title = p.get("title") or "N/A"
        abstract = p.get("abstract") or "Abstract unavailable."
        # f-string formatting accepts a non-str PMID, unlike "+" concatenation.
        parts.append(f"[PMID {pmid}]\nTitle: {title}\n{abstract}")
    corpus = "\n\n".join(parts)
    prompt = (
        "You are a senior biomedical researcher writing a structured evidence synthesis.\n"
        "Answer the clinical question using ONLY this structure:\n\n"
        "## Background\n"
        "Brief context (2-3 sentences).\n\n"
        "## Key Findings\n"
        "Most important findings with PMID citations inline.\n\n"
        "## Level of Evidence\n"
        "Rate: Strong / Moderate / Preliminary. Justify briefly.\n\n"
        "## Conflicting Evidence\n"
        "Any contradictions across studies.\n\n"
        "## Research Gaps\n"
        "What the literature does not answer.\n\n"
        "## Clinical Implications\n"
        "What this means for practice or future research.\n\n"
        "Clinical Question: " + user_question + "\n\n"
        "Retrieved Literature:\n" + corpus + "\n\n"
        "Be precise and cite PMIDs throughout."
    )
    response = llm.invoke(prompt)
    return response.content
94
+
95
+
96
def run_citation_builder(papers):
    """Format the retrieved papers as a numbered reference list.

    Args:
        papers: Dict mapping PMID to a paper record (a dict; the "title",
            "authors", "journal" and "year" keys are read, all optional).

    Returns:
        One line per paper, in dict order and numbered from 1, in the form
        ``"<n>. <authors> (<year>). <title>. <journal>. PMID: <pmid>"``,
        joined with newlines. An empty dict gives an empty string.
    """
    result_lines = []
    for i, (pmid, p) in enumerate(papers.items(), 1):
        # "or" applies the placeholder both when the key is missing and when
        # it is present but empty/None.
        title = p.get("title") or "Title unavailable"
        authors = p.get("authors") or "Authors unavailable"
        journal = p.get("journal") or "Journal unavailable"
        year = p.get("year") or "n.d."
        if isinstance(authors, (list, tuple)):
            # Accept a sequence of author names as well as a ready-made string.
            authors = ", ".join(str(a) for a in authors)
        # The template adds its own full stop after the title, so drop one
        # the title already ends with to avoid "..".
        title = str(title).rstrip(".")
        # f-string formatting accepts int years and non-str PMIDs, which "+"
        # concatenation rejected with TypeError.
        result_lines.append(
            f"{i}. {authors} ({year}). {title}. {journal}. PMID: {pmid}"
        )
    return "\n".join(result_lines)
107
+
108
+
109
def run_pipeline(user_question):
    """Run the four pipeline stages in order for one clinical question.

    Progress is reported on stdout as each stage starts.

    Args:
        user_question: The clinical question as free text.

    Returns:
        A dict with the keys "question", "queries", "paper_count",
        "synthesis", "citations" and "papers".
    """
    print("[1/4] Query Architect: generating search queries...")
    queries = run_query_architect(user_question)
    if not queries:
        # If no usable query came back, search on the question itself rather
        # than running the remaining stages with nothing to look up.
        queries = [user_question]
    print(f" Generated {len(queries)} queries")

    print("[2/4] Literature Scout: fetching PubMed in parallel...")
    papers = run_literature_scout(queries)
    print(f" Retrieved {len(papers)} unique papers")

    print("[3/4] Evidence Synthesiser: building structured synthesis...")
    synthesis = run_evidence_synthesiser(user_question, papers)

    print("[4/4] Citation Builder: formatting references...")
    citations = run_citation_builder(papers)

    return {
        "question": user_question,
        "queries": queries,
        "paper_count": len(papers),
        "synthesis": synthesis,
        "citations": citations,
        "papers": papers,
    }
128
 
 
 
 
 
 
 
129
 
def build_agent():
    """Return a ReAct agent whose only tool is ``PubMedSearch``.

    The agent is backed by the chat model that ``get_llm()`` provides.
    """
    toolbox = [PubMedSearch]
    return create_react_agent(get_llm(), toolbox)
133
+
134
 
if __name__ == "__main__":
    # Manual smoke test: run the whole pipeline on one sample question and
    # print the synthesis followed by the reference list.
    demo_question = "What are the most effective ML methods for epilepsy seizure detection from EEG signals?"
    outcome = run_pipeline(demo_question)
    sections = (
        ("\n=== SYNTHESIS ===", "synthesis"),
        ("\n=== REFERENCES ===", "citations"),
    )
    for banner, key in sections:
        print(banner)
        print(outcome[key])