azlaan428 committed on
Commit
904dfee
·
1 Parent(s): 2f495b0

feat: add Europe PMC source + update STATUS.md

Browse files
Files changed (4) hide show
  1. STATUS.md +3 -3
  2. agent/agent.py +6 -3
  3. retrieval/pubmed.py +42 -0
  4. sessions.json +0 -0
STATUS.md CHANGED
@@ -6,7 +6,7 @@ _Last updated: May 4, 2026_
6
  Multi-agent pipeline in agent/agent.py with 5 stages:
7
 
8
  1. Query Architect: generates 5 MeSH-optimised PubMed queries via Qwen2.5-72B on AMD MI300X
9
- 2. Literature Scout: fetches all queries sequentially with rate limiting
10
  3. PRISMA Filter: automatic inclusion/exclusion screening with one-line reasons, user can override any decision
11
  4. Evidence Synthesiser: structured synthesis with Background, Key Findings, Level of Evidence, Conflicting Evidence, Research Gaps, Clinical Implications
12
  5. Citation Builder: formatted references with PMID, synthesis runs on PRISMA-included papers only
@@ -25,7 +25,7 @@ Additional features completed:
25
  * Query refinement suggestions: 3 AI-generated follow-up research questions based on synthesis gaps
26
  * Session history: queries saved to sessions.json, reloadable from sidebar
27
  * Rate limit retry logic: automatic backoff on API errors
28
- * SSL patch for PubMed Entrez on corporate/university networks
29
  * Signature: Azlaan Mohammad 2026 in footer
30
 
31
  ## UI Updates (May 4 2026)
@@ -48,7 +48,7 @@ Additional features completed:
48
 
49
  * LLM: Qwen2.5-72B-Instruct on AMD MI300X via vLLM 0.17.1
50
  * Agent Framework: LangGraph + LangChain
51
- * Literature Retrieval: BioPython Entrez / PubMed NCBI
52
  * Web Framework: Flask with SSE streaming
53
  * PDF: ReportLab
54
  * Frontend: HTML, CSS, vanilla JS
 
6
  Multi-agent pipeline in agent/agent.py with 5 stages:
7
 
8
  1. Query Architect: generates 5 MeSH-optimised PubMed queries via Qwen2.5-72B on AMD MI300X
9
+ 2. Literature Scout: fetches each query from PubMed and then Europe PMC sequentially with rate limiting, deduplicates by PMID
10
  3. PRISMA Filter: automatic inclusion/exclusion screening with one-line reasons, user can override any decision
11
  4. Evidence Synthesiser: structured synthesis with Background, Key Findings, Level of Evidence, Conflicting Evidence, Research Gaps, Clinical Implications
12
  5. Citation Builder: formatted references with PMID, synthesis runs on PRISMA-included papers only
 
25
  * Query refinement suggestions: 3 AI-generated follow-up research questions based on synthesis gaps
26
  * Session history: queries saved to sessions.json, reloadable from sidebar
27
  * Rate limit retry logic: automatic backoff on API errors
28
+ * SSL patch for PubMed (Entrez) and Europe PMC requests on corporate/university networks
29
  * Signature: Azlaan Mohammad 2026 in footer
30
 
31
  ## UI Updates (May 4 2026)
 
48
 
49
  * LLM: Qwen2.5-72B-Instruct on AMD MI300X via vLLM 0.17.1
50
  * Agent Framework: LangGraph + LangChain
51
+ * Literature Retrieval: BioPython Entrez / PubMed NCBI + Europe PMC
52
  * Web Framework: Flask with SSE streaming
53
  * PDF: ReportLab
54
  * Frontend: HTML, CSS, vanilla JS
agent/agent.py CHANGED
@@ -64,13 +64,16 @@ def run_query_architect(user_question):
64
 
65
 
66
  def run_literature_scout(queries):
 
67
  all_papers = {}
68
- def fetch_one(q):
69
- return fetch_pubmed(q, max_results=5)
70
  import time
71
  for q in queries:
72
  time.sleep(0.4)
73
- for r in fetch_one(q):
 
 
 
 
74
  if r["pmid"] not in all_papers:
75
  all_papers[r["pmid"]] = r
76
  return all_papers
 
64
 
65
 
66
  def run_literature_scout(queries):
67
+ from retrieval.pubmed import fetch_europepmc
68
  all_papers = {}
 
 
69
  import time
70
  for q in queries:
71
  time.sleep(0.4)
72
+ for r in fetch_pubmed(q, max_results=5):
73
+ if r["pmid"] not in all_papers:
74
+ all_papers[r["pmid"]] = r
75
+ time.sleep(0.4)
76
+ for r in fetch_europepmc(q, max_results=3):
77
  if r["pmid"] not in all_papers:
78
  all_papers[r["pmid"]] = r
79
  return all_papers
retrieval/pubmed.py CHANGED
@@ -87,3 +87,45 @@ if __name__ == "__main__":
87
  print(f"Journal: {r['journal']}")
88
  print(f"Year: {r['year']}")
89
  print("-" * 60)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  print(f"Journal: {r['journal']}")
88
  print(f"Year: {r['year']}")
89
  print("-" * 60)
90
+
91
def fetch_europepmc(query: str, max_results: int = 5) -> list:
    """Search Europe PMC and return normalised paper records.

    Sends one GET request to the Europe PMC REST search endpoint asking for
    ``resultType=core`` JSON, sorted by citation count (``CITED desc``), with
    ``pageSize`` set to *max_results*.

    Args:
        query: Search string passed through as the ``query`` parameter.
        max_results: Page size requested from the service.

    Returns:
        A list of dicts with the keys ``pmid``, ``title``, ``authors``,
        ``journal``, ``year`` and ``abstract`` (all strings). Records with
        neither a ``pmid`` nor an ``id`` are skipped. An empty list is
        returned for a blank query, a non-positive *max_results*, or when the
        request / JSON decoding fails (the failure is printed, not raised).
    """
    import http.client
    import json
    import ssl
    import urllib.parse
    import urllib.request

    # Nothing sensible can be requested for a blank query or a page size < 1,
    # so skip the network round-trip entirely.
    if not query or not str(query).strip() or max_results <= 0:
        return []

    # NOTE(review): certificate and hostname verification are switched off,
    # presumably to cope with TLS-intercepting proxies. This leaves the
    # request open to man-in-the-middle tampering; consider making it opt-in.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    params = urllib.parse.urlencode({
        "query": query,
        "resultType": "core",
        "pageSize": max_results,
        "format": "json",
        "sort": "CITED desc",
    })
    url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search?" + params

    # Only the network call and JSON decoding are best-effort; parsing below
    # is defensive per record so one odd record cannot discard the batch.
    try:
        with urllib.request.urlopen(url, context=ctx, timeout=20) as resp:
            data = json.loads(resp.read())
    except (OSError, ValueError, http.client.HTTPException) as e:
        print(f"[EuropePMC] fetch failed: {e}")
        return []

    records = []
    if isinstance(data, dict):
        records = (data.get("resultList") or {}).get("result") or []

    results = []
    for p in records:
        if not isinstance(p, dict):
            continue
        # Falls back to the Europe PMC record id when no PMID is present.
        # NOTE(review): such ids are not PubMed IDs although they are stored
        # under "pmid" - confirm downstream citation formatting copes.
        pmid = p.get("pmid", p.get("id", ""))
        if not pmid:
            continue

        author_entries = (p.get("authorList") or {}).get("author") or []
        names = [
            a["fullName"]
            for a in author_entries
            if isinstance(a, dict) and a.get("fullName")
        ]
        authors = ", ".join(names[:3])
        if len(names) > 3:
            authors += " et al."

        # "core" results carry the journal name under journalInfo.journal.title;
        # "journalTitle" is kept as the first choice for lite-style payloads.
        journal_info = p.get("journalInfo") or {}
        journal = p.get("journalTitle") or (
            (journal_info.get("journal") or {}).get("title")
            if isinstance(journal_info, dict) else None
        )

        results.append({
            "pmid": str(pmid),
            "title": str(p.get("title") or "Title unavailable").rstrip("."),
            "authors": authors or "Authors unavailable",
            "journal": journal or "Journal unavailable",
            "year": str(p.get("pubYear") or "n.d."),
            "abstract": p.get("abstractText") or "Abstract not available",
        })
    return results
sessions.json CHANGED
The diff for this file is too large to render. See raw diff