Upload app.py with huggingface_hub
Browse files
app.py
ADDED
|
@@ -0,0 +1,904 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
PubMed Top Journals Student App
|
| 4 |
+
|
| 5 |
+
A beginner-friendly Gradio application that searches PubMed and filters results
|
| 6 |
+
to show only articles from high-impact journals based on Journal Impact Factor data.
|
| 7 |
+
|
| 8 |
+
Author: AI Assistant
|
| 9 |
+
Version: 1.0
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import json
|
| 14 |
+
import time
|
| 15 |
+
import requests
|
| 16 |
+
import pandas as pd
|
| 17 |
+
import gradio as gr
|
| 18 |
+
from typing import Dict, List, Optional, Tuple
|
| 19 |
+
from lxml import etree
|
| 20 |
+
from dotenv import load_dotenv
|
| 21 |
+
|
| 22 |
+
# Load environment variables
|
| 23 |
+
load_dotenv()
|
| 24 |
+
|
| 25 |
+
class PubMedSearcher:
    """Search PubMed through the NCBI E-utilities and flag top-journal articles.

    A search runs three requests in sequence: E-Search (PMIDs), E-Summary
    (title/journal/date metadata) and E-Fetch (abstracts). Journal names are
    matched against a local JSON list of journals to attach JIF, quartile and
    category, and optionally to drop articles from journals not on that list.
    """

    def __init__(self):
        """Read NCBI settings from the environment and load the journal list."""
        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.tool_name = os.getenv('NCBI_TOOL_NAME', 'pubmed-topjournals-student-app')
        self.email = os.getenv('NCBI_CONTACT_EMAIL', 'student@example.com')
        self.api_key = os.getenv('NCBI_API_KEY', '')

        # Load journal data
        self.journal_data = self._load_journal_data()
        self.journal_lookup = self._build_journal_lookup()

        print(f"Loaded {len(self.journal_data)} journals from database")

    def _load_journal_data(self) -> List[Dict]:
        """Load journal impact factor data from the bundled JSON file.

        Returns:
            The parsed JSON content, or an empty list when the file is missing
            or unreadable (the failure is printed, not raised).
        """
        try:
            with open('journal_impact_factors/top_journals.json', 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            print("Warning: journal_impact_factors/top_journals.json not found")
            return []
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"Error loading journal data: {e}")
            return []

    def _build_journal_lookup(self) -> Dict[str, Dict]:
        """Build a dictionary from normalized journal name/alias to its metadata.

        Entries without a ``name`` are skipped; missing ``quartile`` or ``jif``
        become ``None`` instead of raising ``KeyError``.
        """
        lookup = {}

        for journal in self.journal_data:
            canonical_name = journal.get('name')
            if not canonical_name:
                # Nothing to key the lookup on; ignore the malformed entry.
                continue

            metadata = {
                'quartile': journal.get('quartile'),
                'jif': journal.get('jif'),
                'category': journal.get('category', 'Unknown'),
                'canonical_name': canonical_name,
            }

            for name in [canonical_name, *(journal.get('aliases') or [])]:
                normalized = self._normalize_journal_name(name)
                if normalized:
                    # Separate dict per key so entries never alias each other.
                    lookup[normalized] = dict(metadata)

        return lookup

    def _normalize_journal_name(self, name: str) -> str:
        """Return *name* lowercased, whitespace-collapsed, without trailing periods."""
        if not name:
            return ""

        normalized = ' '.join(name.lower().strip().split())
        return normalized.rstrip('.')

    def _get_api_params(self) -> Dict[str, str]:
        """Return the parameters sent with every request (tool, email, optional key)."""
        params = {
            'tool': self.tool_name,
            'email': self.email,
        }
        if self.api_key:
            params['api_key'] = self.api_key
        return params

    def _make_api_request(self, url: str, params: Dict) -> Optional[Dict]:
        """GET *url* and return the decoded JSON body, or ``None`` on any failure.

        A 5xx response is retried once after a one-second pause. Network
        errors, non-200 statuses and bodies that are not valid JSON are printed
        and reported as ``None``.
        """
        try:
            response = requests.get(url, params=params, timeout=30)

            if response.status_code >= 500:
                # Server error - retry once
                print(f"Server error {response.status_code}, retrying...")
                time.sleep(1)
                response = requests.get(url, params=params, timeout=30)

            if response.status_code == 200:
                return response.json()

            print(f"API request failed with status {response.status_code}")
            return None

        except requests.exceptions.RequestException as e:
            print(f"API request error: {e}")
            return None
        except ValueError as e:
            # response.json() raises a plain ValueError subclass on older
            # requests releases; treat it like any other failed request.
            print(f"API response was not valid JSON: {e}")
            return None

    def _build_search_term(self, query: str, article_type: str, humans_only: bool, open_access: bool) -> str:
        """Append the selected PubMed filters to *query* and return the search term."""
        search_term = query

        # Add article type filter
        if article_type:
            type_mapping = {
                "RCT": "Randomized Controlled Trial[Publication Type]",
                "Randomized Controlled Trial": "Randomized Controlled Trial[Publication Type]",
                "Meta-Analysis": "Meta-Analysis[Publication Type]",
                "Systematic Review": "Systematic Review[Publication Type]",
                "Clinical Trial": "Clinical Trial[Publication Type]",
                "Review": "Review[Publication Type]",
                "Research Article": "Journal Article[Publication Type]",
            }
            if article_type in type_mapping:
                search_term += f" AND {type_mapping[article_type]}"

        # Add human studies filter
        if humans_only:
            search_term += " AND humans[MeSH Terms]"

        # Add open access filter
        if open_access:
            search_term += " AND free full text[sb]"

        return search_term

    def _fetch_abstract_xml(self, pmids: List[str]) -> Optional[bytes]:
        """Return the raw E-Fetch XML for *pmids*, or ``None`` if the request fails."""
        fetch_params = {
            'db': 'pubmed',
            'id': ','.join(pmids),
            'retmode': 'xml',
            **self._get_api_params(),
        }
        try:
            response = requests.get(
                f"{self.base_url}efetch.fcgi", params=fetch_params, timeout=30
            )
        except requests.exceptions.RequestException as e:
            # Abstracts are optional: report and let the caller fall back.
            print(f"API request error: {e}")
            return None

        if response.status_code != 200:
            print(f"API request failed with status {response.status_code}")
            return None

        # Hand over bytes so the XML parser applies the document's own encoding.
        return response.content

    def search_pubmed(self, query: str, article_type: str, humans_only: bool, open_access: bool,
                      years_back: int, max_results: int, show_all_journals: bool) -> Tuple[str, List[Dict]]:
        """Search PubMed and return ``(status_message, articles)``.

        Args:
            query: Free-text search terms.
            article_type: Key of the publication-type filter, or "" for none.
            humans_only: Restrict to the ``humans`` MeSH term.
            open_access: Restrict to free full text.
            years_back: Size of the date window, counted as 365-day years.
            max_results: Requested number of articles; clamped to 1..100.
            show_all_journals: When false, keep only journals in the lookup.

        Returns:
            A human-readable status line and a list of article dicts (keys:
            pmid, title, journal, year, type, pubmed_url, jif, quartile,
            category, abstract). The list is empty on error or no hits.
        """
        if not query or not query.strip():
            return "Please enter a search query.", []

        # Cap max results; coerce to int so a float from the caller cannot be
        # sent as retmax=50.0.
        max_results = max(1, min(int(max_results), 100))

        # Build search term
        search_term = self._build_search_term(query, article_type, humans_only, open_access)

        # Calculate date range
        from datetime import datetime, timedelta
        end_date = datetime.now()
        start_date = end_date - timedelta(days=years_back * 365)

        print(f"Searching PubMed: {search_term}")
        print(f"Date range: {start_date.strftime('%Y/%m/%d')} to {end_date.strftime('%Y/%m/%d')}")

        # Step 1: E-Search to get PMIDs
        search_params = {
            'db': 'pubmed',
            'term': search_term,
            'retmode': 'json',
            'retmax': max_results,
            # 'pub+date' is the URL-encoded spelling; passed through requests
            # the '+' is percent-encoded, so use the documented literal value.
            'sort': 'pub_date',
            'mindate': start_date.strftime('%Y/%m/%d'),
            'maxdate': end_date.strftime('%Y/%m/%d'),
            **self._get_api_params(),
        }

        search_response = self._make_api_request(
            f"{self.base_url}esearch.fcgi", search_params
        )

        if not search_response:
            return "❌ Error: Could not connect to PubMed. Please check your internet connection and try again.", []

        # Check for errors in response
        if 'esearchresult' not in search_response:
            return "❌ Error: Invalid response from PubMed. Please try again.", []

        esearch_result = search_response['esearchresult']

        error_list = esearch_result.get('errorlist')
        if error_list:
            # Guard against a present-but-empty message list.
            error_msg = error_list.get('errormessage') or ['Unknown error']
            return f"❌ PubMed error: {error_msg[0]}", []

        pmids = esearch_result.get('idlist', [])
        total_found = int(esearch_result.get('count', 0))

        if not pmids:
            return f"🔍 No articles found for '{query}'. Try:\n• Broader search terms\n• Increase 'Years Back' range\n• Turn on 'Show All Journals'", []

        print(f"Found {total_found} articles, processing {len(pmids)} PMIDs")

        # Step 2: E-Summary to get metadata
        articles = []
        batch_size = 200

        for i in range(0, len(pmids), batch_size):
            batch_pmids = pmids[i:i + batch_size]

            summary_params = {
                'db': 'pubmed',
                'id': ','.join(batch_pmids),
                'retmode': 'json',
                **self._get_api_params(),
            }

            summary_response = self._make_api_request(
                f"{self.base_url}esummary.fcgi", summary_params
            )

            if summary_response and 'result' in summary_response:
                result = summary_response['result']
                articles.extend(
                    self._process_article_metadata(result[pmid], pmid)
                    for pmid in batch_pmids
                    if pmid in result
                )

            # Be polite to the API
            time.sleep(0.1)

        # Step 3: E-Fetch to get abstracts
        abstract_batch_size = 50

        for i in range(0, len(articles), abstract_batch_size):
            batch_articles = articles[i:i + abstract_batch_size]
            xml_content = self._fetch_abstract_xml(
                [article['pmid'] for article in batch_articles]
            )

            if xml_content is not None:
                abstracts = self._parse_abstracts(xml_content)
                fallback = 'No abstract available'
            else:
                # Request failed: keep the articles, just without abstracts.
                abstracts = {}
                fallback = 'Abstract temporarily unavailable'

            for article in batch_articles:
                article['abstract'] = abstracts.get(article['pmid'], fallback)

            # Be polite to the API
            time.sleep(0.1)

        # Filter by journals if not showing all
        if show_all_journals:
            filtered_articles = articles
        else:
            filtered_articles = [
                article for article in articles
                if self._is_top_journal(article['journal'])
            ]

        # Build status message
        status_parts = [f"✅ {total_found} found"]
        if len(articles) < total_found:
            status_parts.append(f"→ {len(articles)} after date/filter limits")

        if not show_all_journals:
            status_parts.append(f"→ {len(filtered_articles)} kept (Top journals)")
        else:
            status_parts.append(f"→ {len(filtered_articles)} kept (All journals)")

        status_message = " ".join(status_parts)

        return status_message, filtered_articles

    def _process_article_metadata(self, article_data: Dict, pmid: str) -> Dict:
        """Convert one E-Summary record into the article dict used by the UI."""
        title = article_data.get('title', 'No title available')

        # Fall back through the abbreviated name when the full one is empty.
        journal = (
            article_data.get('fulljournalname')
            or article_data.get('source')
            or 'Unknown Journal'
        )

        year = self._extract_year(article_data.get('pubdate', ''))

        # 'pubtype' is a list when present; show its first entry, and avoid
        # displaying an empty list literally.
        article_type = article_data.get('pubtype', ['Unknown'])
        if isinstance(article_type, list):
            article_type = article_type[0] if article_type else 'Unknown'

        # Check if it's a top journal and get metadata
        journal_metadata = self._get_journal_metadata(journal)

        return {
            'pmid': pmid,
            'title': title,
            'journal': journal,
            'year': year,
            'type': article_type,
            'pubmed_url': f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            'jif': journal_metadata.get('jif', None),
            'quartile': journal_metadata.get('quartile', None),
            'category': journal_metadata.get('category', None),
        }

    def _extract_year(self, pubdate: str) -> str:
        """Return the first 19xx/20xx year found in *pubdate*, else "Unknown"."""
        if not pubdate:
            return "Unknown"

        import re
        year_match = re.search(r'\b(19|20)\d{2}\b', pubdate)
        if year_match:
            return year_match.group()

        return "Unknown"

    def _parse_abstracts(self, xml_content: str) -> Dict[str, str]:
        """Parse abstracts from an E-Fetch XML response.

        Args:
            xml_content: The XML document as ``bytes`` or ``str``.

        Returns:
            Mapping of PMID to abstract text; labelled sections are rendered as
            "LABEL: text" and joined by blank lines. Articles without abstract
            text are absent. Parse errors are printed and yield what was
            collected so far.
        """
        abstracts = {}

        try:
            if isinstance(xml_content, str):
                # lxml refuses str input that carries an encoding declaration.
                xml_content = xml_content.encode('utf-8')
            root = etree.fromstring(xml_content)

            for article in root.xpath('//PubmedArticle'):
                pmid = article.find('.//PMID')
                if pmid is None or not pmid.text:
                    continue

                abstract_parts = []
                for abstract_text in article.xpath('.//AbstractText'):
                    label = abstract_text.get('Label', '')
                    # itertext() keeps text inside and after inline tags
                    # (<i>, <sub>, ...); .text alone stops at the first one.
                    text = ''.join(abstract_text.itertext()).strip()

                    if text:
                        abstract_parts.append(f"{label}: {text}" if label else text)

                if abstract_parts:
                    abstracts[pmid.text] = '\n\n'.join(abstract_parts)

        except Exception as e:
            # Best effort: abstracts are optional, so report and carry on.
            print(f"Error parsing abstracts: {e}")

        return abstracts

    def _is_top_journal(self, journal_name: str) -> bool:
        """Return True when the normalized journal name is in the lookup."""
        normalized = self._normalize_journal_name(journal_name)
        return normalized in self.journal_lookup

    def _get_journal_metadata(self, journal_name: str) -> Dict:
        """Return the lookup entry (JIF, quartile, category) or an empty dict."""
        normalized = self._normalize_journal_name(journal_name)
        return self.journal_lookup.get(normalized, {})
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
def create_article_card(article: Dict) -> str:
    """Create the HTML card for one article.

    All article text is HTML-escaped before it is placed in the markup, so
    characters such as ``<`` or ``&`` in a title or abstract cannot break the
    card or inject tags.

    Args:
        article: Dict with the keys ``title``, ``journal``, ``year``, ``type``,
            ``abstract`` and ``pubmed_url``; ``jif`` and ``quartile`` are
            optional and add a badge each when set.

    Returns:
        An HTML fragment; the abstract is cut to 300 characters plus "...".
    """
    import html  # local import keeps the block independent of the file header

    def _safe(value) -> str:
        # Metadata may already contain entities (e.g. "&amp;"); decode first so
        # they are not escaped twice, then escape for HTML.
        return html.escape(html.unescape(str(value)))

    title = _safe(article['title'])
    journal = _safe(article['journal'])
    year = _safe(article['year'])
    article_type = _safe(article['type'])
    pubmed_url = html.escape(str(article['pubmed_url']), quote=True)

    # Create badges for JIF and quartile
    badges_html = ""
    if article.get('jif') is not None:
        badges_html += f'<span class="badge jif-badge">JIF {_safe(article["jif"])}</span> '
    if article.get('quartile'):
        badges_html += f'<span class="badge quartile-badge">{_safe(article["quartile"])}</span> '

    # Truncate the raw text first so an entity is never cut in half.
    abstract = article['abstract']
    abstract_preview = abstract[:300] + "..." if len(abstract) > 300 else abstract
    abstract_preview = html.escape(abstract_preview)

    card_html = f"""
    <div class="article-card">
        <h3><a href="{pubmed_url}" target="_blank" class="article-title">{title}</a></h3>
        <div class="article-meta">
            <strong>{journal}</strong> • {year} • {article_type}
            {badges_html}
        </div>
        <details class="abstract-details">
            <summary class="abstract-summary">Abstract</summary>
            <div class="abstract-content">{abstract_preview}</div>
        </details>
    </div>
    """

    return card_html
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
def sort_articles(articles: List[Dict], sort_option: str) -> List[Dict]:
    """Return *articles* ordered according to the chosen sort option.

    "Default (by relevance)" and any unrecognised option hand back the input
    list itself, untouched. The JIF and quartile options return a new,
    stably-sorted list; articles lacking a JIF count as 0 and articles lacking
    a quartile always come last.
    """

    def jif_of(entry: Dict):
        # A missing or None JIF is treated as 0.
        return entry.get('jif', 0) or 0

    def by_quartile(ranking: Dict[str, int]) -> List[Dict]:
        # Unknown quartiles get a rank beyond every real one.
        return sorted(articles, key=lambda entry: ranking.get(entry.get('quartile'), 999))

    if sort_option == "JIF (High to Low)":
        return sorted(articles, key=jif_of, reverse=True)

    if sort_option == "JIF (Low to High)":
        return sorted(articles, key=jif_of, reverse=False)

    if sort_option == "Quartile (Q1 to Q4)":
        return by_quartile({'Q1': 1, 'Q2': 2, 'Q3': 3, 'Q4': 4})

    if sort_option == "Quartile (Q4 to Q1)":
        return by_quartile({'Q4': 1, 'Q3': 2, 'Q2': 3, 'Q1': 4})

    # Default option or anything unexpected: keep the incoming order.
    return articles
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
def search_interface(query: str, article_type: str, humans_only: bool, open_access: bool,
                     years_back: int, max_results: int, show_all_journals: bool, sort_by: str) -> Tuple[str, str]:
    """Run a search and render the results as HTML.

    Args:
        query, article_type, humans_only, open_access, years_back,
        max_results, show_all_journals: Passed unchanged to
            ``PubMedSearcher.search_pubmed``.
        sort_by: Sort option label passed to ``sort_articles``.

    Returns:
        ``(status_message, html)``. ``html`` is an empty string when the
        search produced no articles; otherwise it holds the stylesheet, a
        status banner and one card per article.
    """
    # A fresh searcher per call re-reads the journal list from disk.
    searcher = PubMedSearcher()

    # Perform search
    status_message, articles = searcher.search_pubmed(
        query, article_type, humans_only, open_access, years_back, max_results, show_all_journals
    )

    if not articles:
        return status_message, ""

    # Sort articles based on user selection
    articles = sort_articles(articles, sort_by)

    # Stylesheet shipped inline with the results.
    css_style = """
    <style>
    .article-card {
        border: none;
        border-radius: 20px;
        padding: 24px;
        margin: 20px 0;
        background: linear-gradient(135deg, #ffffff 0%, #f8f9ff 100%);
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        transition: all 0.3s ease;
        border-left: 5px solid #667eea;
    }

    .article-card:hover {
        transform: translateY(-4px);
        box-shadow: 0 12px 48px rgba(0,0,0,0.15);
    }

    .article-title {
        color: #1976d2;
        text-decoration: none;
        font-size: 1.3em;
        line-height: 1.4;
        font-weight: 700;
        margin-bottom: 12px;
        display: block;
        transition: all 0.3s ease;
    }

    .article-title:hover {
        color: #1565c0;
        text-decoration: none;
        transform: translateX(4px);
    }

    .article-meta {
        color: #424242;
        margin: 12px 0;
        font-size: 1em;
        font-weight: 600;
        background: #f0f4ff;
        padding: 8px 16px;
        border-radius: 12px;
        border: 1px solid #e0e6ff;
    }

    .badge {
        display: inline-block;
        padding: 6px 14px;
        border-radius: 20px;
        font-size: 0.85em;
        font-weight: 700;
        margin-left: 10px;
        text-shadow: none;
        transition: all 0.3s ease;
        box-shadow: 0 2px 8px rgba(0,0,0,0.2);
    }

    .badge:hover {
        transform: scale(1.05);
    }

    .jif-badge {
        background: linear-gradient(135deg, #4caf50 0%, #45a049 100%);
        color: white;
    }

    .quartile-badge {
        background: linear-gradient(135deg, #2196f3 0%, #1976d2 100%);
        color: white;
    }

    .abstract-details {
        margin-top: 16px;
        background: white;
        border-radius: 12px;
        padding: 16px;
        border: 1px solid #e0e6ff;
    }

    .abstract-summary {
        cursor: pointer;
        color: #1976d2;
        font-weight: 700;
        font-size: 1.1em;
        padding: 8px 16px;
        background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
        border-radius: 8px;
        border: 1px solid #90caf9;
        transition: all 0.3s ease;
        display: inline-block;
        margin-bottom: 8px;
    }

    .abstract-summary:hover {
        color: #1565c0;
        background: linear-gradient(135deg, #bbdefb 0%, #90caf9 100%);
        transform: translateY(-1px);
    }

    .abstract-content {
        margin-top: 12px;
        line-height: 1.7;
        color: #212121;
        font-size: 0.95em;
        background: #fafafa;
        padding: 16px;
        border-radius: 8px;
        border-left: 4px solid #667eea;
    }

    .status-message {
        background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
        border: 2px solid #4caf50;
        border-radius: 15px;
        padding: 20px;
        margin: 20px 0;
        font-weight: 700;
        color: black !important;
        font-size: 1.1em;
        text-align: center;
        box-shadow: 0 4px 16px rgba(76, 175, 80, 0.3);
    }

    .status-message * {
        color: black !important;
    }

    .status-message span {
        color: black !important;
    }

    /* Ensure text is visible on all backgrounds */
    body, html {
        color: #212121 !important;
    }

    /* Make sure article details are clearly visible */
    .article-card * {
        color: #212121 !important;
    }

    .article-title {
        color: #1976d2 !important;
    }

    .abstract-summary {
        color: #1976d2 !important;
    }

    /* Add some animation */
    @keyframes fadeInUp {
        from {
            opacity: 0;
            transform: translateY(30px);
        }
        to {
            opacity: 1;
            transform: translateY(0);
        }
    }

    .article-card {
        animation: fadeInUp 0.6s ease-out;
    }
    </style>
    """

    # Status banner followed by one card per article, assembled in one join.
    formatted_status = f"<span style='color: black !important;'>{status_message}</span>"
    cards_html = "".join(create_article_card(article) for article in articles)
    articles_html = css_style + "<div class='status-message'>" + formatted_status + "</div>" + cards_html

    return status_message, articles_html
|
| 651 |
+
|
| 652 |
+
|
| 653 |
+
def create_gradio_interface():
|
| 654 |
+
"""Create and configure the Gradio interface."""
|
| 655 |
+
|
| 656 |
+
# Custom CSS for enhanced styling
|
| 657 |
+
custom_css = """
|
| 658 |
+
.gradio-container {
|
| 659 |
+
background: #000000;
|
| 660 |
+
min-height: 100vh;
|
| 661 |
+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
| 662 |
+
}
|
| 663 |
+
|
| 664 |
+
.main-header {
|
| 665 |
+
background: #1a1a1a;
|
| 666 |
+
color: white;
|
| 667 |
+
padding: 2rem;
|
| 668 |
+
border-radius: 15px;
|
| 669 |
+
margin-bottom: 2rem;
|
| 670 |
+
box-shadow: 0 8px 32px rgba(255,255,255,0.1);
|
| 671 |
+
text-align: center;
|
| 672 |
+
border: 1px solid #333333;
|
| 673 |
+
}
|
| 674 |
+
|
| 675 |
+
.main-header h1 {
|
| 676 |
+
margin: 0;
|
| 677 |
+
font-size: 2.5rem;
|
| 678 |
+
font-weight: 700;
|
| 679 |
+
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
|
| 680 |
+
}
|
| 681 |
+
|
| 682 |
+
.main-header p {
|
| 683 |
+
margin: 0.5rem 0 0 0;
|
| 684 |
+
font-size: 1.1rem;
|
| 685 |
+
opacity: 0.9;
|
| 686 |
+
}
|
| 687 |
+
|
| 688 |
+
|
| 689 |
+
.info-panel {
|
| 690 |
+
background: #1a1a1a;
|
| 691 |
+
color: white;
|
| 692 |
+
border-radius: 20px;
|
| 693 |
+
padding: 2rem;
|
| 694 |
+
box-shadow: 0 10px 40px rgba(255,255,255,0.1);
|
| 695 |
+
margin-bottom: 2rem;
|
| 696 |
+
border: 1px solid #333333;
|
| 697 |
+
}
|
| 698 |
+
|
| 699 |
+
.search-button {
|
| 700 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 701 |
+
border: none;
|
| 702 |
+
border-radius: 20px;
|
| 703 |
+
padding: 1.2rem 3rem;
|
| 704 |
+
color: white;
|
| 705 |
+
font-size: 1.3rem;
|
| 706 |
+
font-weight: 700;
|
| 707 |
+
transition: all 0.3s ease;
|
| 708 |
+
box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4);
|
| 709 |
+
text-transform: uppercase;
|
| 710 |
+
letter-spacing: 1px;
|
| 711 |
+
position: relative;
|
| 712 |
+
overflow: hidden;
|
| 713 |
+
width: 100%;
|
| 714 |
+
margin-top: 1rem;
|
| 715 |
+
}
|
| 716 |
+
|
| 717 |
+
.search-button:hover {
|
| 718 |
+
transform: translateY(-3px);
|
| 719 |
+
box-shadow: 0 10px 30px rgba(102, 126, 234, 0.6);
|
| 720 |
+
background: linear-gradient(135deg, #764ba2 0%, #667eea 100%);
|
| 721 |
+
}
|
| 722 |
+
|
| 723 |
+
.search-button:active {
|
| 724 |
+
transform: translateY(-1px);
|
| 725 |
+
}
|
| 726 |
+
|
| 727 |
+
.search-button.loading {
|
| 728 |
+
pointer-events: none;
|
| 729 |
+
opacity: 0.8;
|
| 730 |
+
}
|
| 731 |
+
|
| 732 |
+
.search-button.loading::after {
|
| 733 |
+
content: '';
|
| 734 |
+
position: absolute;
|
| 735 |
+
width: 20px;
|
| 736 |
+
height: 20px;
|
| 737 |
+
top: 50%;
|
| 738 |
+
left: 50%;
|
| 739 |
+
margin-left: -10px;
|
| 740 |
+
margin-top: -10px;
|
| 741 |
+
border: 2px solid #ffffff;
|
| 742 |
+
border-radius: 50%;
|
| 743 |
+
border-top-color: transparent;
|
| 744 |
+
animation: spin 1s linear infinite;
|
| 745 |
+
}
|
| 746 |
+
|
| 747 |
+
@keyframes spin {
|
| 748 |
+
to {
|
| 749 |
+
transform: rotate(360deg);
|
| 750 |
+
}
|
| 751 |
+
}
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
"""
|
| 755 |
+
|
| 756 |
+
with gr.Blocks(title="PubMed Search Engine", theme=gr.themes.Soft(), css=custom_css) as app:
|
| 757 |
+
|
| 758 |
+
# Main Header
|
| 759 |
+
with gr.Row():
|
| 760 |
+
with gr.Column():
|
| 761 |
+
gr.HTML("""
|
| 762 |
+
<div class="main-header">
|
| 763 |
+
<h1>π¬ PubMed Search Engine</h1>
|
| 764 |
+
<p>Search PubMed and filter results to show only articles from high-impact journals.<br>
|
| 765 |
+
Perfect for students and researchers who want to focus on the most credible research.</p>
|
| 766 |
+
</div>
|
| 767 |
+
""")
|
| 768 |
+
|
| 769 |
+
with gr.Row():
|
| 770 |
+
with gr.Column(scale=3):
|
| 771 |
+
# Search Panel
|
| 772 |
+
with gr.Column():
|
| 773 |
+
query_input = gr.Textbox(
|
| 774 |
+
label="π Search Query",
|
| 775 |
+
placeholder="Enter keywords (e.g., 'GLP-1 obesity meta-analysis')",
|
| 776 |
+
lines=2
|
| 777 |
+
)
|
| 778 |
+
|
| 779 |
+
with gr.Row():
|
| 780 |
+
article_type = gr.Dropdown(
|
| 781 |
+
choices=["", "Research Article", "RCT", "Randomized Controlled Trial", "Meta-Analysis",
|
| 782 |
+
"Systematic Review", "Clinical Trial", "Review"],
|
| 783 |
+
label="π Article Type Filter",
|
| 784 |
+
value=""
|
| 785 |
+
)
|
| 786 |
+
|
| 787 |
+
humans_only = gr.Checkbox(
|
| 788 |
+
label="π₯ Humans Only",
|
| 789 |
+
value=True,
|
| 790 |
+
info="Exclude animal studies"
|
| 791 |
+
)
|
| 792 |
+
|
| 793 |
+
open_access = gr.Checkbox(
|
| 794 |
+
label="π Open Access Only",
|
| 795 |
+
value=False,
|
| 796 |
+
info="Show only freely accessible articles"
|
| 797 |
+
)
|
| 798 |
+
|
| 799 |
+
with gr.Row():
|
| 800 |
+
years_back = gr.Slider(
|
| 801 |
+
minimum=1, maximum=15, value=5, step=1,
|
| 802 |
+
label="π
Years Back",
|
| 803 |
+
info="How many years to search"
|
| 804 |
+
)
|
| 805 |
+
|
| 806 |
+
max_results = gr.Slider(
|
| 807 |
+
minimum=10, maximum=100, value=50, step=10,
|
| 808 |
+
label="π Max Results",
|
| 809 |
+
info="Maximum articles to return"
|
| 810 |
+
)
|
| 811 |
+
|
| 812 |
+
with gr.Row():
|
| 813 |
+
show_all_journals = gr.Checkbox(
|
| 814 |
+
label="π Show All Journals",
|
| 815 |
+
value=False,
|
| 816 |
+
info="Show all journals (not just top journals)"
|
| 817 |
+
)
|
| 818 |
+
|
| 819 |
+
sort_by = gr.Dropdown(
|
| 820 |
+
choices=["Default (by relevance)", "JIF (High to Low)", "JIF (Low to High)", "Quartile (Q1 to Q4)", "Quartile (Q4 to Q1)"],
|
| 821 |
+
label="π Sort Results By",
|
| 822 |
+
value="Default (by relevance)"
|
| 823 |
+
)
|
| 824 |
+
|
| 825 |
+
search_button = gr.Button("π Search PubMed", variant="primary", size="lg", elem_classes="search-button")
|
| 826 |
+
|
| 827 |
+
with gr.Column(scale=1):
|
| 828 |
+
# Info Panel
|
| 829 |
+
with gr.Column():
|
| 830 |
+
gr.HTML("""
|
| 831 |
+
<div class="info-panel">
|
| 832 |
+
<h3 style="margin-top: 0; color: white;">π About Journal Rankings</h3>
|
| 833 |
+
<div style="color: white;">
|
| 834 |
+
<p><strong>Q1 (Quartile 1):</strong> Top 25% of journals</p>
|
| 835 |
+
<p><strong>Q2 (Quartile 2):</strong> 25-50th percentile</p>
|
| 836 |
+
<p><strong>Q3 (Quartile 3):</strong> 50-75th percentile</p>
|
| 837 |
+
<p><strong>Q4 (Quartile 4):</strong> Bottom 25%</p>
|
| 838 |
+
<br>
|
| 839 |
+
<p><strong>Higher JIF = More influential journal</strong></p>
|
| 840 |
+
</div>
|
| 841 |
+
</div>
|
| 842 |
+
""")
|
| 843 |
+
|
| 844 |
+
# Results section
|
| 845 |
+
with gr.Row():
|
| 846 |
+
with gr.Column():
|
| 847 |
+
status_output = gr.Markdown(label="Search Status")
|
| 848 |
+
results_output = gr.HTML(label="Search Results")
|
| 849 |
+
|
| 850 |
+
# Event handlers
|
| 851 |
+
search_button.click(
|
| 852 |
+
fn=search_interface,
|
| 853 |
+
inputs=[query_input, article_type, humans_only, open_access, years_back, max_results, show_all_journals, sort_by],
|
| 854 |
+
outputs=[status_output, results_output]
|
| 855 |
+
)
|
| 856 |
+
|
| 857 |
+
# Example queries
|
| 858 |
+
with gr.Row():
|
| 859 |
+
with gr.Column():
|
| 860 |
+
gr.Examples(
|
| 861 |
+
examples=[
|
| 862 |
+
["GLP-1 obesity meta-analysis", "Meta-Analysis", True, False, 5, 50, False, "JIF (High to Low)"],
|
| 863 |
+
["COVID-19 vaccine efficacy RCT", "RCT", True, False, 3, 30, False, "Quartile (Q1 to Q4)"],
|
| 864 |
+
["machine learning healthcare", "Research Article", True, True, 10, 50, True, "Default (by relevance)"],
|
| 865 |
+
["diabetes prevention systematic review", "Systematic Review", True, False, 8, 40, False, "JIF (High to Low)"]
|
| 866 |
+
],
|
| 867 |
+
inputs=[query_input, article_type, humans_only, open_access, years_back, max_results, show_all_journals, sort_by],
|
| 868 |
+
label="π‘ Example Queries"
|
| 869 |
+
)
|
| 870 |
+
|
| 871 |
+
# Footer
|
| 872 |
+
with gr.Row():
|
| 873 |
+
with gr.Column():
|
| 874 |
+
gr.Markdown("""
|
| 875 |
+
---
|
| 876 |
+
<div style="text-align: center; color: #666; padding: 2rem;">
|
| 877 |
+
**π Data Sources:** PubMed (NCBI) β’ Journal Impact Factors 2024<br>
|
| 878 |
+
**π‘ Tips:** Use specific medical terms for better results β’ Try "Show All Journals" if you get few results<br>
|
| 879 |
+
**π± Mobile Friendly:** Works great on all devices
|
| 880 |
+
</div>
|
| 881 |
+
""")
|
| 882 |
+
|
| 883 |
+
return app
|
| 884 |
+
|
| 885 |
+
|
| 886 |
+
def main():
    """Build the Gradio interface and serve it.

    Prints a startup banner, constructs the UI through
    ``create_gradio_interface()``, and calls ``launch`` on the returned app
    with a fixed set of options.
    """
    print("Starting PubMed Top Journals Student App...")

    launch_options = {
        "server_name": "0.0.0.0",
        # None lets Gradio find an available port automatically.
        "server_port": None,
        "share": False,
        "show_error": True,
        "quiet": False,
    }

    create_gradio_interface().launch(**launch_options)
|
| 901 |
+
|
| 902 |
+
|
| 903 |
+
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|