Babajaan committed on
Commit
6c50c38
·
verified ·
1 Parent(s): ae3d207

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +904 -0
app.py ADDED
@@ -0,0 +1,904 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
PubMed Top Journals Student App

A beginner-friendly Gradio application that searches PubMed and filters results
to show only articles from high-impact journals based on Journal Impact Factor data.

Author: AI Assistant
Version: 1.0
"""
11
+
12
import html
import json
import os
import re
import time
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Union

import gradio as gr
import pandas as pd
import requests
from dotenv import load_dotenv
from lxml import etree
21
+
22
# Load environment variables
load_dotenv()
24
+
25
class PubMedSearcher:
    """Search PubMed through the NCBI E-utilities and tag results by journal.

    On construction the searcher loads a journal table from
    ``journal_impact_factors/top_journals.json`` and builds a lookup keyed by
    normalized journal name. Search results are annotated with the journal's
    impact factor (JIF), quartile and category when the journal is in that
    table, and can optionally be restricted to those journals.
    """

    # Maps the article-type choices offered by the UI to PubMed filters.
    _ARTICLE_TYPE_FILTERS = {
        "RCT": "Randomized Controlled Trial[Publication Type]",
        "Randomized Controlled Trial": "Randomized Controlled Trial[Publication Type]",
        "Meta-Analysis": "Meta-Analysis[Publication Type]",
        "Systematic Review": "Systematic Review[Publication Type]",
        "Clinical Trial": "Clinical Trial[Publication Type]",
        "Review": "Review[Publication Type]",
        "Research Article": "Journal Article[Publication Type]",
    }

    def __init__(self):
        """Read NCBI settings from the environment and load the journal table."""
        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.tool_name = os.getenv('NCBI_TOOL_NAME', 'pubmed-topjournals-student-app')
        self.email = os.getenv('NCBI_CONTACT_EMAIL', 'student@example.com')
        self.api_key = os.getenv('NCBI_API_KEY', '')

        # Load journal data
        self.journal_data = self._load_journal_data()
        self.journal_lookup = self._build_journal_lookup()

        print(f"Loaded {len(self.journal_data)} journals from database")

    def _load_journal_data(self) -> List[Dict]:
        """Load journal impact factor data from the JSON file.

        Returns:
            The parsed JSON content, or an empty list when the file is
            missing or cannot be read/parsed (a message is printed instead
            of raising so the app can still start).
        """
        try:
            with open('journal_impact_factors/top_journals.json', 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            print("Warning: journal_impact_factors/top_journals.json not found")
            return []
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading journal data: {e}")
            return []

    def _build_journal_lookup(self) -> Dict[str, Dict]:
        """Build a lookup from normalized journal name/alias to its metadata.

        Each entry of ``self.journal_data`` must provide ``name``,
        ``quartile`` and ``jif``; ``aliases`` and ``category`` are optional.
        """
        lookup = {}

        for journal in self.journal_data:
            metadata = {
                'quartile': journal['quartile'],
                'jif': journal['jif'],
                'category': journal.get('category', 'Unknown'),
                'canonical_name': journal['name'],
            }
            # The canonical name and every alias resolve to the same metadata.
            names_to_add = [journal['name'], *(journal.get('aliases') or [])]

            for name in names_to_add:
                normalized = self._normalize_journal_name(name)
                if normalized:
                    # Each key gets its own dict so entries stay independent.
                    lookup[normalized] = dict(metadata)

        return lookup

    def _normalize_journal_name(self, name: str) -> str:
        """Normalize a journal name for matching.

        Lowercases, trims, collapses runs of whitespace and removes trailing
        periods. Returns an empty string for empty/``None`` input.
        """
        if not name:
            return ""

        normalized = ' '.join(name.lower().strip().split())
        return normalized.rstrip('.')

    def _get_api_params(self) -> Dict[str, str]:
        """Return the parameters sent with every E-utilities request."""
        params = {
            'tool': self.tool_name,
            'email': self.email
        }
        if self.api_key:
            params['api_key'] = self.api_key
        return params

    def _make_api_request(self, url: str, params: Dict) -> Optional[Dict]:
        """GET ``url`` and return the decoded JSON body.

        A 5xx response is retried once after a one-second pause.

        Returns:
            The parsed JSON, or ``None`` on a network error, a non-200
            status, or a body that is not valid JSON.
        """
        try:
            for attempt in range(2):
                response = requests.get(url, params=params, timeout=30)

                if response.status_code == 200:
                    return response.json()
                if response.status_code < 500 or attempt == 1:
                    break

                # Server error - retry once
                print(f"Server error {response.status_code}, retrying...")
                time.sleep(1)

            print(f"API request failed with status {response.status_code}")
            return None

        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError: a 200 response whose body could not be decoded as JSON.
            print(f"API request error: {e}")
            return None

    def _build_search_term(self, query: str, article_type: str, humans_only: bool, open_access: bool) -> str:
        """Append the selected PubMed filters to the user's query.

        Unknown or empty ``article_type`` values add no filter.
        """
        parts = [query]

        type_filter = self._ARTICLE_TYPE_FILTERS.get(article_type) if article_type else None
        if type_filter:
            parts.append(type_filter)
        if humans_only:
            parts.append("humans[MeSH Terms]")
        if open_access:
            parts.append("free full text[sb]")

        return " AND ".join(parts)

    def search_pubmed(self, query: str, article_type: str, humans_only: bool, open_access: bool,
                      years_back: int, max_results: int, show_all_journals: bool) -> Tuple[str, List[Dict]]:
        """Search PubMed and return a status message plus the matching articles.

        Args:
            query: Free-text search terms.
            article_type: One of the UI's article-type choices, or "".
            humans_only: Restrict to records indexed with the humans MeSH term.
            open_access: Restrict to the "free full text" subset.
            years_back: Size of the date window, in years, ending today.
            max_results: Maximum number of PMIDs to request (capped at 100).
            show_all_journals: When False, keep only journals found in the
                loaded journal table.

        Returns:
            ``(status_message, articles)``. ``articles`` is empty when the
            query is blank, the request fails, or nothing matches.
        """
        if not query or not query.strip():
            return "Please enter a search query.", []

        # Coerce to whole numbers in case the caller passes floats, and cap
        # the number of requested results.
        max_results = max(1, min(int(max_results), 100))
        years_back = int(years_back)

        search_term = self._build_search_term(query, article_type, humans_only, open_access)

        end_date = datetime.now()
        start_date = end_date - timedelta(days=years_back * 365)

        print(f"Searching PubMed: {search_term}")
        print(f"Date range: {start_date.strftime('%Y/%m/%d')} to {end_date.strftime('%Y/%m/%d')}")

        # Step 1: E-Search to get PMIDs
        search_params = {
            'db': 'pubmed',
            'term': search_term,
            'retmode': 'json',
            'retmax': max_results,
            # requests percent-encodes '+', so the URL form "pub+date" would
            # reach the server as a literal plus sign; use the underscore form.
            'sort': 'pub_date',
            # Apply mindate/maxdate to the publication date.
            'datetype': 'pdat',
            'mindate': start_date.strftime('%Y/%m/%d'),
            'maxdate': end_date.strftime('%Y/%m/%d'),
            **self._get_api_params()
        }

        search_response = self._make_api_request(
            f"{self.base_url}esearch.fcgi", search_params
        )

        if not search_response:
            return "❌ Error: Could not connect to PubMed. Please check your internet connection and try again.", []

        if 'esearchresult' not in search_response:
            return "❌ Error: Invalid response from PubMed. Please try again.", []

        esearch_result = search_response['esearchresult']

        if 'errorlist' in esearch_result and esearch_result['errorlist']:
            error_msg = esearch_result['errorlist'].get('errormessage') or ['Unknown error']
            return f"❌ PubMed error: {error_msg[0]}", []

        pmids = esearch_result.get('idlist', [])
        total_found = int(esearch_result.get('count', 0))

        if not pmids:
            return f"🔍 No articles found for '{query}'. Try:\n• Broader search terms\n• Increase 'Years Back' range\n• Turn on 'Show All Journals'", []

        print(f"Found {total_found} articles, processing {len(pmids)} PMIDs")

        # Step 2: E-Summary to get metadata
        articles = self._fetch_summaries(pmids)

        # Step 3: E-Fetch to get abstracts (fills article['abstract'] in place)
        self._attach_abstracts(articles)

        # Filter by journals if not showing all
        if show_all_journals:
            filtered_articles = articles
        else:
            filtered_articles = [
                article for article in articles
                if self._is_top_journal(article['journal'])
            ]

        # Build status message
        status_parts = [f"✅ {total_found} found"]
        if len(articles) < total_found:
            status_parts.append(f"→ {len(articles)} after date/filter limits")

        scope = "All journals" if show_all_journals else "Top journals"
        status_parts.append(f"→ {len(filtered_articles)} kept ({scope})")

        return " ".join(status_parts), filtered_articles

    def _fetch_summaries(self, pmids: List[str], batch_size: int = 200) -> List[Dict]:
        """Fetch E-Summary metadata for ``pmids`` and return processed articles.

        PMIDs missing from a response, and batches whose request fails, are
        skipped.
        """
        articles = []

        for start in range(0, len(pmids), batch_size):
            batch_pmids = pmids[start:start + batch_size]

            summary_params = {
                'db': 'pubmed',
                'id': ','.join(batch_pmids),
                'retmode': 'json',
                **self._get_api_params()
            }

            summary_response = self._make_api_request(
                f"{self.base_url}esummary.fcgi", summary_params
            )

            if summary_response and 'result' in summary_response:
                result = summary_response['result']
                articles.extend(
                    self._process_article_metadata(result[pmid], pmid)
                    for pmid in batch_pmids
                    if pmid in result
                )

            # Be polite to the API
            time.sleep(0.1)

        return articles

    def _attach_abstracts(self, articles: List[Dict], batch_size: int = 50) -> None:
        """Set ``article['abstract']`` on every article using E-Fetch.

        A failed batch (network error or non-200 status) gets a placeholder
        abstract instead of aborting the whole search.
        """
        for start in range(0, len(articles), batch_size):
            batch_articles = articles[start:start + batch_size]

            fetch_params = {
                'db': 'pubmed',
                'id': ','.join(article['pmid'] for article in batch_articles),
                'retmode': 'xml',
                **self._get_api_params()
            }

            abstracts = None
            try:
                fetch_response = requests.get(
                    f"{self.base_url}efetch.fcgi", params=fetch_params, timeout=30
                )
                if fetch_response.status_code == 200:
                    # Hand the parser raw bytes so it decodes them according
                    # to the XML declaration rather than a guessed charset.
                    abstracts = self._parse_abstracts(fetch_response.content)
            except requests.exceptions.RequestException as e:
                print(f"Abstract request error: {e}")

            for article in batch_articles:
                if abstracts is None:
                    article['abstract'] = 'Abstract temporarily unavailable'
                else:
                    article['abstract'] = abstracts.get(article['pmid'], 'No abstract available')

            # Be polite to the API
            time.sleep(0.1)

    def _process_article_metadata(self, article_data: Dict, pmid: str) -> Dict:
        """Convert one E-Summary record into the article dict used by the UI."""
        title = article_data.get('title') or 'No title available'

        # Fall back to the abbreviated source when the full name is absent or empty.
        journal = (
            article_data.get('fulljournalname')
            or article_data.get('source')
            or 'Unknown Journal'
        )

        year = self._extract_year(article_data.get('pubdate', ''))

        # Use the first listed publication type; an empty list means unknown.
        pubtypes = article_data.get('pubtype')
        if isinstance(pubtypes, list):
            article_type = pubtypes[0] if pubtypes else 'Unknown'
        else:
            article_type = pubtypes or 'Unknown'

        journal_metadata = self._get_journal_metadata(journal)

        return {
            'pmid': pmid,
            'title': title,
            'journal': journal,
            'year': year,
            'type': article_type,
            'pubmed_url': f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            'jif': journal_metadata.get('jif', None),
            'quartile': journal_metadata.get('quartile', None),
            'category': journal_metadata.get('category', None)
        }

    def _extract_year(self, pubdate: str) -> str:
        """Return the first 19xx/20xx year found in ``pubdate``, else "Unknown"."""
        if not pubdate:
            return "Unknown"

        year_match = re.search(r'\b(19|20)\d{2}\b', pubdate)
        return year_match.group() if year_match else "Unknown"

    def _parse_abstracts(self, xml_content: Union[str, bytes]) -> Dict[str, str]:
        """Parse abstracts from an E-Fetch XML response.

        Args:
            xml_content: The XML document as bytes or text.

        Returns:
            Mapping of PMID to abstract text. Labelled sections are rendered
            as ``"LABEL: text"`` and joined with blank lines. Articles with
            no abstract text are omitted. On a parse error a message is
            printed and whatever was collected so far is returned.
        """
        abstracts = {}

        try:
            if isinstance(xml_content, str):
                # lxml rejects text input that carries an encoding declaration.
                xml_content = xml_content.encode('utf-8')
            root = etree.fromstring(xml_content)

            for article in root.xpath('//PubmedArticle'):
                pmid = article.find('.//PMID')
                if pmid is None:
                    continue

                abstract_parts = []
                for abstract_text in article.xpath('.//AbstractText'):
                    label = abstract_text.get('Label', '')
                    # itertext() also collects text nested in inline markup
                    # (<i>, <sub>, ...), which .text alone would cut off.
                    text = ''.join(abstract_text.itertext()).strip()

                    if text:
                        abstract_parts.append(f"{label}: {text}" if label else text)

                if abstract_parts:
                    abstracts[pmid.text] = '\n\n'.join(abstract_parts)

        except (etree.XMLSyntaxError, ValueError) as e:
            print(f"Error parsing abstracts: {e}")

        return abstracts

    def _is_top_journal(self, journal_name: str) -> bool:
        """Return True when the journal is present in the loaded journal table."""
        return self._normalize_journal_name(journal_name) in self.journal_lookup

    def _get_journal_metadata(self, journal_name: str) -> Dict:
        """Return the journal's metadata (JIF, quartile, category) or ``{}``."""
        return self.journal_lookup.get(self._normalize_journal_name(journal_name), {})
372
+
373
+
374
def create_article_card(article: Dict) -> str:
    """Render one article as an HTML card.

    Args:
        article: Article dict with ``title``, ``journal``, ``year``,
            ``type`` and ``pubmed_url``; ``abstract``, ``jif`` and
            ``quartile`` are optional.

    Returns:
        An HTML fragment. All article text is HTML-escaped, and the abstract
        is cut to 300 characters followed by "..." when longer.
    """
    def safe(value) -> str:
        # Unescape first so text that already carries entities is not
        # double-escaped, then escape so it can never be read as markup.
        return html.escape(html.unescape(str(value)))

    abstract = article.get('abstract') or 'No abstract available'
    # Truncate the raw text before escaping so an entity is never cut in half.
    abstract_preview = abstract[:300] + "..." if len(abstract) > 300 else abstract

    # Badges are only shown for journals that have ranking data.
    badges = []
    if article.get('jif') is not None:
        badges.append(f'<span class="badge jif-badge">JIF {safe(article["jif"])}</span> ')
    if article.get('quartile'):
        badges.append(f'<span class="badge quartile-badge">{safe(article["quartile"])}</span> ')
    badges_html = "".join(badges)

    return f"""
    <div class="article-card">
        <h3><a href="{safe(article['pubmed_url'])}" target="_blank" class="article-title">{safe(article['title'])}</a></h3>
        <div class="article-meta">
            <strong>{safe(article['journal'])}</strong> • {safe(article['year'])} • {safe(article['type'])}
            {badges_html}
        </div>
        <details class="abstract-details">
            <summary class="abstract-summary">Abstract</summary>
            <div class="abstract-content">{safe(abstract_preview)}</div>
        </details>
    </div>
    """
408
+
409
+
410
def sort_articles(articles: List[Dict], sort_option: str) -> List[Dict]:
    """Sort articles according to the option chosen in the UI.

    Args:
        articles: Article dicts; ``jif`` and ``quartile`` may be missing or None.
        sort_option: One of the "Sort Results By" choices.

    Returns:
        A new sorted list for the JIF/quartile options. For the default
        option, or any unrecognized one, the input list is returned as-is,
        i.e. in the order the search produced it.
    """
    def jif_value(article: Dict) -> float:
        # Missing or non-numeric JIF values (None, "", "<0.1", ...) count as 0
        # so a mixed-type journal table cannot break the sort.
        try:
            return float(article.get('jif') or 0)
        except (TypeError, ValueError):
            return 0.0

    def quartile_key(order: Dict[str, int]):
        # Articles without a recognized quartile always sort last.
        return lambda article: order.get(article.get('quartile'), 999)

    if sort_option == "JIF (High to Low)":
        # Articles without a JIF end up at the end.
        return sorted(articles, key=jif_value, reverse=True)

    if sort_option == "JIF (Low to High)":
        # Articles without a JIF end up at the beginning.
        return sorted(articles, key=jif_value)

    if sort_option == "Quartile (Q1 to Q4)":
        return sorted(articles, key=quartile_key({'Q1': 1, 'Q2': 2, 'Q3': 3, 'Q4': 4}))

    if sort_option == "Quartile (Q4 to Q1)":
        return sorted(articles, key=quartile_key({'Q4': 1, 'Q3': 2, 'Q2': 3, 'Q1': 4}))

    # "Default (by relevance)" and unknown options: keep the incoming order.
    return articles
438
+
439
+
440
# Stylesheet emitted ahead of the result cards in the HTML output.
_RESULTS_CSS = """
<style>
.article-card {
    border: none;
    border-radius: 20px;
    padding: 24px;
    margin: 20px 0;
    background: linear-gradient(135deg, #ffffff 0%, #f8f9ff 100%);
    box-shadow: 0 8px 32px rgba(0,0,0,0.1);
    transition: all 0.3s ease;
    border-left: 5px solid #667eea;
}

.article-card:hover {
    transform: translateY(-4px);
    box-shadow: 0 12px 48px rgba(0,0,0,0.15);
}

.article-title {
    color: #1976d2;
    text-decoration: none;
    font-size: 1.3em;
    line-height: 1.4;
    font-weight: 700;
    margin-bottom: 12px;
    display: block;
    transition: all 0.3s ease;
}

.article-title:hover {
    color: #1565c0;
    text-decoration: none;
    transform: translateX(4px);
}

.article-meta {
    color: #424242;
    margin: 12px 0;
    font-size: 1em;
    font-weight: 600;
    background: #f0f4ff;
    padding: 8px 16px;
    border-radius: 12px;
    border: 1px solid #e0e6ff;
}

.badge {
    display: inline-block;
    padding: 6px 14px;
    border-radius: 20px;
    font-size: 0.85em;
    font-weight: 700;
    margin-left: 10px;
    text-shadow: none;
    transition: all 0.3s ease;
    box-shadow: 0 2px 8px rgba(0,0,0,0.2);
}

.badge:hover {
    transform: scale(1.05);
}

.jif-badge {
    background: linear-gradient(135deg, #4caf50 0%, #45a049 100%);
    color: white;
}

.quartile-badge {
    background: linear-gradient(135deg, #2196f3 0%, #1976d2 100%);
    color: white;
}

.abstract-details {
    margin-top: 16px;
    background: white;
    border-radius: 12px;
    padding: 16px;
    border: 1px solid #e0e6ff;
}

.abstract-summary {
    cursor: pointer;
    color: #1976d2;
    font-weight: 700;
    font-size: 1.1em;
    padding: 8px 16px;
    background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
    border-radius: 8px;
    border: 1px solid #90caf9;
    transition: all 0.3s ease;
    display: inline-block;
    margin-bottom: 8px;
}

.abstract-summary:hover {
    color: #1565c0;
    background: linear-gradient(135deg, #bbdefb 0%, #90caf9 100%);
    transform: translateY(-1px);
}

.abstract-content {
    margin-top: 12px;
    line-height: 1.7;
    color: #212121;
    font-size: 0.95em;
    background: #fafafa;
    padding: 16px;
    border-radius: 8px;
    border-left: 4px solid #667eea;
}

.status-message {
    background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
    border: 2px solid #4caf50;
    border-radius: 15px;
    padding: 20px;
    margin: 20px 0;
    font-weight: 700;
    color: black !important;
    font-size: 1.1em;
    text-align: center;
    box-shadow: 0 4px 16px rgba(76, 175, 80, 0.3);
}

.status-message * {
    color: black !important;
}

.status-message span {
    color: black !important;
}

/* Ensure text is visible on all backgrounds */
body, html {
    color: #212121 !important;
}

/* Make sure article details are clearly visible */
.article-card * {
    color: #212121 !important;
}

.article-title {
    color: #1976d2 !important;
}

.abstract-summary {
    color: #1976d2 !important;
}

/* Add some animation */
@keyframes fadeInUp {
    from {
        opacity: 0;
        transform: translateY(30px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

.article-card {
    animation: fadeInUp 0.6s ease-out;
}
</style>
"""

# Lazily created searcher shared by all requests (see _get_searcher).
_searcher_cache = None


def _get_searcher() -> "PubMedSearcher":
    """Return the shared PubMedSearcher, creating it on first use.

    Constructing a searcher re-reads the journal JSON file, so one instance
    is reused instead of building a new one for every search.
    """
    global _searcher_cache
    if _searcher_cache is None:
        _searcher_cache = PubMedSearcher()
    return _searcher_cache


def search_interface(query: str, article_type: str, humans_only: bool, open_access: bool,
                     years_back: int, max_results: int, show_all_journals: bool, sort_by: str) -> Tuple[str, str]:
    """Run a search for the Gradio UI and render the results.

    Args:
        query, article_type, humans_only, open_access, years_back,
        max_results, show_all_journals: Passed through to
            ``PubMedSearcher.search_pubmed``.
        sort_by: One of the "Sort Results By" choices, passed to
            ``sort_articles``.

    Returns:
        ``(status_message, results_html)``. ``results_html`` is an empty
        string when the search produced no articles.
    """
    status_message, articles = _get_searcher().search_pubmed(
        query, article_type, humans_only, open_access, years_back, max_results, show_all_journals
    )

    if not articles:
        return status_message, ""

    # Sort articles based on user selection
    articles = sort_articles(articles, sort_by)

    # The status text is repeated above the cards; escape it since it is
    # being placed inside HTML.
    formatted_status = f"<span style='color: black !important;'>{html.escape(status_message)}</span>"
    html_parts = [
        _RESULTS_CSS,
        "<div class='status-message'>",
        formatted_status,
        "</div>",
    ]
    html_parts.extend(create_article_card(article) for article in articles)

    return status_message, "".join(html_parts)
651
+
652
+
653
def create_gradio_interface():
    """Create and configure the Gradio interface.

    Builds a ``gr.Blocks`` app with a header, the search controls, an info
    panel explaining journal quartiles, the status/results outputs, example
    queries and a footer. The search button is wired to ``search_interface``.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    # Custom CSS for enhanced styling
    custom_css = """
    .gradio-container {
        background: #000000;
        min-height: 100vh;
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }

    .main-header {
        background: #1a1a1a;
        color: white;
        padding: 2rem;
        border-radius: 15px;
        margin-bottom: 2rem;
        box-shadow: 0 8px 32px rgba(255,255,255,0.1);
        text-align: center;
        border: 1px solid #333333;
    }

    .main-header h1 {
        margin: 0;
        font-size: 2.5rem;
        font-weight: 700;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
    }

    .main-header p {
        margin: 0.5rem 0 0 0;
        font-size: 1.1rem;
        opacity: 0.9;
    }


    .info-panel {
        background: #1a1a1a;
        color: white;
        border-radius: 20px;
        padding: 2rem;
        box-shadow: 0 10px 40px rgba(255,255,255,0.1);
        margin-bottom: 2rem;
        border: 1px solid #333333;
    }

    .search-button {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border: none;
        border-radius: 20px;
        padding: 1.2rem 3rem;
        color: white;
        font-size: 1.3rem;
        font-weight: 700;
        transition: all 0.3s ease;
        box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4);
        text-transform: uppercase;
        letter-spacing: 1px;
        position: relative;
        overflow: hidden;
        width: 100%;
        margin-top: 1rem;
    }

    .search-button:hover {
        transform: translateY(-3px);
        box-shadow: 0 10px 30px rgba(102, 126, 234, 0.6);
        background: linear-gradient(135deg, #764ba2 0%, #667eea 100%);
    }

    .search-button:active {
        transform: translateY(-1px);
    }

    .search-button.loading {
        pointer-events: none;
        opacity: 0.8;
    }

    .search-button.loading::after {
        content: '';
        position: absolute;
        width: 20px;
        height: 20px;
        top: 50%;
        left: 50%;
        margin-left: -10px;
        margin-top: -10px;
        border: 2px solid #ffffff;
        border-radius: 50%;
        border-top-color: transparent;
        animation: spin 1s linear infinite;
    }

    @keyframes spin {
        to {
            transform: rotate(360deg);
        }
    }


    """

    with gr.Blocks(title="PubMed Search Engine", theme=gr.themes.Soft(), css=custom_css) as app:

        # Main Header
        with gr.Row():
            with gr.Column():
                gr.HTML("""
                <div class="main-header">
                    <h1>🔬 PubMed Search Engine</h1>
                    <p>Search PubMed and filter results to show only articles from high-impact journals.<br>
                    Perfect for students and researchers who want to focus on the most credible research.</p>
                </div>
                """)

        with gr.Row():
            with gr.Column(scale=3):
                # Search Panel
                with gr.Column():
                    query_input = gr.Textbox(
                        label="🔍 Search Query",
                        placeholder="Enter keywords (e.g., 'GLP-1 obesity meta-analysis')",
                        lines=2
                    )

                    with gr.Row():
                        article_type = gr.Dropdown(
                            choices=["", "Research Article", "RCT", "Randomized Controlled Trial", "Meta-Analysis",
                                     "Systematic Review", "Clinical Trial", "Review"],
                            label="📄 Article Type Filter",
                            value=""
                        )

                        humans_only = gr.Checkbox(
                            label="👥 Humans Only",
                            value=True,
                            info="Exclude animal studies"
                        )

                        open_access = gr.Checkbox(
                            label="🔓 Open Access Only",
                            value=False,
                            info="Show only freely accessible articles"
                        )

                    with gr.Row():
                        years_back = gr.Slider(
                            minimum=1, maximum=15, value=5, step=1,
                            label="📅 Years Back",
                            info="How many years to search"
                        )

                        max_results = gr.Slider(
                            minimum=10, maximum=100, value=50, step=10,
                            label="📊 Max Results",
                            info="Maximum articles to return"
                        )

                    with gr.Row():
                        show_all_journals = gr.Checkbox(
                            label="🌐 Show All Journals",
                            value=False,
                            info="Show all journals (not just top journals)"
                        )

                        sort_by = gr.Dropdown(
                            choices=["Default (by relevance)", "JIF (High to Low)", "JIF (Low to High)", "Quartile (Q1 to Q4)", "Quartile (Q4 to Q1)"],
                            label="📈 Sort Results By",
                            value="Default (by relevance)"
                        )

                    search_button = gr.Button("🔍 Search PubMed", variant="primary", size="lg", elem_classes="search-button")

            with gr.Column(scale=1):
                # Info Panel
                with gr.Column():
                    gr.HTML("""
                    <div class="info-panel">
                        <h3 style="margin-top: 0; color: white;">📊 About Journal Rankings</h3>
                        <div style="color: white;">
                            <p><strong>Q1 (Quartile 1):</strong> Top 25% of journals</p>
                            <p><strong>Q2 (Quartile 2):</strong> 25-50th percentile</p>
                            <p><strong>Q3 (Quartile 3):</strong> 50-75th percentile</p>
                            <p><strong>Q4 (Quartile 4):</strong> Bottom 25%</p>
                            <br>
                            <p><strong>Higher JIF = More influential journal</strong></p>
                        </div>
                    </div>
                    """)

        # Results section
        with gr.Row():
            with gr.Column():
                status_output = gr.Markdown(label="Search Status")
                results_output = gr.HTML(label="Search Results")

        # The same component list feeds both the click handler and the examples.
        search_inputs = [query_input, article_type, humans_only, open_access,
                         years_back, max_results, show_all_journals, sort_by]

        # Event handlers
        search_button.click(
            fn=search_interface,
            inputs=search_inputs,
            outputs=[status_output, results_output]
        )

        # Example queries
        with gr.Row():
            with gr.Column():
                gr.Examples(
                    examples=[
                        ["GLP-1 obesity meta-analysis", "Meta-Analysis", True, False, 5, 50, False, "JIF (High to Low)"],
                        ["COVID-19 vaccine efficacy RCT", "RCT", True, False, 3, 30, False, "Quartile (Q1 to Q4)"],
                        ["machine learning healthcare", "Research Article", True, True, 10, 50, True, "Default (by relevance)"],
                        ["diabetes prevention systematic review", "Systematic Review", True, False, 8, 40, False, "JIF (High to Low)"]
                    ],
                    inputs=search_inputs,
                    label="💡 Example Queries"
                )

        # Footer
        with gr.Row():
            with gr.Column():
                # Built line by line (no leading indentation) so Markdown
                # cannot mistake the indented source for a code block.
                gr.Markdown(
                    "---\n"
                    '<div style="text-align: center; color: #666; padding: 2rem;">\n'
                    "**🔗 Data Sources:** PubMed (NCBI) • Journal Impact Factors 2024<br>\n"
                    '**💡 Tips:** Use specific medical terms for better results • Try "Show All Journals" if you get few results<br>\n'
                    "**📱 Mobile Friendly:** Works great on all devices\n"
                    "</div>\n"
                )

    return app
884
+
885
+
886
def main():
    """Main application entry point: build the Gradio app and launch it."""
    print("Starting PubMed Top Journals Student App...")

    # Create Gradio interface
    app = create_gradio_interface()

    # Launch the app
    app.launch(
        server_name="0.0.0.0",
        server_port=None,  # Let Gradio find an available port automatically
        share=False,
        show_error=True,
        quiet=False
    )


if __name__ == "__main__":
    main()