nkshirsa committed · verified
Commit 480c90d · 1 Parent(s): 8a99156

v2.0: phd_research_os_v2/app.py

Files changed (1)
  1. phd_research_os_v2/app.py +630 -0
phd_research_os_v2/app.py ADDED
@@ -0,0 +1,630 @@
"""
PhD Research OS v2.0 — Local Application
==========================================
A guided application that walks the user through all phases
of setting up and using the Research OS.

Launch: python -m phd_research_os_v2.app
"""

import os
import sys
import json
import gradio as gr

# Ensure package is importable
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from phd_research_os_v2.core.database import (
    init_db, get_db, get_stats, get_state, set_state,
    from_fixed, to_fixed, now_iso, gen_id, DB_PATH
)
from phd_research_os_v2.layer0.parser import StructuralParser
from phd_research_os_v2.layer2.extractor import QualifiedExtractor
from phd_research_os_v2.layer4.graph import KnowledgeGraph
from phd_research_os_v2.layer5.scorer import CalibratedScorer

# Initialize
os.makedirs("data", exist_ok=True)
os.makedirs("inbox", exist_ok=True)
os.makedirs("vault", exist_ok=True)
init_db(DB_PATH)

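# Convention note (inferred from usage, not defined in this file): confidence
# values are stored as fixed-point integers and converted with to_fixed() /
# from_fixed(); the 900 threshold used later is assumed to correspond to
# roughly 0.90. gen_id(prefix) returns prefixed identifiers such as "CONF_..."
# or "GOAL_...".
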
# ============================================================
# Phase Status Logic
# ============================================================

def get_phase_status():
    """Get completion status for each phase."""
    stats = get_stats(DB_PATH)
    phase = int(get_state(DB_PATH, "setup_phase") or "0")

    return {
        "current_phase": phase,
        "phase_0": {"name": "Foundation", "done": True, "desc": "Database initialized"},
        "phase_1": {
            "name": "Paper Ingestion",
            "done": stats.get("documents", 0) > 0,
            "desc": f"{stats.get('documents', 0)} documents ingested, {stats.get('regions', 0)} regions parsed"
        },
        "phase_2": {
            "name": "Claim Extraction",
            "done": stats.get("claims", 0) > 0,
            "desc": f"{stats.get('claims', 0)} claims extracted"
        },
        "phase_3": {
            "name": "Knowledge Graph",
            "done": stats.get("graph_nodes", 0) > 0,
            "desc": f"{stats.get('graph_nodes', 0)} nodes, {stats.get('graph_edges', 0)} edges"
        },
        "phase_4": {
            "name": "Conflict Detection",
            "done": stats.get("conflicts", 0) > 0,
            "desc": f"{stats.get('conflicts', 0)} conflicts detected"
        },
        "phase_5": {
            "name": "Calibrated Scoring",
            "done": False,
            "desc": "Score claims with code-computed confidence"
        },
        "phase_6": {
            "name": "Research Goals",
            "done": stats.get("goals", 0) > 0,
            "desc": f"{stats.get('goals', 0)} active goals"
        },
    }


def render_phase_overview():
    """Render markdown overview of all phases."""
    status = get_phase_status()
    lines = ["# 🧬 PhD Research OS v2.0\n"]
    lines.append("## System Status\n")

    for i in range(7):
        p = status[f"phase_{i}"]
        icon = "✅" if p["done"] else "⬜"
        lines.append(f"{icon} **Phase {i}: {p['name']}** — {p['desc']}")

    stats = get_stats(DB_PATH)
    lines.append(f"\n---\n### Database Summary")
    lines.append(f"| Table | Count |")
    lines.append(f"|-------|-------|")
    for table, count in stats.items():
        lines.append(f"| {table} | {count} |")

    return "\n".join(lines)

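# Illustrative shape of the dict returned by get_phase_status() (values here
# are made up for the example):
#   {
#       "current_phase": 0,
#       "phase_0": {"name": "Foundation", "done": True, "desc": "Database initialized"},
#       "phase_1": {"name": "Paper Ingestion", "done": True,
#                   "desc": "3 documents ingested, 41 regions parsed"},
#       ...
#   }
# render_phase_overview() turns this dict plus get_stats() into the Markdown
# status panel shown at the top of the UI and in the Settings tab.
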
# ============================================================
# Phase 1: Paper Ingestion
# ============================================================

def ingest_paper(file_obj, doc_type, title, doi):
    """Ingest a paper through Layer 0."""
    if file_obj is None:
        # Return an empty list (not a string) for the Dataframe output
        return "❌ Please upload a file", [], render_phase_overview()

    parser = StructuralParser(DB_PATH)
    result = parser.ingest_document(
        file_obj.name if hasattr(file_obj, 'name') else str(file_obj),
        doc_type=doc_type or "main",
        title=title or None,
        doi=doi or None,
    )

    if result.get("error"):
        return f"❌ {result['error']}", [], render_phase_overview()

    # Format result
    summary = f"""✅ **Document ingested successfully!**

| Metric | Value |
|--------|-------|
| Document ID | `{result['doc_id']}` |
| Parse Method | {result['parse_method']} |
| Total Regions | {result['total_regions']} |
| Average Quality | {result['avg_quality']:.2f} |
| Sections Found | {', '.join(result.get('sections_found', [])) or 'None detected'} |

**Regions by type:**
"""
    for rtype, count in result.get("regions_by_type", {}).items():
        summary += f"- {rtype}: {count}\n"

    # Show first few regions as preview
    preview_rows = []
    parser2 = StructuralParser(DB_PATH)
    regions = parser2.get_extractable_regions(result["doc_id"])
    for r in regions[:10]:
        preview_rows.append([
            r["region_id"][:12], r["region_type"], r.get("section", "—"),
            r["content_text"][:100] + "..." if len(r.get("content_text", "")) > 100 else r.get("content_text", ""),
            f"{from_fixed(r['parse_confidence']):.2f}",
        ])

    return summary, preview_rows, render_phase_overview()

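# ingest_paper() returns three values in the order the click handler below
# wires them: the Markdown summary, the preview rows for the "Parsed Regions"
# Dataframe, and a refreshed overview string for the header panel.
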
# ============================================================
# Phase 2: Claim Extraction
# ============================================================

def extract_claims_from_doc(doc_id):
    """Extract claims from a specific document."""
    if not doc_id or not doc_id.strip():
        return "❌ Please enter a document ID", []

    extractor = QualifiedExtractor(DB_PATH)
    result = extractor.extract_from_document(doc_id.strip())

    summary = f"""✅ **Claims extracted!**

| Metric | Value |
|--------|-------|
| Total Claims | {result['total_claims']} |
| Null Results | {result['null_results']} |
| Incomplete | {result['incomplete']} |
| Avg Confidence | {from_fixed(result['avg_confidence']):.3f} |

**By Section:** {json.dumps(result.get('section_distribution', {}), indent=2)}

**By Epistemic Tag:** {json.dumps(result.get('epistemic_distribution', {}), indent=2)}
"""

    # Get claims for display
    conn = get_db(DB_PATH)
    rows = conn.execute("""
        SELECT claim_id, text, epistemic_tag, composite_confidence,
               source_section, status, is_null_result
        FROM claims WHERE source_doc_id = ?
        ORDER BY composite_confidence DESC
    """, (doc_id.strip(),)).fetchall()
    conn.close()

    table_rows = []
    for r in rows:
        d = dict(r)
        table_rows.append([
            d["claim_id"][:12],
            d["text"][:120] + ("..." if len(d.get("text", "")) > 120 else ""),
            d["epistemic_tag"],
            f"{from_fixed(d['composite_confidence']):.3f}",
            d.get("source_section", "—"),
            "🔴 NULL" if d.get("is_null_result") else d.get("status", ""),
        ])

    return summary, table_rows


def extract_from_text(text_input, section):
    """Extract claims from raw text input."""
    if not text_input or len(text_input.strip()) < 50:
        return "❌ Please enter at least 50 characters of scientific text", []

    extractor = QualifiedExtractor(DB_PATH)
    chunk = {
        "text": text_input,
        "section": section or "unknown",
        "page": 0,
        "min_confidence": 900,
        "doc_id": None,
        "region_ids": [],
    }

    claims = extractor.extract_from_chunk(chunk)

    table_rows = []
    for c in claims:
        table_rows.append([
            c["claim_id"][:12],
            c["text"][:120],
            c["epistemic_tag"],
            f"{from_fixed(c['composite_confidence']):.3f}",
            ", ".join(c.get("qualifiers", [])) or "—",
            "🔴 NULL" if c.get("is_null_result") else c.get("status", ""),
        ])

    return f"✅ Extracted {len(claims)} claims from text", table_rows

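# The chunk dict above is the minimal input this UI hands to
# QualifiedExtractor.extract_from_chunk(); "min_confidence": 900 is the
# fixed-point form of an assumed 0.90 threshold (see the convention note near
# the imports). Illustrative call from the "From Text" tab:
#   extract_from_text("We observed a 2.3-fold increase in signal ...", "results")
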
# ============================================================
# Phase 3: Knowledge Graph
# ============================================================

def build_graph():
    """Build knowledge graph from all claims."""
    conn = get_db(DB_PATH)
    claims = conn.execute("SELECT * FROM claims LIMIT 500").fetchall()
    conn.close()

    graph = KnowledgeGraph(DB_PATH)

    # Add all claims as nodes
    for row in claims:
        c = dict(row)
        graph.add_claim_node(c["claim_id"], c["text"], {
            "tag": c["epistemic_tag"],
            "confidence": c.get("composite_confidence", 0),
            "section": c.get("source_section"),
        })

    stats = graph.get_stats()
    return f"""✅ **Knowledge graph built!**

| Metric | Value |
|--------|-------|
| Total Nodes | {stats['total_nodes']} |
| Total Edges | {stats['total_edges']} |
| Observed Edges | {stats['observed_edges']} |
| Inferred Edges | {stats['inferred_edges']} |
"""


def find_gaps():
    """Run gap analysis on the knowledge graph."""
    graph = KnowledgeGraph(DB_PATH)
    gaps = graph.find_gaps()

    if not gaps:
        return "No gaps found (need more nodes with edges to detect structural holes)"

    lines = ["## 🔍 Research Gaps Detected\n"]
    for g in gaps[:10]:
        lines.append(f"- **{g['entity_a']}** ↔ **{g['entity_b']}** "
                     f"(info gain: {g['information_gain']:.3f}, "
                     f"degrees: {g['a_degree']}/{g['b_degree']})")
    return "\n".join(lines)

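# Each gap returned by KnowledgeGraph.find_gaps() is rendered above via the
# keys "entity_a", "entity_b", "information_gain", "a_degree" and "b_degree";
# the scoring behind information_gain lives in layer4.graph and is not
# redefined here.
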
# ============================================================
# Phase 4: Conflict Detection
# ============================================================

def detect_conflicts():
    """Run conflict detection."""
    graph = KnowledgeGraph(DB_PATH)
    pairs = graph.find_conflicts(min_similarity=0.3, limit=20)

    if not pairs:
        return "No potential conflicts found", []

    conn = get_db(DB_PATH)
    table_rows = []
    for p in pairs:
        # Store conflict
        conflict_id = gen_id("CONF")
        conn.execute("""
            INSERT OR IGNORE INTO conflicts (conflict_id, claim_a_id, claim_b_id,
                conflict_type, hypothesis_confidence, comparability_confidence,
                schema_version, created_at)
            VALUES (?, ?, ?, 'value_mismatch', 'low', ?, '2.0', ?)
        """, (conflict_id, p["claim_a"]["claim_id"], p["claim_b"]["claim_id"],
              to_fixed(p["overlap"]), now_iso()))

        table_rows.append([
            conflict_id[:12],
            p["claim_a"]["text"][:80],
            p["claim_b"]["text"][:80],
            f"{p['overlap']:.2f}",
            "Unresolved",
        ])

    conn.commit()
    conn.close()

    return f"✅ Found {len(pairs)} potential conflicts", table_rows

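# detect_conflicts() persists every candidate pair with
# hypothesis_confidence='low' (human review required, as the Conflicts tab
# notes) and stores the pair's overlap score in fixed-point form as
# comparability_confidence.
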
# ============================================================
# Phase 5: Scoring
# ============================================================

def rescore_all():
    """Rescore all claims with code-computed confidence."""
    scorer = CalibratedScorer(DB_PATH)
    count = scorer.rescore_all_claims()
    return f"✅ Rescored {count} claims with code-computed confidence (3-score system)"

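# The Phase 5 tab below documents the 3-score system CalibratedScorer is
# expected to implement (the actual math lives in layer5.scorer; this is only
# the stated intent):
#   evidence_quality   ~ evidence * study_quality * journal_tier * completeness * section
#   truth_likelihood   ~ evidence_quality + corroboration - conflict_penalty
#   qualifier_strength ~ 1.0 - 0.1 * qualifier_count - null_penalty
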
# ============================================================
# Phase 6: Goals & Decisions
# ============================================================

def create_goal(description, priority):
    if not description:
        return "❌ Please enter a goal description"
    conn = get_db(DB_PATH)
    goal_id = gen_id("GOAL")
    conn.execute("""
        INSERT INTO goals (goal_id, description, priority, status, schema_version, created_at, updated_at)
        VALUES (?, ?, ?, 'Active', '2.0', ?, ?)
    """, (goal_id, description, priority or "medium", now_iso(), now_iso()))
    conn.commit()
    conn.close()
    return f"✅ Goal created: `{goal_id}`"

# ============================================================
# Claim Browser
# ============================================================

def browse_claims(tag_filter, min_conf, section_filter):
    """Browse claims with filters."""
    conn = get_db(DB_PATH)
    conditions = []
    params = []

    if tag_filter and tag_filter != "All":
        conditions.append("epistemic_tag = ?")
        params.append(tag_filter)
    if min_conf and min_conf > 0:
        conditions.append("composite_confidence >= ?")
        params.append(to_fixed(min_conf))
    if section_filter and section_filter != "All":
        conditions.append("source_section = ?")
        params.append(section_filter)

    where = " AND ".join(conditions) if conditions else "1=1"
    rows = conn.execute(f"""
        SELECT claim_id, text, epistemic_tag, composite_confidence,
               evidence_quality, truth_likelihood, qualifier_strength_score,
               source_section, status, is_null_result, qualifiers
        FROM claims WHERE {where}
        ORDER BY composite_confidence DESC LIMIT 100
    """, params).fetchall()
    conn.close()

    table_rows = []
    for r in rows:
        d = dict(r)
        quals = json.loads(d.get("qualifiers", "[]")) if isinstance(d.get("qualifiers"), str) else d.get("qualifiers", [])
        table_rows.append([
            d["claim_id"][:12],
            d["text"][:150],
            d["epistemic_tag"],
            f"{from_fixed(d.get('composite_confidence', 0)):.3f}",
            f"{from_fixed(d.get('evidence_quality', 0)):.3f}" if d.get('evidence_quality') else "—",
            f"{from_fixed(d.get('truth_likelihood', 0)):.3f}" if d.get('truth_likelihood') else "—",
            d.get("source_section", "—"),
            "🔴" if d.get("is_null_result") else "—",
        ])

    return table_rows

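# Example: browse_claims("Fact", 0.7, "results") lists only Fact-tagged claims
# from results sections with composite confidence of at least 0.7 (the slider
# value is converted with to_fixed() before the SQL comparison). The parsed
# qualifiers list is loaded but not shown in the table yet.
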
# ============================================================
# THE GUIDED UI
# ============================================================

THEME = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
).set(
    body_background_fill="*neutral_950",
    body_background_fill_dark="*neutral_950",
    block_background_fill="*neutral_900",
    block_background_fill_dark="*neutral_900",
    input_background_fill="*neutral_800",
    input_background_fill_dark="*neutral_800",
)

with gr.Blocks(theme=THEME, title="PhD Research OS v2.0") as app:

    # ── Header ──
    overview = gr.Markdown(value=render_phase_overview())

    with gr.Tabs() as tabs:

        # ══════════════════════════════════════════════════════
        # TAB 1: PHASE 1 — Paper Ingestion (Layer 0)
        # ══════════════════════════════════════════════════════
        with gr.Tab("📄 Phase 1: Ingest Papers"):
            gr.Markdown("""### Layer 0: Structural PDF Ingestion
Upload a PDF, and the system will parse it into section-aware regions with quality scores.
Each region gets: section tag, bounding box, parse confidence, cross-references.""")

            with gr.Row():
                file_input = gr.File(label="Upload PDF or text file", file_types=[".pdf", ".txt", ".md", ".csv"])
                with gr.Column():
                    doc_type = gr.Dropdown(["main", "supplement", "dataset"], value="main", label="Document Type")
                    title_input = gr.Textbox(label="Title (optional)")
                    doi_input = gr.Textbox(label="DOI (optional)", placeholder="10.1234/example")

            ingest_btn = gr.Button("📥 Ingest Document", variant="primary")
            ingest_status = gr.Markdown()
            region_preview = gr.Dataframe(
                headers=["Region ID", "Type", "Section", "Content Preview", "Quality"],
                label="Parsed Regions",
            )

            ingest_btn.click(
                ingest_paper,
                inputs=[file_input, doc_type, title_input, doi_input],
                outputs=[ingest_status, region_preview, overview],
            )

        # ══════════════════════════════════════════════════════
        # TAB 2: PHASE 2 — Claim Extraction (Layer 2)
        # ══════════════════════════════════════════════════════
        with gr.Tab("🔬 Phase 2: Extract Claims"):
            gr.Markdown("""### Layer 2: Qualified Claim Extraction
Extract epistemic-tagged claims from ingested documents or raw text.
Claims are tagged with qualifiers, null results, and section-aware confidence.""")

            with gr.Tabs():
                with gr.Tab("From Document"):
                    doc_id_input = gr.Textbox(label="Document ID", placeholder="DOC_XXXXXXXX")
                    extract_doc_btn = gr.Button("🔬 Extract Claims", variant="primary")
                    extract_doc_status = gr.Markdown()
                    extract_doc_table = gr.Dataframe(
                        headers=["Claim ID", "Text", "Tag", "Confidence", "Section", "Status"],
                    )
                    extract_doc_btn.click(
                        extract_claims_from_doc,
                        inputs=[doc_id_input],
                        outputs=[extract_doc_status, extract_doc_table],
                    )

                with gr.Tab("From Text"):
                    text_input = gr.Textbox(label="Scientific Text", lines=8,
                                            placeholder="Paste scientific text here...")
                    section_input = gr.Dropdown(
                        ["abstract", "introduction", "methods", "results", "discussion", "conclusion", "unknown"],
                        value="results", label="Section"
                    )
                    extract_text_btn = gr.Button("🔬 Extract Claims", variant="primary")
                    extract_text_status = gr.Markdown()
                    extract_text_table = gr.Dataframe(
                        headers=["Claim ID", "Text", "Tag", "Confidence", "Qualifiers", "Status"],
                    )
                    extract_text_btn.click(
                        extract_from_text,
                        inputs=[text_input, section_input],
                        outputs=[extract_text_status, extract_text_table],
                    )

        # ══════════════════════════════════════════════════════
        # TAB 3: PHASE 3 — Knowledge Graph (Layer 4)
        # ══════════════════════════════════════════════════════
        with gr.Tab("🕸️ Phase 3: Knowledge Graph"):
            gr.Markdown("""### Layer 4: Knowledge Graph + Gap Analysis
Build a graph from extracted claims. Detect structural holes where evidence is missing.""")

            with gr.Row():
                build_btn = gr.Button("🕸️ Build Graph from Claims", variant="primary")
                gap_btn = gr.Button("🔍 Find Research Gaps", variant="secondary")

            graph_status = gr.Markdown()
            gap_results = gr.Markdown()

            build_btn.click(build_graph, outputs=graph_status)
            gap_btn.click(find_gaps, outputs=gap_results)

        # ══════════════════════════════════════════════════════
        # TAB 4: PHASE 4 — Conflict Detection
        # ══════════════════════════════════════════════════════
        with gr.Tab("⚔️ Phase 4: Conflicts"):
            gr.Markdown("""### Conflict Detection & Resolution
Find contradictions between claims from different sources.
All conflict hypotheses are tagged confidence="low" — human review required.""")

            detect_btn = gr.Button("⚔️ Detect Conflicts", variant="primary")
            conflict_status = gr.Markdown()
            conflict_table = gr.Dataframe(
                headers=["Conflict ID", "Claim A", "Claim B", "Similarity", "Status"],
            )

            detect_btn.click(detect_conflicts, outputs=[conflict_status, conflict_table])

        # ══════════════════════════════════════════════════════
        # TAB 5: PHASE 5 — Calibrated Scoring (Layer 5)
        # ══════════════════════════════════════════════════════
        with gr.Tab("📊 Phase 5: Scoring"):
            gr.Markdown("""### Layer 5: Code-Computed Calibrated Scoring
Rescore all claims using the 3-score system:
- **Evidence Quality**: evidence × study_quality × journal_tier × completeness × section
- **Truth Likelihood**: evidence_quality + corroboration - conflict_penalty
- **Qualifier Strength**: 1.0 - qualifier_count×0.1 - null_penalty

*The LLM provides components. The CODE computes final scores.*""")

            rescore_btn = gr.Button("📊 Rescore All Claims", variant="primary")
            rescore_status = gr.Markdown()
            rescore_btn.click(rescore_all, outputs=rescore_status)

        # ══════════════════════════════════════════════════════
        # TAB 6: PHASE 6 — Research Goals & Decisions
        # ══════════════════════════════════════════════════════
        with gr.Tab("🎯 Phase 6: Goals"):
            gr.Markdown("### Set Research Goals\nDefine what you're trying to achieve. The system will link claims and gaps to your goals.")

            with gr.Row():
                goal_desc = gr.Textbox(label="Goal Description", placeholder="Achieve sub-fM LOD for cardiac troponin...")
                goal_priority = gr.Dropdown(["high", "medium", "low"], value="high", label="Priority")
            goal_btn = gr.Button("🎯 Create Goal", variant="primary")
            goal_status = gr.Markdown()
            goal_btn.click(create_goal, inputs=[goal_desc, goal_priority], outputs=goal_status)

        # ══════════════════════════════════════════════════════
        # TAB 7: Claim Browser
        # ══════════════════════════════════════════════════════
        with gr.Tab("📋 Browse Claims"):
            gr.Markdown("### Claim Browser\nFilter and explore all extracted claims.")

            with gr.Row():
                tag_filter = gr.Dropdown(
                    ["All", "Fact", "Interpretation", "Hypothesis", "Conflict_Hypothesis"],
                    value="All", label="Epistemic Tag"
                )
                conf_slider = gr.Slider(0, 1, value=0, step=0.05, label="Minimum Confidence")
                section_filter = gr.Dropdown(
                    ["All", "abstract", "introduction", "methods", "results", "discussion", "conclusion"],
                    value="All", label="Section"
                )

            browse_btn = gr.Button("🔍 Search", variant="primary")
            claims_table = gr.Dataframe(
                headers=["ID", "Text", "Tag", "Composite", "Evidence Q", "Truth L", "Section", "Null?"],
            )
            browse_btn.click(
                browse_claims,
                inputs=[tag_filter, conf_slider, section_filter],
                outputs=claims_table,
            )

        # ══════════════════════════════════════════════════════
        # TAB 8: System Settings
        # ══════════════════════════════════════════════════════
        with gr.Tab("⚙️ Settings"):
            gr.Markdown(f"""### System Configuration

| Setting | Value |
|---------|-------|
| Database | `{DB_PATH}` |
| Schema Version | 2.0 |
| Pipeline Version | 2.1.0 |

### Local Model Setup

To use AI-powered extraction (instead of heuristic), set up a local model:

```bash
# Option 1: Ollama (simplest)
curl -fsSL https://ollama.com/install.sh | sh
ollama pull qwen3:8b

# Option 2: Set API key for cloud fallback
export ANTHROPIC_API_KEY=sk-...
# or
export OPENAI_API_KEY=sk-...
```

### Upgrade Parser

For best PDF parsing, install Marker:
```bash
pip install marker-pdf
```

### Layer Status
""")

            refresh_btn = gr.Button("🔄 Refresh Status")
            status_display = gr.Markdown(value=render_phase_overview())
            refresh_btn.click(render_phase_overview, outputs=status_display)


# ============================================================
# Launch
# ============================================================

if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )
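
# server_name="0.0.0.0" binds on all interfaces, so the UI is reachable from
# other machines on the local network at port 7860; setting share=True would
# instead request a temporary public Gradio share link.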