dtufail committed on
Commit
6eef1ab
·
verified ·
1 Parent(s): 7b615fe

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +0 -133
rag.py CHANGED
@@ -533,136 +533,3 @@ class NurembergScholar:
533
  def clear_cache(self) -> None:
534
  self._cache.clear()
535
  print(" Cache cleared.")
536
-
537
- # ── Gradio UI ─────────────────────────────────────────────────────────────────
538
-
539
def build_gradio_app(scholar: NurembergScholar):
    """Build the Gradio Blocks UI that fronts ``scholar``.

    The app is only constructed here; launching it is left to the caller.

    Args:
        scholar: Object providing ``answer(query, top_k=...)`` and a
            ``cache_stats`` mapping. ``answer`` must return a mapping with
            the keys ``"answer"``, ``"sources"`` and ``"citation_report"``
            (``"cache_hit"`` is optional); ``cache_stats`` must expose
            ``"size"``, ``"hits"``, ``"misses"`` and ``"hit_rate"``.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    import textwrap

    import gradio as gr

    snippet_chars = 300  # maximum number of passage characters shown per source

    def _snippet(text) -> str:
        """Return a single-line excerpt of ``text`` for use in a blockquote."""
        # Collapse all whitespace: a blank line inside the passage would
        # otherwise terminate the Markdown blockquote after its first paragraph.
        flat = " ".join((text or "").split())
        if len(flat) <= snippet_chars:
            # Nothing was cut off, so no ellipsis is appended.
            return flat
        return f"{flat[:snippet_chars]}..."

    def _format_sources(results) -> str:
        """Render the retrieved passages as a Markdown list of excerpts."""
        if not results:
            return "No sources retrieved."
        entries = []
        for i, r in enumerate(results, 1):
            score = (
                f"{r.rerank_score:.4f}" if r.rerank_score is not None else "n/a"
            )
            entries.append(
                f"**[SOURCE {i}]** `{r.collection}` | {r.date_iso or '?'} | "
                f"speaker: *{r.speaker or '-'}* | page {r.page_number or '?'} | "
                f"rerank: `{score}`\n\n"
                f"> {_snippet(r.body)}"
            )
        return "\n\n---\n\n".join(entries)

    def _format_citation_report(report: dict, cache_hit: bool = False) -> str:
        """Render the citation-check report plus cache statistics as Markdown."""
        if not report:
            return ""
        cache_label = "HIT" if cache_hit else "MISS"
        status = "All citations valid" if report.get("clean") else "Issues found"
        lines = [
            f"**Citation check:** {status} | "
            f"**Backend:** Groq ({GROQ_MODEL}) | **Cache:** {cache_label}",
        ]
        if report.get("cited"):
            lines.append(f"- Referenced: SOURCE {report['cited']}")
        if report.get("hallucinated"):
            lines.append(f"- Hallucinated refs stripped: {report['hallucinated']}")
        if report.get("uncited_sources"):
            lines.append(
                f"- Retrieved but not cited: SOURCE {report['uncited_sources']}")
        if report.get("uncited_sentences"):
            lines.append(
                f"- Paragraphs without citation: "
                f"{len(report['uncited_sentences'])}")
        stats = scholar.cache_stats
        lines.append(
            f"- Cache: {stats['size']} entries | "
            f"{stats['hits']} hits / {stats['hits']+stats['misses']} queries "
            f"({stats['hit_rate']:.0%})"
        )
        return "\n".join(lines)

    def gradio_query(query: str, top_k: int):
        """Answer ``query`` and return ``(answer, sources, citation report)``."""
        # Guard against None as well as blank input before touching the model.
        if not query or not query.strip():
            return "Please enter a question.", "", ""
        result = scholar.answer(query, top_k=int(top_k))
        answer = result["answer"]
        sources = _format_sources(result["sources"])
        report = _format_citation_report(
            result["citation_report"],
            result.get("cache_hit", False),
        )
        return answer, sources, report

    example_queries = [
        ["What did Goering say in his defense about the Luftwaffe?", 5],
        ["How did the Tribunal define crimes against humanity under Article 6(c)?", 5],
        ["What was Ohlendorf's confession regarding Einsatzgruppen killings?", 5],
        ["What evidence was presented about the Final Solution?", 5],
        ["What was the London Agreement and why was it significant?", 5],
        ["How were the defendants sentenced on 1 October 1946?", 5],
    ]

    # Dedent explicitly so the rendered Markdown never depends on how deeply
    # these literals are indented in the source.
    header_md = textwrap.dedent(
        """
        # ⚖️ Nuremberg Scholar
        **AI research assistant for the Nuremberg Trials (1945–1946)**

        Answers are grounded exclusively in primary source documents.
        Every factual claim is cited to a specific source passage.

        *Llama-3.1-8B via Groq · BGE-M3 hybrid retrieval · 46,325 indexed passages*
        """
    )
    sidebar_md = textwrap.dedent(
        """
        **Corpus**
        - 221 trial session transcripts
        - Full Tribunal judgment (Oct 1946)
        - Key prosecution documents
        - 46,325 indexed passages

        **Retrieval**
        Dense + sparse hybrid search,
        re-ranked by cross-encoder.

        **Citation policy**
        All claims must cite a SOURCE.
        Hallucinated citations are stripped.
        """
    )

    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a flattened diff -- confirm against the rendered UI.
    with gr.Blocks(title="Nuremberg Scholar", theme=gr.themes.Soft()) as app:
        gr.Markdown(header_md)
        with gr.Row():
            with gr.Column(scale=3):
                query_box = gr.Textbox(
                    label="Your question",
                    placeholder="e.g. What did Speer claim about his knowledge of the Holocaust?",
                    lines=2,
                )
                with gr.Row():
                    top_k_slider = gr.Slider(
                        minimum=1, maximum=10, value=5, step=1,
                        label="Sources to retrieve",
                    )
                    submit_btn = gr.Button("Ask", variant="primary")
                gr.Examples(
                    examples=example_queries,
                    inputs=[query_box, top_k_slider],
                    label="Example questions",
                )
            with gr.Column(scale=1):
                gr.Markdown(sidebar_md)

        answer_box = gr.Markdown(label="Answer")

        with gr.Accordion("Citation verification", open=False):
            citation_box = gr.Markdown()

        with gr.Accordion("Retrieved sources", open=False):
            sources_box = gr.Markdown()

        # Clicking the button and pressing Enter in the textbox run the same query.
        for trigger in (submit_btn.click, query_box.submit):
            trigger(
                fn=gradio_query,
                inputs=[query_box, top_k_slider],
                outputs=[answer_box, sources_box, citation_box],
            )

    return app
 
533
  def clear_cache(self) -> None:
534
  self._cache.clear()
535
  print(" Cache cleared.")