Spaces:

Adherence
/

nuremberg-trials-ai

Sleeping

App Files Community

Adherence committed on Dec 15, 2025

Commit

df69d5c

verified ·

1 Parent(s): 4c96130

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +21 -10

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ Nuremberg Trials AI - RAG-powered Q&A system
 Deployed on HuggingFace Spaces
 """
 import json
 import gradio as gr
 import numpy as np
@@ -14,9 +15,11 @@ from datasets import load_dataset
 # Configuration
 DATASET_ID = "Adherence/nuremberg-trials-rag"
 EMBEDDING_MODEL = "all-MiniLM-L6-v2"
-LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
 TOP_K = 5
 class NurembergRAG:
     def __init__(self):
@@ -50,9 +53,13 @@ class NurembergRAG:
         )
         self.index = faiss.read_index(index_path)
-        # Initialize LLM client (free inference API)
-        print("  Initializing LLM client...")
-        self.llm_client = InferenceClient(model=LLM_MODEL)
         print(f"  Loaded {len(self.chunks)} document chunks")
         print("Ready!")
@@ -75,25 +82,29 @@ class NurembergRAG:
     def generate_answer(self, question: str, context: str) -> str:
         """Generate answer using LLM with retrieved context."""
-        prompt = f"""You are an expert on the Nuremberg Trials. Answer the question based ONLY on the provided context from historical documents. If the context doesn't contain enough information, say so.
 Context from Nuremberg Trial documents:
 {context}
 Question: {question}
-Answer (be specific and cite sources when possible):"""
         try:
             response = self.llm_client.text_generation(
                 prompt,
-                max_new_tokens=500,
                 temperature=0.3,
-                do_sample=True,
             )
             return response
         except Exception as e:
-            return f"Error generating answer: {str(e)}"
     def query(self, question: str) -> tuple:
         """Full RAG pipeline: retrieve + generate."""
@@ -114,7 +125,7 @@ Answer (be specific and cite sources when possible):"""
             context_parts.append(f"[{i}] {chunk['text'][:1000]}")
             sources_md.append(
                 f"**[{i}] {chunk['source']}** (relevance: {score:.0%})\n\n"
-                f"{chunk['text'][:500]}..."
             )
         context = "\n\n".join(context_parts)

 Deployed on HuggingFace Spaces
 """
+import os
 import json
 import gradio as gr
 import numpy as np
 # Configuration
 DATASET_ID = "Adherence/nuremberg-trials-rag"
 EMBEDDING_MODEL = "all-MiniLM-L6-v2"
 TOP_K = 5
+# Try to get HF token from environment (set in Space secrets)
+HF_TOKEN = os.environ.get("HF_TOKEN")
 class NurembergRAG:
     def __init__(self):
         )
         self.index = faiss.read_index(index_path)
+        # Initialize LLM client if token available
+        if HF_TOKEN:
+            print("  Initializing LLM client...")
+            self.llm_client = InferenceClient(token=HF_TOKEN)
+        else:
+            print("  No HF_TOKEN - running in retrieval-only mode")
+            self.llm_client = None
         print(f"  Loaded {len(self.chunks)} document chunks")
         print("Ready!")
     def generate_answer(self, question: str, context: str) -> str:
         """Generate answer using LLM with retrieved context."""
+        if not self.llm_client:
+            # No LLM available - provide retrieval-only summary
+            return "**Retrieved passages below contain the answer.** (LLM generation requires HF_TOKEN)"
+        prompt = f"""You are an expert on the Nuremberg Trials. Answer the question based ONLY on the provided context from historical documents. If the context doesn't contain enough information, say so. Be concise.
 Context from Nuremberg Trial documents:
 {context}
 Question: {question}
+Answer:"""
         try:
             response = self.llm_client.text_generation(
                 prompt,
+                model="HuggingFaceH4/zephyr-7b-beta",
+                max_new_tokens=400,
                 temperature=0.3,
             )
             return response
         except Exception as e:
+            return f"**Retrieved passages below contain the answer.** (LLM error: {str(e)[:100]})"
     def query(self, question: str) -> tuple:
         """Full RAG pipeline: retrieve + generate."""
             context_parts.append(f"[{i}] {chunk['text'][:1000]}")
             sources_md.append(
                 f"**[{i}] {chunk['source']}** (relevance: {score:.0%})\n\n"
+                f"{chunk['text'][:600]}..."
             )
         context = "\n\n".join(context_parts)