ChaoticEconomist committed on
Commit
afb65eb
·
verified ·
1 Parent(s): c2582a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -90
app.py CHANGED
@@ -3,143 +3,134 @@ import requests
3
  import os
4
  import arxiv
5
  import io
 
6
  from pypdf import PdfReader
7
 
8
  # -----------------------------
9
- # LLM ENGINE (Grok-4)
10
  # -----------------------------
11
  API_KEY = os.getenv("XAI_API_KEY")
 
12
  API_URL = "https://api.x.ai/v1/chat/completions"
 
13
 
14
  def call_grok(prompt):
15
- headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
 
 
 
 
 
 
16
  payload = {
17
- "model": "grok-4-1-fast-non-reasoning",
18
  "messages": [
19
- {"role": "system", "content": "You are an expert polymath, research scientist, and critic. Use LaTeX for math ($...$ or $$...$$). Be dense, technical, and objective."},
20
  {"role": "user", "content": prompt}
21
  ],
22
- "temperature": 0.15
23
  }
 
24
  try:
25
  response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
26
- return response.json()["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
27
  except Exception as e:
28
- return f"LLM Error: {str(e)}"
29
 
30
  # -----------------------------
31
- # THE ENGINE ROOM
32
  # -----------------------------
33
- import re
34
-
35
- def analyze_full_paper(url):
36
  try:
37
- # 1. Robust ID Extraction
38
  match = re.search(r"(\d{4}\.\d{4,5})", url)
39
  if not match:
40
- return ["❌ Error: Invalid arXiv URL format."] + [""] * 8
41
  paper_id = match.group(1)
42
 
 
43
  search = arxiv.Search(id_list=[paper_id])
44
  paper = next(search.results())
45
 
46
- # 2. Extract PDF Text
47
  resp = requests.get(paper.pdf_url)
48
  reader = PdfReader(io.BytesIO(resp.content))
49
- content = "".join([page.extract_text() for page in reader.pages[:10]])
50
-
51
- # 3. Prompt (Strict instructions for markers)
52
- prompt = f"""
53
- Title: {paper.title}
54
- Content: {content[:15000]}
55
-
56
- Analyze this paper. You MUST start each section with the bracketed header on a new line.
57
- [SUMMARY]
58
- [PROBLEM]
59
- [IDEAS]
60
- [THEORY]
61
- [ALGO]
62
- [FINDINGS]
63
- [AUTHORS]
64
- [VERDICT]
65
- """
66
 
67
- raw_resp = call_grok(prompt)
68
 
69
- # 4. ROBUST PARSING (Regex based)
 
 
 
 
70
  markers = ["SUMMARY", "PROBLEM", "IDEAS", "THEORY", "ALGO", "FINDINGS", "AUTHORS", "VERDICT"]
71
  results = {}
72
-
73
- for i, marker in enumerate(markers):
74
- # Regex looks for [MARKER] and grabs everything until the next [MARKER]
75
- start_pattern = rf"\[{marker}\]"
76
- end_pattern = rf"\[{markers[i+1]}\]" if i+1 < len(markers) else "$"
77
-
78
- pattern = f"{start_pattern}(.*?){end_pattern}"
79
- match = re.search(pattern, raw_resp, re.DOTALL | re.IGNORECASE)
80
-
81
- if match:
82
- results[marker] = match.group(1).strip()
83
- else:
84
- results[marker] = "⚠️ Parsing failed for this section."
85
-
86
- # If everything failed, dump raw response into Summary for debugging
87
- if all(v == "⚠️ Parsing failed for this section." for v in results.values()):
88
- results["SUMMARY"] = f"DEBUG: Raw Output below:\n\n{raw_resp}"
89
 
90
  return [paper.title] + [results[m] for m in markers]
91
-
92
  except Exception as e:
93
- return [f"❌ System Error: {str(e)}"] + [""] * 8
94
 
95
  # -----------------------------
96
- # THE DASHBOARD UI
97
  # -----------------------------
98
- with gr.Blocks(theme=gr.themes.Default(primary_hue="orange", secondary_hue="slate"), title="arXivForMe Ultra") as demo:
99
- gr.Markdown("# 🧬 arXivForMe: The Full Intelligence Suite")
100
- gr.Markdown("*Deep-dive analysis of any arXiv paper: Theory, Math, and Critical Review.*")
101
 
102
  with gr.Row():
103
- url_input = gr.Textbox(label="arXiv URL", placeholder="https://arxiv.org/abs/2401.xxxxx", scale=4)
104
- run_btn = gr.Button("RUN FULL ANALYSIS", variant="primary", scale=1)
105
 
106
- paper_head = gr.HTML("<h2 style='text-align: center;'>Submit a paper to begin extraction</h2>")
107
 
108
  with gr.Tabs():
109
- with gr.Tab("πŸ“‹ Executive Summary"):
110
  with gr.Row():
111
- with gr.Column():
112
- gr.Markdown("### TL;DR")
113
- out_sum = gr.Markdown()
114
- with gr.Column():
115
- gr.Markdown("### The Research Problem")
116
- out_prob = gr.Markdown()
117
-
118
- with gr.Tab("πŸ’‘ Main Ideas"):
119
- out_idea = gr.Markdown()
120
-
121
  with gr.Tab("πŸ“ Math & Theory"):
122
- gr.Markdown("### Mathematical Framework")
123
- out_theo = gr.Markdown()
124
-
125
  with gr.Tab("πŸ’» Algorithm"):
126
- gr.Markdown("### Logic & Implementation")
127
- out_algo = gr.Markdown()
128
-
129
  with gr.Tab("πŸ“Š Findings"):
130
- gr.Markdown("### Results & Benchmarks")
131
- out_find = gr.Markdown()
132
-
133
- with gr.Tab("πŸ‘€ Author Analysis"):
134
- gr.Markdown("### Reputation & Context")
135
- out_auth = gr.Markdown()
136
-
137
- with gr.Tab("βš–οΈ AI Opinion"):
138
- gr.Markdown("### The Brutal Verdict")
139
- out_verd = gr.Markdown()
140
 
141
- output_list = [paper_head, out_sum, out_prob, out_idea, out_theo, out_algo, out_find, out_auth, out_verd]
142
-
143
- run_btn.click(fn=analyze_full_paper, inputs=url_input, outputs=output_list)
144
 
145
- demo.launch()
 
 
3
  import os
4
  import arxiv
5
  import io
6
+ import re
7
  from pypdf import PdfReader
8
 
9
  # -----------------------------
10
+ # 1. CONFIGURATION & LLM ENGINE
11
  # -----------------------------
12
  API_KEY = os.getenv("XAI_API_KEY")
13
+ # Current 2026 Production Endpoint
14
  API_URL = "https://api.x.ai/v1/chat/completions"
15
+ MODEL_NAME = "grok-4-1-fast-non-reasoning"
16
 
17
def call_grok(prompt):
    """Send *prompt* to the x.ai chat-completions endpoint.

    Returns the assistant's reply text on success, or a human-readable
    "❌ ..." error string on any failure (missing key, API error payload,
    malformed response, or network problem). Callers rely on the "❌"
    prefix to detect failures, so every error path must include it.
    """
    # Fail fast when the Space has no API key configured.
    if not API_KEY:
        return "❌ ERROR: XAI_API_KEY not found in Hugging Face Secrets."

    request_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    system_message = {
        "role": "system",
        "content": "You are a world-class research scientist. Use LaTeX for math. Use exact headers: [SUMMARY], [PROBLEM], [IDEAS], [THEORY], [ALGO], [FINDINGS], [AUTHORS], [VERDICT].",
    }
    request_body = {
        "model": MODEL_NAME,
        "messages": [system_message, {"role": "user", "content": prompt}],
        # Low temperature keeps the bracketed section headers deterministic.
        "temperature": 0.1,
    }

    try:
        reply = requests.post(API_URL, headers=request_headers, json=request_body, timeout=60)
        parsed = reply.json()

        # Surface a structured API error instead of crashing on missing keys.
        if "error" in parsed:
            return f"❌ API ERROR: {parsed['error']['message']}"
        if "choices" not in parsed:
            return f"❌ Unexpected Response: {str(parsed)}"

        return parsed["choices"][0]["message"]["content"]
    except Exception as e:
        # Covers connection failures, timeouts, non-JSON bodies, and any
        # unexpected response shape not caught by the checks above.
        return f"❌ Connection Failed: {str(e)}"
46
 
47
# -----------------------------
# 2. CORE LOGIC
# -----------------------------
def analyze_paper(url):
    """Fetch an arXiv paper, extract its text, analyze it, and return a
    9-element list: [title, SUMMARY, PROBLEM, IDEAS, THEORY, ALGO,
    FINDINGS, AUTHORS, VERDICT] — one entry per Gradio output component.

    On any failure the first element carries a "❌ ..." message and the
    remaining eight are empty strings, so the UI layout never breaks.
    """
    try:
        # Extract the numeric arXiv ID (e.g. 2401.12345) from the URL.
        match = re.search(r"(\d{4}\.\d{4,5})", url)
        if not match:
            return ["❌ Invalid URL. Please use a standard arXiv link."] + [""] * 8
        paper_id = match.group(1)

        # Fetch Metadata
        search = arxiv.Search(id_list=[paper_id])
        paper = next(search.results())

        # Download the PDF. A timeout prevents the UI from hanging forever
        # on a stalled connection, and raise_for_status stops us from
        # feeding an HTML error page into the PDF parser.
        resp = requests.get(paper.pdf_url, timeout=60)
        resp.raise_for_status()
        reader = PdfReader(io.BytesIO(resp.content))
        num_pages = len(reader.pages)
        # Take the first 7 pages (intro + methods) plus the final page
        # (conclusion) for context. extract_text() is coerced to "" so a
        # page that yields no text cannot break the join below.
        text_pages = [(p.extract_text() or "") for p in reader.pages[:7]]
        if num_pages > 7:
            text_pages.append(reader.pages[-1].extract_text() or "")
        content = " ".join(text_pages)

        prompt = f"Title: {paper.title}\nAuthors: {', '.join([a.name for a in paper.authors])}\n\nFull Text Snippet: {content[:18000]}"

        raw_analysis = call_grok(prompt)

        # If the LLM helper returned an error string, show it in the
        # summary slot and leave the other sections blank.
        if "❌" in raw_analysis:
            return [paper.title, raw_analysis] + [""] * 7

        # Parsing logic: grab each [MARKER] section up to the next marker
        # (or end of text for the last one).
        markers = ["SUMMARY", "PROBLEM", "IDEAS", "THEORY", "ALGO", "FINDINGS", "AUTHORS", "VERDICT"]
        results = {}
        for i, m in enumerate(markers):
            start = rf"\[{m}\]"
            end = rf"\[{markers[i+1]}\]" if i+1 < len(markers) else "$"
            match = re.search(f"{start}(.*?){end}", raw_analysis, re.DOTALL | re.IGNORECASE)
            results[m] = match.group(1).strip() if match else "Section could not be parsed."

        return [paper.title] + [results[m] for m in markers]

    except Exception as e:
        return [f"❌ Error: {str(e)}"] + [""] * 8
93
 
94
# -----------------------------
# 3. GRADIO UI
# -----------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="cyan"), title="arXivForMe Ultra") as demo:
    gr.Markdown("# 🔬 arXivForMe Ultra")
    gr.Markdown("### *2026 Research Deconstruction Suite*")

    # Input row: URL box plus the trigger button.
    with gr.Row():
        url_input = gr.Textbox(label="arXiv URL", placeholder="https://arxiv.org/abs/2401.12345", scale=4)
        run_btn = gr.Button("ANALYZE", variant="primary", scale=1)

    # Paper title banner (first element returned by analyze_paper).
    paper_display = gr.HTML("<center><h3>Enter a link to begin analysis</h3></center>")

    # One tab per analysis section; order matches the markers list in
    # analyze_paper: SUMMARY, PROBLEM, IDEAS, THEORY, ALGO, FINDINGS,
    # AUTHORS, VERDICT.
    with gr.Tabs():
        with gr.Tab("📋 Summary & Problem"):
            with gr.Row():
                out_sum = gr.Markdown(label="Summary")
                out_prob = gr.Markdown(label="Research Problem")

        with gr.Tab("💡 Innovation"):
            out_idea = gr.Markdown(label="Main Ideas")

        with gr.Tab("📐 Math & Theory"):
            out_theo = gr.Markdown(label="Formal Framework")

        with gr.Tab("💻 Algorithm"):
            out_algo = gr.Markdown(label="Implementation Logic")

        with gr.Tab("📊 Findings"):
            out_find = gr.Markdown(label="Results")

        with gr.Tab("🕵️ Author Reputation"):
            out_auth = gr.Markdown(label="Author Context")

        with gr.Tab("⚖️ The Verdict"):
            out_verd = gr.Markdown(label="AI Critical Opinion")

    # Map function to UI components: one output slot per returned element.
    outputs = [paper_display, out_sum, out_prob, out_idea, out_theo, out_algo, out_find, out_auth, out_verd]
    run_btn.click(fn=analyze_paper, inputs=url_input, outputs=outputs)

if __name__ == "__main__":
    demo.launch()