Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,143 +3,134 @@ import requests
|
|
| 3 |
import os
|
| 4 |
import arxiv
|
| 5 |
import io
|
|
|
|
| 6 |
from pypdf import PdfReader
|
| 7 |
|
| 8 |
# -----------------------------
|
| 9 |
-
# LLM ENGINE
|
| 10 |
# -----------------------------
|
| 11 |
API_KEY = os.getenv("XAI_API_KEY")
|
|
|
|
| 12 |
API_URL = "https://api.x.ai/v1/chat/completions"
|
|
|
|
| 13 |
|
| 14 |
def call_grok(prompt):
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
payload = {
|
| 17 |
-
"model":
|
| 18 |
"messages": [
|
| 19 |
-
{"role": "system", "content": "You are
|
| 20 |
{"role": "user", "content": prompt}
|
| 21 |
],
|
| 22 |
-
"temperature": 0.
|
| 23 |
}
|
|
|
|
| 24 |
try:
|
| 25 |
response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
except Exception as e:
|
| 28 |
-
return f"
|
| 29 |
|
| 30 |
# -----------------------------
|
| 31 |
-
#
|
| 32 |
# -----------------------------
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
def analyze_full_paper(url):
|
| 36 |
try:
|
| 37 |
-
#
|
| 38 |
match = re.search(r"(\d{4}\.\d{4,5})", url)
|
| 39 |
if not match:
|
| 40 |
-
return ["β
|
| 41 |
paper_id = match.group(1)
|
| 42 |
|
|
|
|
| 43 |
search = arxiv.Search(id_list=[paper_id])
|
| 44 |
paper = next(search.results())
|
| 45 |
|
| 46 |
-
#
|
| 47 |
resp = requests.get(paper.pdf_url)
|
| 48 |
reader = PdfReader(io.BytesIO(resp.content))
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
[SUMMARY]
|
| 58 |
-
[PROBLEM]
|
| 59 |
-
[IDEAS]
|
| 60 |
-
[THEORY]
|
| 61 |
-
[ALGO]
|
| 62 |
-
[FINDINGS]
|
| 63 |
-
[AUTHORS]
|
| 64 |
-
[VERDICT]
|
| 65 |
-
"""
|
| 66 |
|
| 67 |
-
|
| 68 |
|
| 69 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
markers = ["SUMMARY", "PROBLEM", "IDEAS", "THEORY", "ALGO", "FINDINGS", "AUTHORS", "VERDICT"]
|
| 71 |
results = {}
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
pattern = f"{start_pattern}(.*?){end_pattern}"
|
| 79 |
-
match = re.search(pattern, raw_resp, re.DOTALL | re.IGNORECASE)
|
| 80 |
-
|
| 81 |
-
if match:
|
| 82 |
-
results[marker] = match.group(1).strip()
|
| 83 |
-
else:
|
| 84 |
-
results[marker] = "β οΈ Parsing failed for this section."
|
| 85 |
-
|
| 86 |
-
# If everything failed, dump raw response into Summary for debugging
|
| 87 |
-
if all(v == "β οΈ Parsing failed for this section." for v in results.values()):
|
| 88 |
-
results["SUMMARY"] = f"DEBUG: Raw Output below:\n\n{raw_resp}"
|
| 89 |
|
| 90 |
return [paper.title] + [results[m] for m in markers]
|
| 91 |
-
|
| 92 |
except Exception as e:
|
| 93 |
-
return [f"β
|
| 94 |
|
| 95 |
# -----------------------------
|
| 96 |
-
#
|
| 97 |
# -----------------------------
|
| 98 |
-
with gr.Blocks(theme=gr.themes.
|
| 99 |
-
gr.Markdown("#
|
| 100 |
-
gr.Markdown("*
|
| 101 |
|
| 102 |
with gr.Row():
|
| 103 |
-
url_input = gr.Textbox(label="arXiv URL", placeholder="https://arxiv.org/abs/2401.
|
| 104 |
-
run_btn = gr.Button("
|
| 105 |
|
| 106 |
-
|
| 107 |
|
| 108 |
with gr.Tabs():
|
| 109 |
-
with gr.Tab("π
|
| 110 |
with gr.Row():
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
with gr.Tab("π‘ Main Ideas"):
|
| 119 |
-
out_idea = gr.Markdown()
|
| 120 |
-
|
| 121 |
with gr.Tab("π Math & Theory"):
|
| 122 |
-
gr.Markdown("
|
| 123 |
-
|
| 124 |
-
|
| 125 |
with gr.Tab("π» Algorithm"):
|
| 126 |
-
gr.Markdown("
|
| 127 |
-
|
| 128 |
-
|
| 129 |
with gr.Tab("π Findings"):
|
| 130 |
-
gr.Markdown("
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
with gr.Tab("βοΈ AI Opinion"):
|
| 138 |
-
gr.Markdown("### The Brutal Verdict")
|
| 139 |
-
out_verd = gr.Markdown()
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
run_btn.click(fn=
|
| 144 |
|
| 145 |
-
|
|
|
|
|
|
# --- standard library ---
import io
import os
import re

# --- third-party ---
import arxiv
from pypdf import PdfReader

# -----------------------------
# 1. CONFIGURATION & LLM ENGINE
# -----------------------------
# The key is injected through the Space's secret store; call_grok checks it.
API_KEY = os.getenv("XAI_API_KEY")
# Current 2026 Production Endpoint
API_URL = "https://api.x.ai/v1/chat/completions"
MODEL_NAME = "grok-4-1-fast-non-reasoning"
def call_grok(prompt):
    """Send `prompt` to the xAI chat-completions API and return the reply text.

    On any failure (missing key, API-level error payload, malformed reply,
    or network problem) a human-readable string prefixed with "β" is
    returned instead, so callers can detect errors by that prefix.
    """
    # Fail fast when the Space has no secret configured.
    if not API_KEY:
        return "β ERROR: XAI_API_KEY not found in Hugging Face Secrets."

    request_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a world-class research scientist. Use LaTeX for math. Use exact headers: [SUMMARY], [PROBLEM], [IDEAS], [THEORY], [ALGO], [FINDINGS], [AUTHORS], [VERDICT]."},
            {"role": "user", "content": prompt},
        ],
        # Low temperature: we want deterministic, parseable section output.
        "temperature": 0.1,
    }

    try:
        resp = requests.post(API_URL, headers=request_headers, json=request_body, timeout=60)
        reply = resp.json()

        # The API signals failure with a JSON body carrying an "error" object.
        if "error" in reply:
            return f"β API ERROR: {reply['error']['message']}"
        if "choices" not in reply:
            return f"β Unexpected Response: {str(reply)}"

        return reply["choices"][0]["message"]["content"]
    except Exception as e:
        # Boundary catch-all: network faults, JSON decode errors, etc.
        return f"β Connection Failed: {str(e)}"
| 46 |
|
| 47 |
# -----------------------------
|
| 48 |
+
# 2. CORE LOGIC
|
| 49 |
# -----------------------------
|
def analyze_paper(url):
    """Fetch an arXiv paper, have Grok analyse it, and split the reply into sections.

    Parameters
    ----------
    url : str
        Any string containing a new-style arXiv identifier (e.g. 2401.12345).

    Returns
    -------
    list[str]
        Nine strings: the paper title followed by the eight section texts in
        marker order (SUMMARY, PROBLEM, IDEAS, THEORY, ALGO, FINDINGS,
        AUTHORS, VERDICT). On failure, element 0 carries the error message
        and the remaining elements are empty strings, so the Gradio outputs
        list is always fully populated.
    """
    try:
        # Extract the arXiv ID (new-style IDs look like 2401.12345).
        match = re.search(r"(\d{4}\.\d{4,5})", url)
        if not match:
            return ["β Invalid URL. Please use a standard arXiv link."] + [""] * 8
        paper_id = match.group(1)

        # Fetch metadata from the arXiv API.
        # NOTE(review): Search.results() is deprecated in recent `arxiv`
        # releases in favour of Client().results(search) — confirm the pinned
        # package version before changing.
        search = arxiv.Search(id_list=[paper_id])
        paper = next(search.results())

        # Download the PDF. The timeout keeps the UI from hanging forever on
        # an unresponsive mirror (the original call had none), and
        # raise_for_status() surfaces HTTP errors instead of feeding an
        # error page to the PDF parser.
        resp = requests.get(paper.pdf_url, timeout=60)
        resp.raise_for_status()
        reader = PdfReader(io.BytesIO(resp.content))
        num_pages = len(reader.pages)

        # Use the first 7 pages (intro/method) plus the final page
        # (conclusion) as context. extract_text() may return None for
        # image-only pages, so coerce to "" before joining.
        text_pages = [p.extract_text() or "" for p in reader.pages[:7]]
        if num_pages > 7:
            text_pages.append(reader.pages[-1].extract_text() or "")
        content = " ".join(text_pages)

        prompt = f"Title: {paper.title}\nAuthors: {', '.join([a.name for a in paper.authors])}\n\nFull Text Snippet: {content[:18000]}"

        raw_analysis = call_grok(prompt)

        # call_grok signals failure by prefixing its message with "β";
        # show it in the summary slot and leave the other tabs blank.
        if "β" in raw_analysis:
            return [paper.title, raw_analysis] + [""] * 7

        # Split the reply on the bracketed section headers: each section runs
        # from its own marker to the next marker (or end of string).
        markers = ["SUMMARY", "PROBLEM", "IDEAS", "THEORY", "ALGO", "FINDINGS", "AUTHORS", "VERDICT"]
        results = {}
        for i, m in enumerate(markers):
            start = rf"\[{m}\]"
            end = rf"\[{markers[i + 1]}\]" if i + 1 < len(markers) else "$"
            # Named sec_match so it does not shadow the URL `match` above.
            sec_match = re.search(f"{start}(.*?){end}", raw_analysis, re.DOTALL | re.IGNORECASE)
            results[m] = sec_match.group(1).strip() if sec_match else "Section could not be parsed."

        return [paper.title] + [results[m] for m in markers]

    except Exception as e:
        # Catch-all boundary: the UI always needs 9 values back.
        return [f"β Error: {str(e)}"] + [""] * 8
| 93 |
|
| 94 |
# -----------------------------
|
| 95 |
+
# 3. GRADIO UI
|
| 96 |
# -----------------------------
|
with gr.Blocks(theme=gr.themes.Soft(primary_hue="cyan"), title="arXivForMe Ultra") as demo:
    # Page header.
    gr.Markdown("# π¬ arXivForMe Ultra")
    gr.Markdown("### *2026 Research Deconstruction Suite*")

    # Input row: URL box plus the trigger button.
    with gr.Row():
        url_input = gr.Textbox(label="arXiv URL", placeholder="https://arxiv.org/abs/2401.12345", scale=4)
        run_btn = gr.Button("ANALYZE", variant="primary", scale=1)

    # Banner that receives the paper title once an analysis has run.
    paper_display = gr.HTML("<center><h3>Enter a link to begin analysis</h3></center>")

    # One tab per analysis section, in the same order analyze_paper
    # returns them (SUMMARY..VERDICT).
    with gr.Tabs():
        with gr.Tab("π Summary & Problem"):
            with gr.Row():
                out_sum = gr.Markdown(label="Summary")
                out_prob = gr.Markdown(label="Research Problem")

        with gr.Tab("π‘ Innovation"):
            out_idea = gr.Markdown(label="Main Ideas")

        with gr.Tab("π Math & Theory"):
            out_theo = gr.Markdown(label="Formal Framework")

        with gr.Tab("π» Algorithm"):
            out_algo = gr.Markdown(label="Implementation Logic")

        with gr.Tab("π Findings"):
            out_find = gr.Markdown(label="Results")

        with gr.Tab("π΅οΈ Author Reputation"):
            out_auth = gr.Markdown(label="Author Context")

        with gr.Tab("βοΈ The Verdict"):
            out_verd = gr.Markdown(label="AI Critical Opinion")

    # Wire the button: element 0 (title) feeds the HTML banner, the
    # remaining eight values feed the tabs in marker order.
    run_btn.click(
        fn=analyze_paper,
        inputs=url_input,
        outputs=[paper_display, out_sum, out_prob, out_idea, out_theo, out_algo, out_find, out_auth, out_verd],
    )

if __name__ == "__main__":
    demo.launch()
|