ChaoticEconomist committed on
Commit
afb65eb
·
verified ·
1 Parent(s): c2582a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -90
app.py CHANGED
@@ -3,143 +3,134 @@ import requests
3
  import os
4
  import arxiv
5
  import io
 
6
  from pypdf import PdfReader
7
 
8
  # -----------------------------
9
- # LLM ENGINE (Grok-4)
10
  # -----------------------------
11
  API_KEY = os.getenv("XAI_API_KEY")
 
12
  API_URL = "https://api.x.ai/v1/chat/completions"
 
13
 
14
  def call_grok(prompt):
15
- headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
 
 
 
 
 
 
16
  payload = {
17
- "model": "grok-4-1-fast-non-reasoning",
18
  "messages": [
19
- {"role": "system", "content": "You are an expert polymath, research scientist, and critic. Use LaTeX for math ($...$ or $$...$$). Be dense, technical, and objective."},
20
  {"role": "user", "content": prompt}
21
  ],
22
- "temperature": 0.15
23
  }
 
24
  try:
25
  response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
26
- return response.json()["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
27
  except Exception as e:
28
- return f"LLM Error: {str(e)}"
29
 
30
  # -----------------------------
31
- # THE ENGINE ROOM
32
  # -----------------------------
33
- import re
34
-
35
- def analyze_full_paper(url):
36
  try:
37
- # 1. Robust ID Extraction
38
  match = re.search(r"(\d{4}\.\d{4,5})", url)
39
  if not match:
40
- return ["❌ Error: Invalid arXiv URL format."] + [""] * 8
41
  paper_id = match.group(1)
42
 
 
43
  search = arxiv.Search(id_list=[paper_id])
44
  paper = next(search.results())
45
 
46
- # 2. Extract PDF Text
47
  resp = requests.get(paper.pdf_url)
48
  reader = PdfReader(io.BytesIO(resp.content))
49
- content = "".join([page.extract_text() for page in reader.pages[:10]])
50
-
51
- # 3. Prompt (Strict instructions for markers)
52
- prompt = f"""
53
- Title: {paper.title}
54
- Content: {content[:15000]}
55
-
56
- Analyze this paper. You MUST start each section with the bracketed header on a new line.
57
- [SUMMARY]
58
- [PROBLEM]
59
- [IDEAS]
60
- [THEORY]
61
- [ALGO]
62
- [FINDINGS]
63
- [AUTHORS]
64
- [VERDICT]
65
- """
66
 
67
- raw_resp = call_grok(prompt)
68
 
69
- # 4. ROBUST PARSING (Regex based)
 
 
 
 
70
  markers = ["SUMMARY", "PROBLEM", "IDEAS", "THEORY", "ALGO", "FINDINGS", "AUTHORS", "VERDICT"]
71
  results = {}
72
-
73
- for i, marker in enumerate(markers):
74
- # Regex looks for [MARKER] and grabs everything until the next [MARKER]
75
- start_pattern = rf"\[{marker}\]"
76
- end_pattern = rf"\[{markers[i+1]}\]" if i+1 < len(markers) else "$"
77
-
78
- pattern = f"{start_pattern}(.*?){end_pattern}"
79
- match = re.search(pattern, raw_resp, re.DOTALL | re.IGNORECASE)
80
-
81
- if match:
82
- results[marker] = match.group(1).strip()
83
- else:
84
- results[marker] = "⚠️ Parsing failed for this section."
85
-
86
- # If everything failed, dump raw response into Summary for debugging
87
- if all(v == "⚠️ Parsing failed for this section." for v in results.values()):
88
- results["SUMMARY"] = f"DEBUG: Raw Output below:\n\n{raw_resp}"
89
 
90
  return [paper.title] + [results[m] for m in markers]
91
-
92
  except Exception as e:
93
- return [f"❌ System Error: {str(e)}"] + [""] * 8
94
 
95
  # -----------------------------
96
- # THE DASHBOARD UI
97
  # -----------------------------
98
- with gr.Blocks(theme=gr.themes.Default(primary_hue="orange", secondary_hue="slate"), title="arXivForMe Ultra") as demo:
99
- gr.Markdown("# 🧬 arXivForMe: The Full Intelligence Suite")
100
- gr.Markdown("*Deep-dive analysis of any arXiv paper: Theory, Math, and Critical Review.*")
101
 
102
  with gr.Row():
103
- url_input = gr.Textbox(label="arXiv URL", placeholder="https://arxiv.org/abs/2401.xxxxx", scale=4)
104
- run_btn = gr.Button("RUN FULL ANALYSIS", variant="primary", scale=1)
105
 
106
- paper_head = gr.HTML("<h2 style='text-align: center;'>Submit a paper to begin extraction</h2>")
107
 
108
  with gr.Tabs():
109
- with gr.Tab("πŸ“‹ Executive Summary"):
110
  with gr.Row():
111
- with gr.Column():
112
- gr.Markdown("### TL;DR")
113
- out_sum = gr.Markdown()
114
- with gr.Column():
115
- gr.Markdown("### The Research Problem")
116
- out_prob = gr.Markdown()
117
-
118
- with gr.Tab("πŸ’‘ Main Ideas"):
119
- out_idea = gr.Markdown()
120
-
121
  with gr.Tab("πŸ“ Math & Theory"):
122
- gr.Markdown("### Mathematical Framework")
123
- out_theo = gr.Markdown()
124
-
125
  with gr.Tab("πŸ’» Algorithm"):
126
- gr.Markdown("### Logic & Implementation")
127
- out_algo = gr.Markdown()
128
-
129
  with gr.Tab("πŸ“Š Findings"):
130
- gr.Markdown("### Results & Benchmarks")
131
- out_find = gr.Markdown()
132
-
133
- with gr.Tab("πŸ‘€ Author Analysis"):
134
- gr.Markdown("### Reputation & Context")
135
- out_auth = gr.Markdown()
136
-
137
- with gr.Tab("βš–οΈ AI Opinion"):
138
- gr.Markdown("### The Brutal Verdict")
139
- out_verd = gr.Markdown()
140
 
141
- output_list = [paper_head, out_sum, out_prob, out_idea, out_theo, out_algo, out_find, out_auth, out_verd]
142
-
143
- run_btn.click(fn=analyze_full_paper, inputs=url_input, outputs=output_list)
144
 
145
- demo.launch()
 
 
3
  import os
4
  import arxiv
5
  import io
6
+ import re
7
  from pypdf import PdfReader
8
 
9
  # -----------------------------
10
+ # 1. CONFIGURATION & LLM ENGINE
11
  # -----------------------------
12
  API_KEY = os.getenv("XAI_API_KEY")
13
+ # Current 2026 Production Endpoint
14
  API_URL = "https://api.x.ai/v1/chat/completions"
15
+ MODEL_NAME = "grok-4-1-fast-non-reasoning"
16
 
17
def call_grok(prompt):
    """Send *prompt* to the x.ai chat-completions endpoint.

    Returns the assistant's reply text on success, or a human-readable
    "❌ ..." error string on any failure (missing key, API error payload,
    malformed response, or network problem). Callers rely on the "❌"
    prefix to detect failures, so every error path must include it.
    """
    # Fail fast when the Space has no API key configured.
    if not API_KEY:
        return "❌ ERROR: XAI_API_KEY not found in Hugging Face Secrets."

    request_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    system_message = {
        "role": "system",
        "content": "You are a world-class research scientist. Use LaTeX for math. Use exact headers: [SUMMARY], [PROBLEM], [IDEAS], [THEORY], [ALGO], [FINDINGS], [AUTHORS], [VERDICT].",
    }
    request_body = {
        "model": MODEL_NAME,
        "messages": [system_message, {"role": "user", "content": prompt}],
        # Low temperature keeps the bracketed section headers deterministic.
        "temperature": 0.1,
    }

    try:
        reply = requests.post(API_URL, headers=request_headers, json=request_body, timeout=60)
        parsed = reply.json()

        # Surface a structured API error instead of crashing on missing keys.
        if "error" in parsed:
            return f"❌ API ERROR: {parsed['error']['message']}"
        if "choices" not in parsed:
            return f"❌ Unexpected Response: {str(parsed)}"

        return parsed["choices"][0]["message"]["content"]
    except Exception as e:
        # Covers connection failures, timeouts, non-JSON bodies, and any
        # unexpected response shape not caught by the checks above.
        return f"❌ Connection Failed: {str(e)}"
46
 
47
# -----------------------------
# 2. CORE LOGIC
# -----------------------------
def analyze_paper(url):
    """Fetch an arXiv paper, extract its text, analyze it, and return a
    9-element list: [title, SUMMARY, PROBLEM, IDEAS, THEORY, ALGO,
    FINDINGS, AUTHORS, VERDICT] — one entry per Gradio output component.

    On any failure the first element carries a "❌ ..." message and the
    remaining eight are empty strings, so the UI layout never breaks.
    """
    try:
        # Extract the numeric arXiv ID (e.g. 2401.12345) from the URL.
        match = re.search(r"(\d{4}\.\d{4,5})", url)
        if not match:
            return ["❌ Invalid URL. Please use a standard arXiv link."] + [""] * 8
        paper_id = match.group(1)

        # Fetch Metadata
        search = arxiv.Search(id_list=[paper_id])
        paper = next(search.results())

        # Download the PDF. A timeout prevents the UI from hanging forever
        # on a stalled connection, and raise_for_status stops us from
        # feeding an HTML error page into the PDF parser.
        resp = requests.get(paper.pdf_url, timeout=60)
        resp.raise_for_status()
        reader = PdfReader(io.BytesIO(resp.content))
        num_pages = len(reader.pages)
        # Take the first 7 pages (intro + methods) plus the final page
        # (conclusion) for context. extract_text() is coerced to "" so a
        # page that yields no text cannot break the join below.
        text_pages = [(p.extract_text() or "") for p in reader.pages[:7]]
        if num_pages > 7:
            text_pages.append(reader.pages[-1].extract_text() or "")
        content = " ".join(text_pages)

        prompt = f"Title: {paper.title}\nAuthors: {', '.join([a.name for a in paper.authors])}\n\nFull Text Snippet: {content[:18000]}"

        raw_analysis = call_grok(prompt)

        # If the LLM helper returned an error string, show it in the
        # summary slot and leave the other sections blank.
        if "❌" in raw_analysis:
            return [paper.title, raw_analysis] + [""] * 7

        # Parsing logic: grab each [MARKER] section up to the next marker
        # (or end of text for the last one).
        markers = ["SUMMARY", "PROBLEM", "IDEAS", "THEORY", "ALGO", "FINDINGS", "AUTHORS", "VERDICT"]
        results = {}
        for i, m in enumerate(markers):
            start = rf"\[{m}\]"
            end = rf"\[{markers[i+1]}\]" if i+1 < len(markers) else "$"
            match = re.search(f"{start}(.*?){end}", raw_analysis, re.DOTALL | re.IGNORECASE)
            results[m] = match.group(1).strip() if match else "Section could not be parsed."

        return [paper.title] + [results[m] for m in markers]

    except Exception as e:
        return [f"❌ Error: {str(e)}"] + [""] * 8
93
 
94
# -----------------------------
# 3. GRADIO UI
# -----------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="cyan"), title="arXivForMe Ultra") as demo:
    gr.Markdown("# 🔬 arXivForMe Ultra")
    gr.Markdown("### *2026 Research Deconstruction Suite*")

    # Input row: URL box plus the trigger button.
    with gr.Row():
        url_input = gr.Textbox(label="arXiv URL", placeholder="https://arxiv.org/abs/2401.12345", scale=4)
        run_btn = gr.Button("ANALYZE", variant="primary", scale=1)

    # Paper title banner (first element returned by analyze_paper).
    paper_display = gr.HTML("<center><h3>Enter a link to begin analysis</h3></center>")

    # One tab per analysis section; order matches the markers list in
    # analyze_paper: SUMMARY, PROBLEM, IDEAS, THEORY, ALGO, FINDINGS,
    # AUTHORS, VERDICT.
    with gr.Tabs():
        with gr.Tab("📋 Summary & Problem"):
            with gr.Row():
                out_sum = gr.Markdown(label="Summary")
                out_prob = gr.Markdown(label="Research Problem")

        with gr.Tab("💡 Innovation"):
            out_idea = gr.Markdown(label="Main Ideas")

        with gr.Tab("📐 Math & Theory"):
            out_theo = gr.Markdown(label="Formal Framework")

        with gr.Tab("💻 Algorithm"):
            out_algo = gr.Markdown(label="Implementation Logic")

        with gr.Tab("📊 Findings"):
            out_find = gr.Markdown(label="Results")

        with gr.Tab("🕵️ Author Reputation"):
            out_auth = gr.Markdown(label="Author Context")

        with gr.Tab("⚖️ The Verdict"):
            out_verd = gr.Markdown(label="AI Critical Opinion")

    # Map function to UI components: one output slot per returned element.
    outputs = [paper_display, out_sum, out_prob, out_idea, out_theo, out_algo, out_find, out_auth, out_verd]
    run_btn.click(fn=analyze_paper, inputs=url_input, outputs=outputs)

if __name__ == "__main__":
    demo.launch()