import gradio as gr
import requests
import os
import arxiv
import io
import re
from pypdf import PdfReader

# -----------------------------
# 1. CONFIGURATION & LLM ENGINE
# -----------------------------
# API key is injected via Hugging Face Space secrets; os.getenv returns None
# when the secret is absent, which call_grok reports as a user-facing error.
API_KEY = os.getenv("XAI_API_KEY")
# xAI's OpenAI-compatible chat-completions endpoint.
API_URL = "https://api.x.ai/v1/chat/completions"
# Model id sent with every request.
MODEL_NAME = "grok-4-1-fast-non-reasoning"

def call_grok(prompt):
    """Send *prompt* to the xAI chat-completions API and return the reply text.

    Never raises: every failure mode (missing key, API-reported error,
    unexpected payload shape, network problem) is returned as a
    human-readable string prefixed with "❌", which callers test for.
    """
    if not API_KEY:
        return "❌ ERROR: XAI_API_KEY not found in Hugging Face Secrets."

    # System prompt pins the exact section headers analyze_paper parses out.
    system_message = "You are a world-class research scientist. Use LaTeX for math and format the equations properly, the equations and math symbols should all be clear. Use exact headers: [SUMMARY], [PROBLEM], [IDEAS], [THEORY], [ALGO], [FINDINGS], [AUTHORS], [VERDICT]. EXPLAIN EACH SECTION in a detailed manner for the audiences to understand the paper deeply. Explain the math and problem statement in a simple manner"
    request_body = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ],
        # Low temperature keeps the sectioned output deterministic and parseable.
        "temperature": 0.1,
    }
    auth_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    try:
        reply = requests.post(API_URL, headers=auth_headers, json=request_body, timeout=60)
        data = reply.json()
        # The API reports failures either as an "error" object or by omitting
        # "choices"; surface both as strings rather than raising.
        if "error" in data:
            return f"❌ API ERROR: {data['error']['message']}"
        if "choices" not in data:
            return f"❌ Unexpected Response: {str(data)}"
        return data["choices"][0]["message"]["content"]
    except Exception as e:
        # Boundary handler: the Gradio callback expects a string, not a traceback.
        return f"❌ Connection Failed: {str(e)}"

# -----------------------------
# 2. CORE LOGIC
# -----------------------------
def analyze_paper(url):
    """Fetch an arXiv paper, run the LLM analysis, and split it into sections.

    Parameters:
        url: any string containing a standard arXiv id (e.g. "2401.12345").

    Returns a 9-element list — [title, SUMMARY, PROBLEM, IDEAS, THEORY, ALGO,
    FINDINGS, AUTHORS, VERDICT] — matching the Gradio `outputs` wiring. On any
    failure the first element carries a "❌ ..." message and the rest are "".
    """
    try:
        # Pull the numeric arXiv id out of the URL.
        id_match = re.search(r"(\d{4}\.\d{4,5})", url)
        if not id_match:
            return ["❌ Invalid URL. Please use a standard arXiv link."] + [""] * 8
        paper_id = id_match.group(1)

        # Resolve metadata. next() raises StopIteration for an unknown id;
        # report that as a clear user-facing error instead of a raw exception.
        # NOTE(review): Search.results() is deprecated in arxiv>=2.0 in favor
        # of arxiv.Client().results(search); kept for compatibility.
        search = arxiv.Search(id_list=[paper_id])
        try:
            paper = next(search.results())
        except StopIteration:
            return [f"❌ No arXiv paper found for id {paper_id}."] + [""] * 8

        # Download the PDF. The timeout stops a stalled download from hanging
        # the UI forever; raise_for_status keeps an HTTP error page from being
        # handed to PdfReader as if it were a PDF.
        resp = requests.get(paper.pdf_url, timeout=60)
        resp.raise_for_status()
        reader = PdfReader(io.BytesIO(resp.content))
        num_pages = len(reader.pages)

        # First 7 pages (intro/method) plus the final page (conclusions) keep
        # the prompt inside the 18k-character budget applied below. `or ""`
        # guards against pages with no extractable text.
        text_pages = [p.extract_text() or "" for p in reader.pages[:7]]
        if num_pages > 7:
            text_pages.append(reader.pages[-1].extract_text() or "")
        content = " ".join(text_pages)

        prompt = f"Title: {paper.title}\nAuthors: {', '.join([a.name for a in paper.authors])}\n\nFull Text Snippet: {content[:18000]}"

        raw_analysis = call_grok(prompt)

        # call_grok signals failure with a "❌" prefix rather than raising.
        if "❌" in raw_analysis:
            return [paper.title, raw_analysis] + [""] * 7

        # Split the response on the bracketed headers the system prompt pins;
        # each section runs up to the next header (or end of text for the last).
        markers = ["SUMMARY", "PROBLEM", "IDEAS", "THEORY", "ALGO", "FINDINGS", "AUTHORS", "VERDICT"]
        results = {}
        for i, m in enumerate(markers):
            start = rf"\[{m}\]"
            end = rf"\[{markers[i+1]}\]" if i + 1 < len(markers) else "$"
            section_match = re.search(f"{start}(.*?){end}", raw_analysis, re.DOTALL | re.IGNORECASE)
            results[m] = section_match.group(1).strip() if section_match else "Section could not be parsed."

        return [paper.title] + [results[m] for m in markers]

    except Exception as e:
        # Boundary handler: a Gradio callback must return values, not raise.
        return [f"❌ Error: {str(e)}"] + [""] * 8

# -----------------------------
# 3. GRADIO UI
# -----------------------------
# NOTE(review): the emoji labels below look mojibake-encoded in this view
# (UTF-8 bytes decoded as Latin-1) — verify the file's on-disk encoding.
# Widget creation order is load-bearing: the `outputs` list at the bottom
# must match the 9-element list analyze_paper returns, in order.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="cyan"), title="arXivForMe Ultra") as demo:
    gr.Markdown("# πŸ”¬ arXivForMe")
    gr.Markdown("### A Research Deconstruction Application")
    
    # Input row: URL box plus the trigger button.
    with gr.Row():
        url_input = gr.Textbox(label="arXiv URL", placeholder="https://arxiv.org/abs/2401.12345", scale=4)
        run_btn = gr.Button("ANALYZE", variant="primary", scale=1)

    # Receives the paper title (or an error message) — first analyze_paper output.
    paper_display = gr.HTML("<center><h3>Enter a link to begin analysis</h3></center>")

    # One tab per parsed section; each Markdown widget maps to one marker.
    with gr.Tabs():
        with gr.Tab("πŸ“‹ Summary & Problem"):
            with gr.Row():
                out_sum = gr.Markdown(label="Summary")
                out_prob = gr.Markdown(label="Research Problem")
        
        with gr.Tab("πŸ’‘ Innovation"):
            out_idea = gr.Markdown(label="Main Ideas")
            
        with gr.Tab("πŸ“ Math & Theory"):
            out_theo = gr.Markdown(label="Formal Framework")
            
        with gr.Tab("πŸ’» Algorithm"):
            out_algo = gr.Markdown(label="Implementation Logic")
            
        with gr.Tab("πŸ“Š Findings"):
            out_find = gr.Markdown(label="Results")
            
        with gr.Tab("πŸ•΅οΈ Author Reputation"):
            out_auth = gr.Markdown(label="Author Context")
            
        with gr.Tab("βš–οΈ The Verdict"):
            out_verd = gr.Markdown(label="AI Critical Opinion")

    # Order here mirrors analyze_paper's return list exactly.
    outputs = [paper_display, out_sum, out_prob, out_idea, out_theo, out_algo, out_find, out_auth, out_verd]
    run_btn.click(fn=analyze_paper, inputs=url_input, outputs=outputs)

# Launch only when run as a script (Hugging Face Spaces executes this path).
if __name__ == "__main__":
    demo.launch()