import io
import os
import re

import arxiv
import gradio as gr
import requests
from pypdf import PdfReader

# -----------------------------
# 1. CONFIGURATION & LLM ENGINE
# -----------------------------
API_KEY = os.getenv("XAI_API_KEY")
API_URL = "https://api.x.ai/v1/chat/completions"
MODEL_NAME = "grok-4-1-fast-non-reasoning"

# Every failure string produced by call_grok starts with this marker, which is
# how analyze_paper tells an error apart from a genuine model answer.
ERROR_PREFIX = "❌"

# Seconds to wait on any single HTTP request (LLM call or PDF download).
REQUEST_TIMEOUT = 60

# Only the first LEADING_PAGES pages (plus the last page of longer papers) are
# sent to the model, and the joined text is capped at MAX_SNIPPET_CHARS.
LEADING_PAGES = 7
MAX_SNIPPET_CHARS = 18000

# Section headers the model is told to emit, in output order.
SECTION_MARKERS = (
    "SUMMARY",
    "PROBLEM",
    "IDEAS",
    "THEORY",
    "ALGO",
    "FINDINGS",
    "AUTHORS",
    "VERDICT",
)

SYSTEM_PROMPT = (
    "You are a world-class research scientist. "
    "Use LaTeX for math and format the equations properly, "
    "the equations and math symbols should all be clear. "
    "Use exact headers: [SUMMARY], [PROBLEM], [IDEAS], [THEORY], [ALGO], "
    "[FINDINGS], [AUTHORS], [VERDICT]. "
    "EXPLAIN EACH SECTION in a detailed manner for the audiences to "
    "understand the paper deeply. "
    "Explain the math and problem statement in a simple manner"
)


def call_grok(prompt):
    """Send *prompt* to the chat-completions endpoint and return the reply text.

    This function never raises. Any failure (missing key, transport error,
    non-JSON body, API-reported error, unexpected payload shape) is returned
    as a human-readable string that starts with "❌".

    Args:
        prompt: The user message to send alongside the fixed system prompt.

    Returns:
        The model's reply text, or an error string starting with "❌".
    """
    if not API_KEY:
        return "❌ ERROR: XAI_API_KEY not found in Hugging Face Secrets."

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,
    }

    # Only the transport step is labelled "Connection Failed"; problems with
    # the response body are reported separately below.
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
        )
    except Exception as e:
        return f"❌ Connection Failed: {str(e)}"

    try:
        data = response.json()
    except ValueError:
        # The server answered, but not with JSON (e.g. an HTML error page).
        return (
            f"❌ Unexpected Response: HTTP {response.status_code} "
            "with a non-JSON body."
        )

    if not isinstance(data, dict):
        return f"❌ Unexpected Response: {str(data)}"

    error = data.get("error")
    if error:
        # Accept both an error object carrying "message" and a bare string.
        if isinstance(error, dict):
            message = error.get("message", error)
        else:
            message = error
        return f"❌ API ERROR: {message}"

    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Missing/empty "choices" or a choice without message content.
        return f"❌ Unexpected Response: {str(data)}"

    if not isinstance(content, str):
        return f"❌ Unexpected Response: {str(data)}"
    return content


# -----------------------------
# 2. CORE LOGIC
# -----------------------------
def _error_row(message):
    """Return the output list with *message* in the title slot, rest blank."""
    return [message] + [""] * len(SECTION_MARKERS)


def _extract_snippet_text(pdf_bytes):
    """Return the text of the first pages (and last page, if long) of a PDF."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    pages = reader.pages
    # `or ""` keeps the join safe if a page yields no text object at all.
    page_texts = [page.extract_text() or "" for page in pages[:LEADING_PAGES]]
    if len(pages) > LEADING_PAGES:
        page_texts.append(pages[-1].extract_text() or "")
    return " ".join(page_texts)


def _split_sections(raw_analysis):
    """Map each section marker to the text that follows its [HEADER]."""
    any_header = "|".join(re.escape(marker) for marker in SECTION_MARKERS)
    sections = {}
    for marker in SECTION_MARKERS:
        # A section ends at the next known header of any kind, or at the end
        # of the text, so one missing header does not also lose the section
        # that precedes it.
        pattern = rf"\[{re.escape(marker)}\](.*?)(?=\[(?:{any_header})\]|\Z)"
        found = re.search(pattern, raw_analysis, re.DOTALL | re.IGNORECASE)
        sections[marker] = (
            found.group(1).strip() if found else "Section could not be parsed."
        )
    return sections


def analyze_paper(url):
    """Fetch an arXiv paper, ask the LLM to dissect it, and split the answer.

    Args:
        url: Text containing a new-style arXiv identifier
            (four digits, a dot, then four or five digits).

    Returns:
        A list of nine strings: the paper title followed by one entry per
        marker in SECTION_MARKERS. On failure the first entry (or, for an LLM
        failure, the second entry after the title) holds an error message
        starting with "❌" and the remaining entries are empty strings.
        This function never raises.
    """
    try:
        # Extract ID
        id_match = re.search(r"(\d{4}\.\d{4,5})", url or "")
        if not id_match:
            return _error_row("❌ Invalid URL. Please use a standard arXiv link.")
        paper_id = id_match.group(1)

        search = arxiv.Search(id_list=[paper_id])
        # Default of None avoids a bare StopIteration (whose message is empty)
        # when the ID does not resolve to any paper.
        paper = next(search.results(), None)
        if paper is None:
            return _error_row(f"❌ Error: no arXiv paper found for ID {paper_id}.")

        resp = requests.get(paper.pdf_url, timeout=REQUEST_TIMEOUT)
        # Fail here on an HTTP error instead of feeding an error page to the
        # PDF parser.
        resp.raise_for_status()
        content = _extract_snippet_text(resp.content)

        authors = ", ".join(a.name for a in paper.authors)
        prompt = (
            f"Title: {paper.title}\n"
            f"Authors: {authors}\n\n"
            f"Full Text Snippet: {content[:MAX_SNIPPET_CHARS]}"
        )

        raw_analysis = call_grok(prompt)
        # call_grok errors always *start* with the marker; a plain substring
        # test would misclassify a real answer that merely contains it.
        if raw_analysis.startswith(ERROR_PREFIX):
            return [paper.title, raw_analysis] + [""] * (len(SECTION_MARKERS) - 1)

        sections = _split_sections(raw_analysis)
        return [paper.title] + [sections[marker] for marker in SECTION_MARKERS]
    except Exception as e:
        # UI-facing boundary: surface any failure as text rather than raising.
        return _error_row(f"❌ Error: {str(e)}")


# NOTE(review): the UI section below is kept exactly as found (collapsed onto
# one line and ending inside an unterminated gr.HTML(" call that continues
# past this view). It needs to be re-indented together with the rest of that
# block once the whole block is visible.
# ----------------------------- # 3. GRADIO UI # ----------------------------- with gr.Blocks(theme=gr.themes.Soft(primary_hue="cyan"), title="arXivForMe Ultra") as demo: gr.Markdown("# 🔬 arXivForMe") gr.Markdown("### A Research Deconstruction Application") with gr.Row(): url_input = gr.Textbox(label="arXiv URL", placeholder="https://arxiv.org/abs/2401.12345", scale=4) run_btn = gr.Button("ANALYZE", variant="primary", scale=1) paper_display = gr.HTML("