Final_Assignment_Template

Sleeping

App Files Files Community

likki1715 committed on Mar 4

Commit

5a57fa7

verified ·

1 Parent(s): 4df03d1

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -120

app.py CHANGED Viewed

@@ -5,80 +5,61 @@ import requests
 import pandas as pd
 import re
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Tool Implementations ---
 def download_and_read_task_file(task_id: str):
-    """Downloads file and immediately reads its content. Returns (filename, content_str)."""
     url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
         response = requests.get(url, timeout=15)
         if response.status_code != 200:
             return None, ""
         cd = response.headers.get('content-disposition', '')
         filename = f"file_{task_id[:8]}.tmp"
         match = re.search(r'filename="?([^"]+)"?', cd)
         if match:
             filename = match.group(1)
         with open(filename, 'wb') as f:
             f.write(response.content)
         print(f"  [File downloaded: {filename}]")
-        # Try to read content immediately based on file type
         ext = filename.lower().split('.')[-1]
         if ext in ['xlsx', 'xls']:
             try:
-                df = pd.read_excel(filename, sheet_name=None)
                 content = ""
-                for sheet, data in df.items():
                     content += f"Sheet: {sheet}\n{data.to_string()}\n\n"
-                return filename, content[:3000]
             except Exception as e:
                 return filename, f"Excel read error: {e}"
         elif ext == 'py':
             try:
                 with open(filename, 'r', encoding='utf-8') as f:
                     return filename, f.read()
             except Exception as e:
                 return filename, f"Python file read error: {e}"
         elif ext in ['txt', 'csv', 'json', 'md']:
             try:
                 with open(filename, 'r', encoding='utf-8') as f:
-                    return filename, f.read()[:3000]
             except Exception as e:
                 return filename, f"Text read error: {e}"
         elif ext in ['mp3', 'wav', 'ogg', 'm4a']:
-            # Audio - try whisper if available, else note it
             try:
                 import whisper
                 model = whisper.load_model("tiny")
                 result = model.transcribe(filename)
                 return filename, f"Audio transcript: {result['text']}"
             except Exception:
-                return filename, f"Audio file '{filename}' downloaded but cannot be transcribed (no whisper). File size: {len(response.content)} bytes."
-        elif ext in ['png', 'jpg', 'jpeg', 'gif', 'webp']:
-            return filename, f"Image file '{filename}' downloaded. Size: {len(response.content)} bytes. Cannot read image content directly."
         else:
-            # Try text first, fall back to binary
             try:
                 with open(filename, 'r', encoding='utf-8') as f:
-                    return filename, f.read()[:3000]
             except Exception:
-                return filename, f"Binary file '{filename}' downloaded. Size: {len(response.content)} bytes."
     except Exception as e:
-        print(f"  File download error for {task_id}: {e}")
         return None, ""
@@ -93,7 +74,7 @@ def web_search(query: str) -> str:
         for r in results:
             output.append(f"Title: {r.get('title','')}\nURL: {r.get('href','')}\nSnippet: {r.get('body','')[:300]}")
         return "\n---\n".join(output)
-    except ImportError:
         try:
             from duckduckgo_search import DDGS
             with DDGS() as ddgs:
@@ -106,8 +87,6 @@ def web_search(query: str) -> str:
             return "\n---\n".join(output)
         except Exception as e:
             return f"Search error: {e}"
-    except Exception as e:
-        return f"Search error: {e}"
 def web_fetch(url: str) -> str:
@@ -122,9 +101,9 @@ def web_fetch(url: str) -> str:
                 tag.decompose()
             text = soup.get_text(separator="\n", strip=True)
             text = re.sub(r'\n{3,}', '\n\n', text)
-            return text[:1500]
         except ImportError:
-            return response.text[:1500]
     except Exception as e:
         return f"Fetch error: {e}"
@@ -148,7 +127,7 @@ def wikipedia_search(query: str) -> str:
         pages = summary_data.get("query", {}).get("pages", {})
         for page_id, page in pages.items():
             extract = page.get("extract", "No content available.")
-            return f"Wikipedia: {title}\n\n{extract[:1500]}"
         return "No content found."
     except Exception as e:
         return f"Wikipedia error: {e}"
@@ -163,7 +142,7 @@ def run_python(code: str) -> str:
         exec_globals = {}
         exec(code, exec_globals)
         output = sys.stdout.getvalue()
-        return output[:1000] if output else "Code ran but printed nothing. Use print() to see data."
     except Exception as e:
         return f"Python error: {e}"
     finally:
@@ -180,70 +159,63 @@ class SmartAgent:
         print(f"SmartAgent initialized with Groq ({self.model})")
     def call_llm(self, prompt: str) -> str:
-        if len(prompt) > 8000:
-            prompt = prompt[:3000] + "\n\n[...context trimmed...]\n\n" + prompt[-3000:]
-        headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json"
-        }
         payload = {
             "model": self.model,
             "messages": [{"role": "user", "content": prompt}],
-            "temperature": 0.1,
             "max_tokens": 512
         }
-        wait_times = [20, 40, 80]
         for attempt, wait_time in enumerate(wait_times):
             try:
                 response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
                 response.raise_for_status()
-                data = response.json()
-                return data["choices"][0]["message"]["content"].strip()
             except requests.exceptions.HTTPError as e:
-                status = response.status_code
-                if status in [429, 503, 500]:
-                    print(f"Groq API Error ({status})! Waiting {wait_time}s... (Attempt {attempt+1}/3)")
                     time.sleep(wait_time)
                 else:
                     raise e
         raise Exception("Failed after 3 attempts.")
     def __call__(self, question: str, task_id: str) -> str:
         print(f"\nQuestion: {question[:100]}...")
-        # Download and immediately read any attached file
         filename, file_content = download_and_read_task_file(task_id)
         file_context = ""
         if filename and file_content:
-            file_context = f"\n\n[ATTACHED FILE: '{filename}']\n{file_content}\n[END OF FILE]"
-        elif filename:
-            file_context = f"\n\n[ATTACHED FILE: '{filename}' - could not read content]"
-        system = """You are an expert AI solving benchmark questions. Think step by step.
-TOOLS AVAILABLE:
-- SEARCH: <query>
-- WIKIPEDIA: <query>
-- FETCH: <url>
-- PYTHON: ```python ... ```  (always use print())
-OUTPUT FORMAT:
-THOUGHT: <reasoning>
 SEARCH: <query>
-Or when done:
-ANSWER: <exact answer>
-STRICT RULES:
-- If file content is provided above, use it directly - DO NOT re-read it
-- Answer must be exact: numbers only (no units unless asked), short phrases
-- No articles (a/the), no commas in numbers
-- Comma-separated list if multiple items needed
-- Do NOT say "Unable to determine" - always give your best guess"""
         history = []
         initial_prompt = f"{system}\n\nQuestion: {question}{file_context}"
@@ -254,26 +226,31 @@ STRICT RULES:
             if not history:
                 prompt = initial_prompt
             else:
-                recent = history[-3:]
                 exchanges = "\n\n".join([
-                    f"Step {i+1}:\nAction: {h['action']}\nResult: {h['result'][:400]}"
                     for i, h in enumerate(recent)
                 ])
-                prompt = f"{system}\n\nQuestion: {question}{file_context}\n\nPrevious steps:\n{exchanges}\n\nContinue:"
             response = self.call_llm(prompt)
-            print(f"  LLM [{iteration}]: {response[:200]}...")
             fetch_match = re.search(r'FETCH:\s*(https?://\S+)', response)
             search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
             wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
             python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
-            answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
             if answer_match:
                 answer = answer_match.group(1).strip()
                 print(f"  Final Answer: {answer}")
                 return answer
             elif fetch_match:
                 url = fetch_match.group(1).strip()
                 print(f"  Tool: FETCH({url[:80]})")
@@ -289,43 +266,33 @@ STRICT RULES:
                 print(f"  Tool: WIKIPEDIA({query})")
                 result = wikipedia_search(query)
                 history.append({"action": f"WIKIPEDIA: {query}", "result": result})
-            elif python_match:
-                code = python_match.group(1).strip()
-                print(f"  Tool: PYTHON({code[:60]}...)")
-                result = run_python(code)
-                history.append({"action": f"PYTHON: {code[:100]}", "result": result})
             else:
-                history.append({"action": "none", "result": "No valid tool. Use SEARCH, WIKIPEDIA, FETCH, PYTHON, or ANSWER."})
-        # Fallback: force a best-guess answer
-        recent = history[-3:]
-        exchanges = "\n\n".join([f"Action: {h['action']}\nResult: {h['result'][:300]}" for h in recent])
-        fallback_prompt = (
-            f"{system}\n\nQuestion: {question}{file_context}\n\n"
-            f"Steps taken:\n{exchanges}\n\n"
-            f"You MUST give a final answer now. Do not say 'unable to determine'. "
-            f"Use your best judgment. Output ONLY: ANSWER: <answer>"
         )
-        last = self.call_llm(fallback_prompt)
-        answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
-        if answer_match:
-            return answer_match.group(1).strip()
         return last.strip().split('\n')[0][:200]
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if profile:
-        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
     try:
         agent = SmartAgent()
     except Exception as e:
@@ -334,11 +301,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
-        if not questions_data:
-            return "Fetched questions list is empty.", None
         print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
         return f"Error fetching questions: {e}", None
@@ -351,7 +316,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         question_text = item.get("question")
         if not task_id or question_text is None:
             continue
         try:
             submitted_answer = agent(question_text, task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
@@ -359,7 +323,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         except Exception as e:
             print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
         time.sleep(30)
     if not answers_payload:
@@ -367,7 +330,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
@@ -381,8 +344,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
         except Exception:
             error_detail += f" Response: {e.response.text[:500]}"
         return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
@@ -392,16 +354,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 with gr.Blocks() as demo:
     gr.Markdown("# 🤖 Smart Agent — GAIA Benchmark Runner")
-    gr.Markdown(
-        """
         **Powered by Groq (Llama 3.3 70B)**
-        **Instructions:**
-        1. Make sure `GROQ_API_KEY` is set in your Space secrets
-        2. `requirements.txt` must include: `gradio`, `requests`, `pandas`, `openpyxl`, `ddgs`, `beautifulsoup4`
-        3. Log in with your Hugging Face account below
-        4. Click **Run Evaluation & Submit All Answers**
-        """
-    )
     gr.LoginButton()
     run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)

 import pandas as pd
 import re
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def download_and_read_task_file(task_id: str):
     url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
         response = requests.get(url, timeout=15)
         if response.status_code != 200:
             return None, ""
         cd = response.headers.get('content-disposition', '')
         filename = f"file_{task_id[:8]}.tmp"
         match = re.search(r'filename="?([^"]+)"?', cd)
         if match:
             filename = match.group(1)
         with open(filename, 'wb') as f:
             f.write(response.content)
         print(f"  [File downloaded: {filename}]")
         ext = filename.lower().split('.')[-1]
         if ext in ['xlsx', 'xls']:
             try:
+                df_dict = pd.read_excel(filename, sheet_name=None)
                 content = ""
+                for sheet, data in df_dict.items():
                     content += f"Sheet: {sheet}\n{data.to_string()}\n\n"
+                return filename, content[:4000]
             except Exception as e:
                 return filename, f"Excel read error: {e}"
         elif ext == 'py':
             try:
                 with open(filename, 'r', encoding='utf-8') as f:
                     return filename, f.read()
             except Exception as e:
                 return filename, f"Python file read error: {e}"
         elif ext in ['txt', 'csv', 'json', 'md']:
             try:
                 with open(filename, 'r', encoding='utf-8') as f:
+                    return filename, f.read()[:4000]
             except Exception as e:
                 return filename, f"Text read error: {e}"
         elif ext in ['mp3', 'wav', 'ogg', 'm4a']:
             try:
                 import whisper
                 model = whisper.load_model("tiny")
                 result = model.transcribe(filename)
                 return filename, f"Audio transcript: {result['text']}"
             except Exception:
+                return filename, f"Audio file '{filename}' - cannot transcribe without whisper."
         else:
             try:
                 with open(filename, 'r', encoding='utf-8') as f:
+                    return filename, f.read()[:4000]
             except Exception:
+                return filename, f"Binary file '{filename}' - {len(response.content)} bytes."
     except Exception as e:
+        print(f"  File download error: {e}")
         return None, ""
         for r in results:
             output.append(f"Title: {r.get('title','')}\nURL: {r.get('href','')}\nSnippet: {r.get('body','')[:300]}")
         return "\n---\n".join(output)
+    except Exception:
         try:
             from duckduckgo_search import DDGS
             with DDGS() as ddgs:
             return "\n---\n".join(output)
         except Exception as e:
             return f"Search error: {e}"
 def web_fetch(url: str) -> str:
                 tag.decompose()
             text = soup.get_text(separator="\n", strip=True)
             text = re.sub(r'\n{3,}', '\n\n', text)
+            return text[:2000]
         except ImportError:
+            return response.text[:2000]
     except Exception as e:
         return f"Fetch error: {e}"
         pages = summary_data.get("query", {}).get("pages", {})
         for page_id, page in pages.items():
             extract = page.get("extract", "No content available.")
+            return f"Wikipedia: {title}\n\n{extract[:2000]}"
         return "No content found."
     except Exception as e:
         return f"Wikipedia error: {e}"
         exec_globals = {}
         exec(code, exec_globals)
         output = sys.stdout.getvalue()
+        return output[:1500] if output else "Code ran but printed nothing. Add print() statements."
     except Exception as e:
         return f"Python error: {e}"
     finally:
         print(f"SmartAgent initialized with Groq ({self.model})")
     def call_llm(self, prompt: str) -> str:
+        if len(prompt) > 7000:
+            prompt = prompt[:3000] + "\n\n[...trimmed...]\n\n" + prompt[-3000:]
+        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
         payload = {
             "model": self.model,
             "messages": [{"role": "user", "content": prompt}],
+            "temperature": 0.0,
             "max_tokens": 512
         }
+        wait_times = [25, 50, 100]
         for attempt, wait_time in enumerate(wait_times):
             try:
                 response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
                 response.raise_for_status()
+                return response.json()["choices"][0]["message"]["content"].strip()
             except requests.exceptions.HTTPError as e:
+                if response.status_code in [429, 503, 500]:
+                    print(f"Groq Error ({response.status_code})! Waiting {wait_time}s...")
                     time.sleep(wait_time)
                 else:
                     raise e
         raise Exception("Failed after 3 attempts.")
     def __call__(self, question: str, task_id: str) -> str:
         print(f"\nQuestion: {question[:100]}...")
         filename, file_content = download_and_read_task_file(task_id)
         file_context = ""
         if filename and file_content:
+            file_context = f"\n\n[FILE '{filename}' CONTENT]:\n{file_content}\n[END FILE]"
+        system = """You are a precise AI assistant solving benchmark questions with EXACT answers required.
+TOOLS (use ONE per response):
 SEARCH: <query>
+WIKIPEDIA: <query>
+FETCH: <full_url>
+PYTHON:
+```python
+# code here - always use print()
+```
+When you have the answer:
+ANSWER: <value>
+CRITICAL RULES:
+1. NEVER guess - only answer when you have verified the information from a source
+2. For reversed/encoded text questions - use PYTHON to decode immediately
+3. For file questions - the file content is provided above, analyze it with PYTHON
+4. For math/counting - use PYTHON to compute
+5. Answer format must be EXACT:
+   - Numbers: digits only, no units unless explicitly asked
+   - Lists: comma separated, alphabetical if asked, exact spelling
+   - Names: exact as found in source
+6. If you see a URL in the question - FETCH it first
+7. Do NOT make up data - search for it"""
         history = []
         initial_prompt = f"{system}\n\nQuestion: {question}{file_context}"
             if not history:
                 prompt = initial_prompt
             else:
+                recent = history[-4:]
                 exchanges = "\n\n".join([
+                    f"Step {i+1}: {h['action']}\nResult: {h['result'][:500]}"
                     for i, h in enumerate(recent)
                 ])
+                prompt = f"{system}\n\nQuestion: {question}{file_context}\n\nSteps so far:\n{exchanges}\n\nNext step:"
             response = self.call_llm(prompt)
+            print(f"  LLM [{iteration}]: {response[:250]}...")
+            answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
             fetch_match = re.search(r'FETCH:\s*(https?://\S+)', response)
             search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
             wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
             python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
             if answer_match:
                 answer = answer_match.group(1).strip()
                 print(f"  Final Answer: {answer}")
                 return answer
+            elif python_match:
+                code = python_match.group(1).strip()
+                print(f"  Tool: PYTHON")
+                result = run_python(code)
+                history.append({"action": f"PYTHON: {code[:150]}", "result": result})
             elif fetch_match:
                 url = fetch_match.group(1).strip()
                 print(f"  Tool: FETCH({url[:80]})")
                 print(f"  Tool: WIKIPEDIA({query})")
                 result = wikipedia_search(query)
                 history.append({"action": f"WIKIPEDIA: {query}", "result": result})
             else:
+                history.append({"action": "none", "result": "Use SEARCH, WIKIPEDIA, FETCH, PYTHON, or ANSWER."})
+        # Forced fallback
+        recent = history[-4:]
+        exchanges = "\n\n".join([f"{h['action']}\n-> {h['result'][:400]}" for h in recent])
+        fallback = (
+            f"Question: {question}{file_context}\n\n"
+            f"Research done:\n{exchanges}\n\n"
+            f"Based on the research above, give the single best answer. "
+            f"Output ONLY: ANSWER: <answer>"
         )
+        last = self.call_llm(fallback)
+        m = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
+        if m:
+            return m.group(1).strip()
         return last.strip().split('\n')[0][:200]
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if profile:
+        username = profile.username
         print(f"User logged in: {username}")
     else:
         return "Please Login to Hugging Face with the button.", None
     try:
         agent = SmartAgent()
     except Exception as e:
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
+        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
         return f"Error fetching questions: {e}", None
         question_text = item.get("question")
         if not task_id or question_text is None:
             continue
         try:
             submitted_answer = agent(question_text, task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
         except Exception as e:
             print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
         time.sleep(30)
     if not answers_payload:
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     try:
+        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
+            error_detail += f" Detail: {e.response.json().get('detail', e.response.text)}"
         except Exception:
             error_detail += f" Response: {e.response.text[:500]}"
         return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
 with gr.Blocks() as demo:
     gr.Markdown("# 🤖 Smart Agent — GAIA Benchmark Runner")
+    gr.Markdown("""
         **Powered by Groq (Llama 3.3 70B)**
+        1. Set `GROQ_API_KEY` in Space secrets
+        2. `requirements.txt`: `gradio requests pandas openpyxl ddgs beautifulsoup4`
+        3. Login and click Run
+    """)
     gr.LoginButton()
     run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)