Spaces:
Running
Running
Upload 3 files
Browse files- config.py +52 -0
- customtools.py +261 -0
- gaia_agent.py +386 -0
config.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Configuration and constants for the GAIA agent.
Centralized configuration for easy management and customization.
"""

import os

from dotenv import load_dotenv

load_dotenv()

# ==================== API KEYS ====================
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")

# ==================== LLM CONFIGURATION ====================
LLM_MODEL = "inclusionai/Ling-2.6-1T:free"
LLM_TEMPERATURE = 0          # deterministic output for reproducible answers
LLM_MAX_ITERATIONS = 5

# ==================== TOOL CONFIGURATION ====================
WIKIPEDIA_MAX_PAGES = 2
WIKIPEDIA_CHAR_LIMIT = 8_000    # truncate page text to keep prompts small

YOUTUBE_CHAR_LIMIT = 10_000

WEB_SEARCH_RESULTS_LIMIT = 3

EXCEL_PREVIEW_ROWS = 50

# ==================== OUTPUT CONFIGURATION ====================
# The original hard-coded absolute path is machine-specific; allow overriding
# via the GAIA_OUTPUT_FILE environment variable while keeping the old default
# so existing setups are unaffected.
OUTPUT_FILE = os.getenv("GAIA_OUTPUT_FILE", "/home/nitin/AI/hfagent/results.jsonl")
FINAL_ANSWER_MAX_LENGTH = 100
REASONING_TRACE_MAX_LENGTH = 200

# ==================== TOOL NAMES ====================
# Canonical names for each tool; values must match the keys of the TOOLS
# registry in gaia_agent.py.
TOOL_NAMES = {
    "WEB_SEARCH": "web_search",
    "WIKI_SEARCH": "wikisearch",
    "YOUTUBE_TRANSCRIPT": "youtube_transcript",
    "EXCEL_ANALYSIS": "load_and_analyze_excel_file",
    "IMAGE_TEXT": "extract_text_from_image",
    "AUDIO_TRANSCRIBE": "transcribe_audio",
    "ADD": "addition_tool",
    "SUBTRACT": "subtraction_tool",
    "MULTIPLY": "multiplication_tool",
    "NONE": "none",
}

# ==================== VALIDATION ====================
VALID_EXCEL_EXTENSIONS = (".xlsx", ".xls", ".csv")
VALID_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif")
VALID_AUDIO_EXTENSIONS = (".mp3", ".wav", ".m4a", ".flac", ".ogg")
|
customtools.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Custom tools for the GAIA agent.
|
| 3 |
+
Includes tools for web search, file analysis, text extraction, and more.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import re
|
| 8 |
+
import subprocess
|
| 9 |
+
from tempfile import NamedTemporaryFile
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
import cv2
|
| 13 |
+
import pandas as pds
|
| 14 |
+
import pytesseract
|
| 15 |
+
import whisper
|
| 16 |
+
from dotenv import load_dotenv
|
| 17 |
+
from langchain_core.messages import HumanMessage
|
| 18 |
+
from langchain_core.tools import tool
|
| 19 |
+
from langchain_community.document_loaders import WikipediaLoader
|
| 20 |
+
from langchain_openrouter import ChatOpenRouter
|
| 21 |
+
from tavily import TavilyClient
|
| 22 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 23 |
+
|
| 24 |
+
from config import (
|
| 25 |
+
OPENROUTER_API_KEY,
|
| 26 |
+
TAVILY_API_KEY,
|
| 27 |
+
LLM_MODEL,
|
| 28 |
+
LLM_TEMPERATURE,
|
| 29 |
+
WIKIPEDIA_MAX_PAGES,
|
| 30 |
+
WIKIPEDIA_CHAR_LIMIT,
|
| 31 |
+
YOUTUBE_CHAR_LIMIT,
|
| 32 |
+
WEB_SEARCH_RESULTS_LIMIT,
|
| 33 |
+
EXCEL_PREVIEW_ROWS,
|
| 34 |
+
)
|
| 35 |
+
from prompts import (
|
| 36 |
+
EXCEL_ANALYSIS_PROMPT_TEMPLATE,
|
| 37 |
+
WEB_SEARCH_EXTRACTION_PROMPT_TEMPLATE,
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
load_dotenv()
|
| 41 |
+
@tool
def wikisearch(query: str, max_pages: int = None) -> str:
    """Search Wikipedia pages and return concatenated page texts."""
    max_pages = max_pages or WIKIPEDIA_MAX_PAGES
    print(f"wikisearch called with query: {query}, max_pages: {max_pages}")

    try:
        loader = WikipediaLoader(query=query, load_max_docs=max_pages)
        pages = loader.load()
        combined = "\n\n---\n\n".join(page.page_content for page in pages)
        # Truncate so the downstream agent prompt stays a manageable size.
        return combined[:WIKIPEDIA_CHAR_LIMIT]
    except Exception as e:
        return f"Error searching Wikipedia: {str(e)}"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@tool
def youtube_transcript(url: str, chars: int = None) -> str:
    """Fetch a YouTube video's transcript.

    Args:
        url: Video URL. Accepts watch?v=..., youtu.be/..., /embed/... and
            /shorts/... forms (the original only handled "?v=").
        chars: Maximum characters to return (defaults to YOUTUBE_CHAR_LIMIT).

    Returns:
        The transcript text truncated to `chars`, or an error message string.
    """
    chars = chars or YOUTUBE_CHAR_LIMIT
    # Video IDs are always 11 chars; match the common URL shapes.
    video_id_match = re.search(
        r"(?:[?&]v=|youtu\.be/|/embed/|/shorts/)([A-Za-z0-9_\-]{11})", url
    )

    if not video_id_match:
        return "Error: Could not extract video ID from URL"

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id_match.group(1))
        text = " ".join(piece["text"] for piece in transcript)
        return text[:chars]
    except Exception as exc:
        print(f"Error fetching YouTube transcript: {exc}")
        return f"Error fetching transcript: {str(exc)}"
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@tool
def web_search(query: str) -> str:
    """Run a Tavily web search and return a short, readable result summary."""
    print(f"web_search called with query: {query}")

    if not TAVILY_API_KEY:
        return "Error: TAVILY_API_KEY not set in environment"

    try:
        client = TavilyClient(api_key=TAVILY_API_KEY)
        response = client.search(query)
        print(f"Search results obtained")

        # Render the top hits as "- title: snippet" lines.
        if response and isinstance(response, dict) and "results" in response:
            lines = [
                f"- {hit.get('title', '')}: {hit.get('content', '')[:200]}"
                for hit in response["results"][:WEB_SEARCH_RESULTS_LIMIT]
            ]
            formatted = "\n".join(lines)
            return formatted if formatted else "No results found"

        # Unexpected response shape — fall back to its string form.
        return str(response)
    except Exception as e:
        print(f"Error during web search: {e}")
        return f"Error during web search: {str(e)}"
|
| 98 |
+
|
| 99 |
+
@tool
def addition_tool(a: str, b: str) -> str:
    """Add two numbers represented as strings."""
    try:
        total = float(a) + float(b)
        return str(total)
    except ValueError:
        # Non-numeric input: report it instead of raising.
        return "Invalid input: both a and b must be numbers."
    except Exception as e:
        return f"Error during addition: {str(e)}"
|
| 111 |
+
|
| 112 |
+
@tool
def subtraction_tool(a: str, b: str) -> str:
    """Subtract two numbers represented as strings."""
    try:
        difference = float(a) - float(b)
        return str(difference)
    except ValueError:
        # Non-numeric input: report it instead of raising.
        return "Invalid input: both a and b must be numbers."
    except Exception as e:
        return f"Error during subtraction: {str(e)}"
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
@tool
def multiplication_tool(a: str, b: str) -> str:
    """Multiply two numbers represented as strings."""
    try:
        product = float(a) * float(b)
        return str(product)
    except ValueError:
        # Non-numeric input: report it instead of raising.
        return "Invalid input: both a and b must be numbers."
    except Exception as e:
        return f"Error during multiplication: {str(e)}"
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
@tool
def division_tool(a: str, b: str) -> str:
    """Divide two numbers represented as strings."""
    try:
        dividend, divisor = float(a), float(b)
        # Guard the only arithmetic failure float division can hit here.
        if divisor == 0:
            return "Error: Division by zero is not allowed."
        return str(dividend / divisor)
    except ValueError:
        return "Invalid input: both a and b must be numbers."
    except Exception as e:
        return f"Error during division: {str(e)}"
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from image files using OCR.
    Works with .jpg, .png, .bmp, .tiff formats only.

    Args:
        image_path: Full path to the image file

    Returns:
        The OCR'd text prefixed with a header, or an error message string.
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None (no exception) for a missing or
            # unreadable file; catch it here instead of letting cvtColor
            # fail with a cryptic assertion error.
            return f"Error extracting text from image: could not read file '{image_path}'"

        # Binarize with Otsu thresholding, then invert back to dark-on-light,
        # which Tesseract handles best.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
        thresh = cv2.bitwise_not(thresh)

        # --oem 3: default engine; --psm 6: assume a uniform block of text.
        custom_config = r'--oem 3 --psm 6'
        full_text = pytesseract.image_to_string(thresh, config=custom_config)

        return f"Extracted text from image:\n\n{full_text}"
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
@tool
def run_python(code: str) -> str:
    """Execute Python code in a subprocess and return the last stdout line.

    The code is written to a temporary file, executed with a 45-second
    timeout using the same interpreter as this process, and the temp file
    is always removed afterwards.

    Args:
        code: Python source to execute.

    Returns:
        The final line of stdout, "" if there was no output, or
        "py_error:<exc>" on failure.
    """
    import sys

    path = None
    try:
        with NamedTemporaryFile(delete=False, suffix=".py", mode="w") as f:
            f.write(code)
            path = f.name

        # sys.executable guarantees the same interpreter as the host process;
        # a bare "python" may resolve to a different (or missing) binary.
        proc = subprocess.run(
            [sys.executable, path], capture_output=True, text=True, timeout=45
        )

        out = proc.stdout.strip().splitlines()
        return out[-1] if out else ""
    except Exception as exc:
        print(f"Error executing Python code: {exc}")
        return f"py_error:{exc}"
    finally:
        # delete=False means nothing removes the file for us — do it here so
        # repeated calls don't leak temp files.
        if path and os.path.exists(path):
            os.remove(path)
|
| 199 |
+
|
| 200 |
+
@tool
def load_and_analyze_excel_file(query: str, file_path: str) -> str:
    """
    Load and analyze data from Excel/CSV files (.xlsx, .xls, .csv).

    Reads the file with pandas, builds a textual preview (capped at
    EXCEL_PREVIEW_ROWS rows), and asks a fresh ChatOpenRouter instance to
    answer `query` against that preview.

    Args:
        query: Data analysis question (e.g., "Count records where status=active")
        file_path: Full path to the Excel/CSV file

    Returns:
        A summary of the loaded file followed by the LLM's analysis, or an
        error message string on failure.
    """
    print(f"load_and_analyze_excel_file called - Query: {query}, File: {file_path}")

    try:
        # Read the file based on extension
        if file_path.lower().endswith(".csv"):
            df = pds.read_csv(file_path)
        else:
            # Anything that is not .csv is assumed to be an Excel workbook.
            df = pds.read_excel(file_path)

        # Create basic data summary
        result = f"File loaded successfully.\n"
        result += f"Rows: {len(df)}, Columns: {len(df.columns)}\n"
        result += f"Column names: {', '.join(df.columns.tolist())}\n\n"

        # Prepare data context for LLM
        # NOTE(review): to_string(max_rows=...) can still be very large for
        # wide frames — confirm it stays within the model's context window.
        data_summary = f"DataFrame:\n{df.to_string(max_rows=EXCEL_PREVIEW_ROWS)}\n\nData Types:\n{df.dtypes.to_string()}"

        # Create analysis prompt
        analysis_prompt = EXCEL_ANALYSIS_PROMPT_TEMPLATE.format(
            data_summary=data_summary,
            query=query
        )

        # Get LLM analysis (a new client is constructed on every call)
        tool_llm = ChatOpenRouter(
            model=LLM_MODEL,
            temperature=LLM_TEMPERATURE,
            api_key=OPENROUTER_API_KEY,
        )

        message = HumanMessage(content=analysis_prompt)
        llm_response = tool_llm.invoke([message])

        result += f"Analysis:\n{llm_response.content}"
        print(f"Excel analysis completed")
        return result

    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
@tool
def transcribe_audio(audio_file: str) -> str:
    """Transcribe an audio file with Whisper and return the transcript text."""
    try:
        # "base" trades accuracy for speed; the model is loaded on every call.
        asr_model = whisper.load_model("base")
        result = asr_model.transcribe(audio=str(Path(audio_file)), language='en')
        print(f"Audio transcription completed")
        return result['text']
    except Exception as exc:
        print(f"Error transcribing audio: {exc}")
        return f"transcription_error:{exc}"
|
gaia_agent.py
ADDED
|
@@ -0,0 +1,386 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GAIA Agent - Multi-step reasoning agent for complex tasks.
|
| 3 |
+
Uses LanggraphStateGraph for workflow orchestration and multiple specialized tools.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import json
|
| 8 |
+
from typing import List, Dict, Any, Optional, Literal
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
from langchain_core.messages import HumanMessage
|
| 14 |
+
from langchain_openrouter import ChatOpenRouter
|
| 15 |
+
from langgraph.graph import StateGraph, START, END
|
| 16 |
+
from langgraph.checkpoint.memory import MemorySaver
|
| 17 |
+
from typing import TypedDict
|
| 18 |
+
|
| 19 |
+
from customtools import (
|
| 20 |
+
load_and_analyze_excel_file,
|
| 21 |
+
extract_text_from_image,
|
| 22 |
+
web_search,
|
| 23 |
+
wikisearch,
|
| 24 |
+
youtube_transcript,
|
| 25 |
+
addition_tool,
|
| 26 |
+
subtraction_tool,
|
| 27 |
+
multiplication_tool,
|
| 28 |
+
transcribe_audio,
|
| 29 |
+
)
|
| 30 |
+
from config import (
|
| 31 |
+
OPENROUTER_API_KEY,
|
| 32 |
+
LLM_MODEL,
|
| 33 |
+
LLM_TEMPERATURE,
|
| 34 |
+
OUTPUT_FILE,
|
| 35 |
+
FINAL_ANSWER_MAX_LENGTH,
|
| 36 |
+
REASONING_TRACE_MAX_LENGTH,
|
| 37 |
+
)
|
| 38 |
+
from prompts import (
|
| 39 |
+
PLANNER_PROMPT_TEMPLATE,
|
| 40 |
+
FINALIZER_PROMPT_TEMPLATE,
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
load_dotenv()
|
| 44 |
+
|
| 45 |
+
memory = MemorySaver()
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def connect_models():
    """Initialize and return the LLM instance."""
    try:
        print(f"Connecting to LLM: {LLM_MODEL}")
        # All model settings come from config so there is one place to tune.
        return ChatOpenRouter(
            model=LLM_MODEL,
            temperature=LLM_TEMPERATURE,
            api_key=OPENROUTER_API_KEY,
        )
    except Exception as e:
        print(f"Error initializing LLM: {e}")
        raise
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# Tool registry
# Maps the tool names the planner emits (see Step.tool and config.TOOL_NAMES)
# to the actual @tool callables imported from customtools. tool_node uses
# this dict to dispatch each plan step.
TOOLS = {
    "web_search": web_search,
    "addition_tool": addition_tool,
    "subtraction_tool": subtraction_tool,
    "multiplication_tool": multiplication_tool,
    "youtube_transcript": youtube_transcript,
    "load_and_analyze_excel_file": load_and_analyze_excel_file,
    "extract_text_from_image": extract_text_from_image,
    "wikisearch": wikisearch,
    "transcribe_audio": transcribe_audio,
}
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class AgentState(TypedDict):
    """State structure for the agent workflow."""
    # The user's original question.
    question: str
    # Ordered steps produced by the planner (serialized Step dicts).
    plan: List[Dict[str, Any]]
    # Index into `plan` of the step currently being executed.
    current_step: int
    # Tool name chosen for the current step ("none" for direct answers).
    selected_tool: Optional[str]
    # Raw input string handed to the selected tool.
    tool_input: Optional[str]
    # Output of the most recent tool invocation.
    tool_output: Optional[str]
    # Accumulated {"step": ..., "output": ...} records, one per executed step.
    intermediate_results: List[Dict[str, Any]]
    # Answer produced by the finalizer node.
    final_answer: Optional[str]
    # True once every plan step has been executed.
    done: bool
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class Step(BaseModel):
    """Represents a single step in the plan."""
    # 1-based position of the step within the plan.
    step_number: int
    # Human-readable description of what the step should accomplish.
    description: str
    # Which registered tool to run; must be a key of TOOLS, or "none" when
    # the step's answer is carried directly in tool_input.
    tool: Literal[
        "web_search",
        "wikisearch",
        "youtube_transcript",
        "load_and_analyze_excel_file",
        "extract_text_from_image",
        "transcribe_audio",
        "addition_tool",
        "subtraction_tool",
        "multiplication_tool",
        "none",
    ]
    # Raw input string passed to the tool (format is tool-specific, e.g.
    # "a,b" for the math tools or "query|file_path" for Excel analysis).
    tool_input: str
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class Plan(BaseModel):
    """Structured plan with multiple steps."""
    # Ordered list of steps; the workflow executes them sequentially.
    steps: List[Step]
|
| 112 |
+
def planner_node(state: AgentState):
    """Planner node: breaks down question into steps."""
    prompt = PLANNER_PROMPT_TEMPLATE.format(question=state['question'])

    # Constrain the LLM to emit JSON matching the Plan schema.
    structured_planner = llm.with_structured_output(Plan, method="json_schema")
    plan = structured_planner.invoke(prompt)

    print(f"Planner generated {len(plan.steps)} steps")

    new_state = dict(state)
    new_state.update(
        plan=[s.dict() for s in plan.steps],
        current_step=0,
        intermediate_results=[],
        done=False,
    )
    return new_state
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def execute_step_node(state: AgentState):
    """Execute step node: prepares tool invocation."""
    current = state["plan"][state["current_step"]]
    chosen = current.get("tool", "none")

    print(f"Executing step {state['current_step'] + 1}/{len(state['plan'])}: {chosen}")

    updated = dict(state)
    updated["tool_input"] = current.get("tool_input")
    updated["selected_tool"] = chosen
    return updated
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def tool_node(state: AgentState):
    """Tool execution node: invokes the selected tool.

    Looks up the tool chosen by the executor in TOOLS, pre-processes the raw
    string tool_input for tools that need structured arguments, then stores
    the tool's result (or an error message) in state["tool_output"].
    """
    tool_name = state.get("selected_tool")
    tool_input = state.get("tool_input")

    # "none" means this plan step carries its answer directly in tool_input.
    if tool_name == "none":
        return {**state, "tool_output": tool_input}

    print(f"Invoking tool: {tool_name}")
    tool = TOOLS.get(tool_name)

    # Special handling for load_and_analyze_excel_file: parse query|file_path format
    if tool_name == "load_and_analyze_excel_file" and isinstance(tool_input, str) and "|" in tool_input:
        parts = tool_input.split("|", 1)
        query = parts[0].strip()
        file_path = parts[1].strip()
        tool_input = {"query": query, "file_path": file_path}
        print(f"Parsed Excel input - Query: '{query[:50]}...', File: '{file_path}'")

    # Special handling for math tools: parse "a,b" format
    # NOTE(review): assumes tool_input is a string with exactly one comma;
    # extra commas make the unpack raise and are reported as a parse error.
    if tool in (addition_tool, subtraction_tool, multiplication_tool):
        try:
            a, b = tool_input.split(",")
            tool_input = {"a": a.strip(), "b": b.strip()}
        except Exception as e:
            print(f"Error parsing math tool input: {e}")
            return {**state, "tool_output": f"Error parsing input: {e}"}

    # Unknown tool name: TOOLS.get() above returned None.
    if not tool:
        return {**state, "tool_output": f"Unknown tool: {tool_name}"}

    try:
        result = tool.invoke(tool_input)
    except Exception as e:
        # Tool failures are folded into the output so the workflow continues.
        print(f"Error invoking tool {tool_name}: {e}")
        result = f"Tool error: {str(e)}"

    return {**state, "tool_output": result}
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def update_state_node(state: AgentState):
    """Update state node: records tool output and progresses to next step.

    Returns a new state dict. The intermediate_results list is rebuilt
    rather than mutated in place, so earlier state snapshots held by the
    graph/checkpointer are not aliased and silently changed.
    """
    step = state["plan"][state["current_step"]]

    # Append this step's record without mutating the incoming state.
    results = state["intermediate_results"] + [{
        "step": step,
        "output": state["tool_output"],
    }]

    next_step = state["current_step"] + 1
    done = next_step >= len(state["plan"])

    return {
        **state,
        "intermediate_results": results,
        "current_step": next_step,
        "done": done,
    }
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def should_continue(state: AgentState):
    """Conditional edge: determines if workflow should continue or finalize."""
    if state["done"]:
        return "finalize"
    return "continue"
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def finalizer_node(state: AgentState):
    """Finalizer node: summarizes results and generates final answer."""
    # Condense each executed step into a one-line summary for the prompt.
    summaries = []
    for i, r in enumerate(state["intermediate_results"]):
        summaries.append(
            f"Step {i+1}: {r['step'].get('description', '')}\n Output: {str(r['output'])[:100]}..."
        )
    results_text = "\n".join(summaries)

    prompt = FINALIZER_PROMPT_TEMPLATE.format(
        question=state['question'],
        intermediate_results=results_text
    )

    answer = llm.invoke(prompt)

    return {**state, "final_answer": answer.content}
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def create_agent_workflow():
    """Build and compile the LangGraph state machine for the agent.

    Flow: planner -> (executor -> tool -> updater) loop -> finalizer -> END.
    The updater's conditional edge repeats the loop until should_continue
    reports every planned step has run.
    """
    graph = StateGraph(AgentState)

    # Nodes
    graph.add_node("planner", planner_node)
    graph.add_node("executor", execute_step_node)
    graph.add_node("tool", tool_node)
    graph.add_node("updater", update_state_node)
    graph.add_node("finalizer", finalizer_node)
    # Entry
    graph.set_entry_point("planner")

    # Flow
    graph.add_edge("planner", "executor")
    graph.add_edge("executor", "tool")
    graph.add_edge("tool", "updater")

    # Loop
    graph.add_conditional_edges(
        "updater",
        should_continue,
        {
            "continue": "executor",
            "finalize": "finalizer"
        }
    )

    # End
    graph.add_edge("finalizer", END)

    return graph.compile()
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def format_reasoning_trace(intermediate_results: List[Dict[str, Any]]) -> str:
    """Format intermediate results into a readable reasoning trace.

    Args:
        intermediate_results: List of {"step": {...}, "output": ...} records
            produced by the agent workflow.

    Returns:
        Multi-line text with one "Step / Tool / Output" group per record;
        each output is truncated to 200 characters.
    """
    trace_lines = []
    for result in intermediate_results:
        step = result.get("step", {})
        # Coerce to str first: tool outputs are not guaranteed to be strings,
        # and slicing a non-string (e.g. an int) would raise TypeError. The
        # original already used len(str(output)) for the ellipsis check but
        # sliced the raw value.
        output = str(result.get("output", ""))
        description = step.get("description", "Unknown step")
        tool = step.get("tool", "none")

        trace_lines.append(f"Step: {description}")
        trace_lines.append(f" Tool: {tool}")
        trace_lines.append(f" Output: {output[:200]}{'...' if len(output) > 200 else ''}")

    return "\n".join(trace_lines)
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def process_questions(questions_file: str = None, questions_list: List[str] = None) -> str:
    """
    Process multiple questions and save results to a file

    Args:
        questions_file: Path to a file containing questions (one per line)
        questions_list: List of questions to process

    Returns:
        Path to the output file with results

    Raises:
        ValueError: If neither questions_file nor a non-empty questions_list
            is provided.
    """
    # The graph nodes (planner/finalizer) read the module-level `llm`, so it
    # must be initialized before the workflow runs.
    global llm
    llm = connect_models()
    print(f"LLM available: {llm}")
    agent = create_agent_workflow()

    # Get questions from either file or list
    if questions_file:
        with open(questions_file, 'r') as f:
            questions = [q.strip() for q in f.readlines() if q.strip()]
    elif questions_list:
        questions = questions_list
    else:
        raise ValueError("Either questions_file or questions_list must be provided")

    results = []

    for idx, question in enumerate(questions, 1):
        task_id = f"task_id_{idx}"
        print(f"\n{'='*80}")
        print(f"Processing {task_id}: {question[:80]}...")
        print(f"{'='*80}")

        try:
            # Run the agent; the planner node fills in the remaining state keys.
            result = agent.invoke({
                "question": question
            })

            # Extract the final answer and reasoning trace
            final_answer = result.get("final_answer", "No answer generated")
            intermediate_results = result.get("intermediate_results", [])

            # Format the reasoning trace
            reasoning_trace = format_reasoning_trace(intermediate_results)

            # Create the result object
            task_result = {
                "task_id": task_id,
                "model_answer": final_answer,
                "reasoning_trace": reasoning_trace
            }

            results.append(task_result)

            print(f"Completed {task_id}")
            print(f"Answer: {final_answer[:100]}...")

        except Exception as e:
            # Record the failure so one bad question doesn't abort the batch.
            print(f"✗ Error processing {task_id}: {str(e)}")
            task_result = {
                "task_id": task_id,
                "model_answer": f"Error: {str(e)}",
                "reasoning_trace": "Failed to execute agent"
            }
            results.append(task_result)

    # Save results as JSON Lines. Use the shared OUTPUT_FILE constant from
    # config instead of re-hardcoding the path here (the original duplicated
    # the literal and ignored the imported constant).
    output_file = OUTPUT_FILE
    with open(output_file, 'w') as f:
        for result in results:
            f.write(json.dumps(result) + '\n')

    print(f"\n{'='*80}")
    print(f"All tasks completed. Results saved to: {output_file}")
    print(f"{'='*80}")

    return output_file
|
| 361 |
+
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
if __name__ == "__main__":
    # NOTE: the original had a `global llm` here; `global` is a no-op at
    # module scope, so it was removed. process_questions() declares it.

    # Example questions to process (uncomment or add your own). An empty
    # list makes process_questions raise ValueError, since an empty
    # questions_list is indistinguishable from "none provided".
    questions = [
        # "What is the square of the population of France in millions?",
        # "What is 50 plus 75?"
    ]

    # Process all questions
    output_file = process_questions(questions_list=questions)

    # Print the results
    print("\nResults from file:")
    with open(output_file, 'r') as f:
        for line in f:
            result = json.loads(line)
            print(f"\nTask ID: {result['task_id']}")
            print(f"Answer: {result['model_answer']}")
            print(f"Reasoning:\n{result['reasoning_trace']}")
|