peachchange committed on
Commit
42a7415
·
verified ·
1 Parent(s): 72a4ffb

Upload 5 files

Browse files
Files changed (5) hide show
  1. __init__.py +0 -0
  2. app_langgraph.py +101 -0
  3. math_tools.py +52 -0
  4. multimodal_tools.py +177 -0
  5. search_tools.py +53 -0
__init__.py ADDED
File without changes
app_langgraph.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangGraph Agent"""
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from langgraph.graph import START, StateGraph, MessagesState
5
+ from langgraph.prebuilt import tools_condition
6
+ from langgraph.prebuilt import ToolNode
7
+ from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace, HuggingFaceEmbeddings
8
+ from langchain_core.messages import SystemMessage, HumanMessage
9
+ from langchain_core.globals import set_debug
10
+ from langchain_groq import ChatGroq
11
+ from tools.search_tools import web_search, arvix_search, wiki_search
12
+ from tools.math_tools import multiply, add, subtract, divide
13
+ # from supabase.client import Client, create_client
14
+ # from langchain.tools.retriever import create_retriever_tool
15
+ # from langchain_community.vectorstores import SupabaseVectorStore
16
+ import json
17
+ from tools.multimodal_tools import extract_text, analyze_image_tool, analyze_audio_tool
18
+ from langchain_google_genai import ChatGoogleGenerativeAI
19
+
20
# set_debug(True)
# Load API keys (HF_TOKEN, GEMINI_API_KEY, ...) from a local .env file
# at import time, before any model client is constructed.
load_dotenv()

# Every tool exposed to the agent: basic arithmetic, web/wiki/arxiv
# search, and multimodal helpers (OCR, image analysis, audio analysis).
# NOTE(review): `modulus` exists in math_tools but is not registered here.
tools = [
    multiply,
    add,
    subtract,
    divide,
    web_search,
    wiki_search,
    arvix_search,
    extract_text,
    analyze_image_tool,
    analyze_audio_tool
]
35
+
36
def build_graph():
    """Build and compile the LangGraph agent graph.

    Wires a Gemini chat model (with `tools` bound) into a two-node
    StateGraph: an ``assistant`` node that calls the LLM, and a ``tools``
    node that executes requested tool calls; control loops between them
    via ``tools_condition`` until the model emits a final answer.

    Returns:
        The compiled LangGraph graph, ready for ``.invoke()``.
    """
    hf_token = os.getenv("HF_TOKEN")  # only used by the commented-out HF backend below
    api_key = os.getenv("GEMINI_API_KEY")
    # llm = HuggingFaceEndpoint(
    #     repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    #     huggingfacehub_api_token=hf_token,
    # )
    # chat = ChatHuggingFace(llm=llm, verbose=True)
    # llm_with_tools = chat.bind_tools(tools)

    # llm = ChatGroq(model="qwen-qwq-32b", temperature=0)
    # llm_with_tools = llm.bind_tools(tools)

    chat = ChatGoogleGenerativeAI(
        model="gemini-2.5-pro-preview-05-06",
        temperature=0,
        max_retries=2,
        google_api_key=api_key,
        thinking_budget=0,
    )
    chat_with_tools = chat.bind_tools(tools)

    def assistant(state: MessagesState):
        # LLM node: prepend the system prompt and call the tool-bound model.
        # FIX: the prompt was a bare str, which LangChain coerces into a
        # *HumanMessage* when placed in the message list — wrap it in
        # SystemMessage so it is actually sent as the system turn.
        # FIX: the implicitly concatenated literals were missing joining
        # spaces ("constraints.Pay attention"), garbling the prompt.
        sys_msg = SystemMessage(content=(
            "You are a helpful assistant with access to tools. Understand user requests accurately. Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints. "
            "Pay attention: your output needs to contain only the final answer without any reasoning since it will be strictly evaluated against a dataset which contains only the specific response. "
            "Your final output needs to be just the string or integer containing the answer, not an array or technical stuff."
        ))
        return {
            "messages": [chat_with_tools.invoke([sys_msg] + state["messages"])],
        }

    ## The graph
    builder = StateGraph(MessagesState)

    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        # If the latest message requires a tool, route to tools
        # Otherwise, provide a direct response
        tools_condition,
    )
    builder.add_edge("tools", "assistant")
    return builder.compile()
82
+
83
# test
if __name__ == "__main__":
    graph = build_graph()

    # Read the evaluation set: one JSON object per line.
    with open('sample.jsonl', 'r') as jsonl_file:
        json_list = list(jsonl_file)

    start = 10  # revisit 5, 8,
    end = start + 1
    for json_str in json_list[start:end]:
        json_data = json.loads(json_str)
        print(f"Question::::::::: {json_data['Question']}")
        print(f"Final answer::::: {json_data['Final answer']}")

        # Run one question through the agent and dump the full transcript.
        result = graph.invoke(
            {"messages": [HumanMessage(content=json_data['Question'])]}
        )
        for m in result["messages"]:
            m.pretty_print()
math_tools.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+
3
@tool
def multiply(a: int, b: int) -> int:
    """Return the product of two integers.

    Args:
        a: first int
        b: second int
    """
    product = a * b
    return product
11
+
12
@tool
def add(a: int, b: int) -> int:
    """Return the sum of two integers.

    Args:
        a: first int
        b: second int
    """
    total = a + b
    return total
21
+
22
@tool
def subtract(a: int, b: int) -> int:
    """Return the difference of two integers (a minus b).

    Args:
        a: first int
        b: second int
    """
    difference = a - b
    return difference
31
+
32
@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers.

    Args:
        a: first int (dividend)
        b: second int (divisor)

    Returns:
        The true quotient ``a / b``. FIX: annotated as ``float`` — the
        original said ``int``, but ``/`` is true division and always
        yields a float (e.g. ``divide(3, 2) == 1.5``).

    Raises:
        ValueError: if ``b`` is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
43
+
44
@tool
def modulus(a: int, b: int) -> int:
    """Get the modulus of two numbers.

    Args:
        a: first int
        b: second int
    """
    remainder = a % b
    return remainder
multimodal_tools.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
4
+ from langchain_google_genai import ChatGoogleGenerativeAI
5
+ from langchain.tools import Tool
6
+ from langchain_core.tools import tool
7
+
8
# Gemini API key is read at import time, so the environment must already
# be populated (e.g. by load_dotenv() in the entry module) before this
# module is imported.
api_key = os.getenv("GEMINI_API_KEY")

# Shared multimodal Gemini chat model used by every tool in this module
# (text extraction, image analysis, audio analysis).
vision_llm = ChatGoogleGenerativeAI(
    model= "gemini-2.5-flash-preview-05-20",
    temperature=0,
    max_retries=2,
    google_api_key=api_key
)
17
+
18
+ @tool("extract_text_tool", parse_docstring=True)
19
+ def extract_text(img_path: str) -> str:
20
+ """Extract text from an image file using a multimodal model.
21
+
22
+ Args:
23
+ img_path (str): The path to the image file from which to extract text.
24
+
25
+ Returns:
26
+ str: The extracted text from the image, or an empty string if an error occurs.
27
+ """
28
+ all_text = ""
29
+ try:
30
+ # Read image and encode as base64
31
+ with open(img_path, "rb") as image_file:
32
+ image_bytes = image_file.read()
33
+
34
+ image_base64 = base64.b64encode(image_bytes).decode("utf-8")
35
+
36
+ # Prepare the prompt including the base64 image data
37
+ message = [
38
+ HumanMessage(
39
+ content=[
40
+ {
41
+ "type": "text",
42
+ "text": (
43
+ "Extract all the text from this image. "
44
+ "Return only the extracted text, no explanations."
45
+ ),
46
+ },
47
+ {
48
+ "type": "image_url",
49
+ "image_url": {
50
+ "url": f"data:image/png;base64,{image_base64}"
51
+ },
52
+ },
53
+ ]
54
+ )
55
+ ]
56
+
57
+ # Call the vision-capable model
58
+ response = vision_llm.invoke(message)
59
+
60
+ # Append extracted text
61
+ all_text += response.content + "\n\n"
62
+
63
+ return all_text.strip()
64
+ except Exception as e:
65
+ # A butler should handle errors gracefully
66
+ error_msg = f"Error extracting text: {str(e)}"
67
+ print(error_msg)
68
+ return ""
69
+
70
+ @tool("analyze_image_tool", parse_docstring=True)
71
+ def analyze_image_tool(user_query: str, img_path: str) -> str:
72
+ """Answer the question reasoning on the image.
73
+
74
+ Args:
75
+ user_query (str): The question to be answered based on the image.
76
+ img_path (str): Path to the image file to be analyzed.
77
+
78
+ Returns:
79
+ str: The answer to the query based on image content, or an empty string if an error occurs.
80
+ """
81
+ all_text = ""
82
+ try:
83
+ # Read image and encode as base64
84
+ with open(img_path, "rb") as image_file:
85
+ image_bytes = image_file.read()
86
+
87
+ image_base64 = base64.b64encode(image_bytes).decode("utf-8")
88
+
89
+ # Prepare the prompt including the base64 image data
90
+ message = [
91
+ HumanMessage(
92
+ content=[
93
+ {
94
+ "type": "text",
95
+ "text": (
96
+ f"User query: {user_query}"
97
+ ),
98
+ },
99
+ {
100
+ "type": "image_url",
101
+ "image_url": {
102
+ "url": f"data:image/png;base64,{image_base64}"
103
+ },
104
+ },
105
+ ]
106
+ )
107
+ ]
108
+
109
+ # Call the vision-capable model
110
+ response = vision_llm.invoke(message)
111
+
112
+ # Append extracted text
113
+ all_text += response.content + "\n\n"
114
+
115
+ return all_text.strip()
116
+ except Exception as e:
117
+ # A butler should handle errors gracefully
118
+ error_msg = f"Error analyzing image: {str(e)}"
119
+ print(error_msg)
120
+ return ""
121
+
122
+ @tool("analyze_audio_tool", parse_docstring=True)
123
+ def analyze_audio_tool(user_query: str, audio_path: str) -> str:
124
+ """Answer the question by reasoning on the provided audio file.
125
+
126
+ Args:
127
+ user_query (str): The question to be answered based on the audio content.
128
+ audio_path (str): Path to the audio file (e.g., .mp3, .wav, .flac, .aac, .ogg).
129
+
130
+ Returns:
131
+ str: The answer to the query based on audio content, or an error message/empty string if an error occurs.
132
+ """
133
+ try:
134
+ # Determine MIME type from file extension
135
+ _filename, file_extension = os.path.splitext(audio_path)
136
+ file_extension = file_extension.lower()
137
+
138
+ supported_formats = {
139
+ ".mp3": "audio/mp3", ".wav": "audio/wav", ".flac": "audio/flac",
140
+ ".aac": "audio/aac", ".ogg": "audio/ogg"
141
+ }
142
+
143
+ if file_extension not in supported_formats:
144
+ return (f"Error: Unsupported audio file format '{file_extension}'. "
145
+ f"Supported extensions: {', '.join(supported_formats.keys())}.")
146
+ mime_type = supported_formats[file_extension]
147
+
148
+ # Read audio file and encode as base64
149
+ with open(audio_path, "rb") as audio_file:
150
+ audio_bytes = audio_file.read()
151
+ audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
152
+
153
+ # Prepare the prompt including the base64 audio data
154
+ message = [
155
+ HumanMessage(
156
+ content=[
157
+ {
158
+ "type": "text",
159
+ "text": f"User query: {user_query}",
160
+ },
161
+ {
162
+ "type": "audio",
163
+ "source_type": "base64",
164
+ "mime_type": mime_type,
165
+ "data": audio_base64
166
+ },
167
+ ]
168
+ )
169
+ ]
170
+
171
+ # Call the vision-capable model
172
+ response = vision_llm.invoke(message)
173
+ return response.content.strip()
174
+ except Exception as e:
175
+ error_msg = f"Error analyzing audio: {str(e)}"
176
+ print(error_msg)
177
+ return ""
search_tools.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ from langchain_community.document_loaders import WikipediaLoader
3
+ from langchain_community.document_loaders import ArxivLoader
4
+ # Search engine specifically for LLMs
5
+ # from langchain_community.tools.tavily_search import TavilySearchResults
6
+ from langchain_tavily import TavilySearch
7
+
8
+
9
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        dict: ``{"web_results": <formatted string>}``. FIX: the original
        annotation said ``-> str`` but the function returns a dict.
    """
    # print(f"Web search query:::::::::::: {query}")
    search_docs = TavilySearch(max_results=3).invoke({"query":query})
    # Wrap each hit in a pseudo-XML <Document> envelope so the LLM can
    # attribute content to its source URL/title.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc["url"]}" page="{doc["title"]}"/>\n{doc["content"]}\n</Document>'
            for doc in search_docs['results']
        ])
    # print(f"Web search result:::::::::::: {formatted_search_docs}")
    return {"web_results": formatted_search_docs}
24
+
25
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        dict: ``{"wiki_results": <formatted string>}``. FIX: the original
        annotation said ``-> str`` but the function returns a dict.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Wrap each page in a pseudo-XML <Document> envelope carrying its
    # source URL so the LLM can cite it.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ])

    return {"wiki_results": formatted_search_docs}
40
+
41
@tool
def arvix_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        dict: ``{"arvix_results": <formatted string>}``. FIX: the original
        annotation said ``-> str`` but the function returns a dict.

    Note:
        The name keeps the existing "arvix" spelling (sic) because callers
        import it under that name.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    # Truncate each paper to its first 1000 characters to keep the tool
    # output within a reasonable context budget.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return {"arvix_results": formatted_search_docs}