Spaces:

lablab-ai-amd-developer-hackathon
/

AndesOps-AI

Running

App Files Files Community

Álvaro Valenzuela Valdes committed about 3 hours ago

Commit

46928cd

1 Parent(s): 080b51f

feat: implement Llama 3.2 Vision support in chatbot for image analysis

Browse files

Files changed (2) hide show

backend/app/services/llm.py +54 -1
frontend/components/AgentChat.tsx +34 -5

backend/app/services/llm.py CHANGED Viewed

@@ -110,6 +110,7 @@ async def call_gemini_with_model(prompt: str, model_name: str | None = None, is_
         "Mixtral-8x7B (Groq)": "groq:mixtral-8x7b-32768",
         "Gemma-2-9B (Featherless)": "google/gemma-2-9b-it",
         "Llama-3.1-8B (Featherless)": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     }
     model_id = model_map.get(model_name, "gemini")
@@ -127,7 +128,15 @@ async def call_gemini_with_model(prompt: str, model_name: str | None = None, is_
             return await call_groq(prompt, "llama-3.3-70b-versatile")
         return res
     elif model_id.startswith("groq:"):
-        res = await call_groq(prompt, model=model_id[5:])
         if not res and settings.gemini_api_key:
             print("DEBUG: Groq failed or returned empty. Trying Gemini fallback.")
             return await call_gemini(prompt, is_json=is_json)
@@ -139,6 +148,50 @@ async def call_gemini_with_model(prompt: str, model_name: str | None = None, is_
             return await call_groq(prompt, "llama-3.3-70b-versatile")
         return res
 def _parse_gemini_response(output: str) -> dict | None:
     if not output:
         return None

         "Mixtral-8x7B (Groq)": "groq:mixtral-8x7b-32768",
         "Gemma-2-9B (Featherless)": "google/gemma-2-9b-it",
         "Llama-3.1-8B (Featherless)": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        "Llama-3.2-11B-Vision (Groq)": "groq:llama-3.2-11b-vision-preview",
     }
     model_id = model_map.get(model_name, "gemini")
             return await call_groq(prompt, "llama-3.3-70b-versatile")
         return res
     elif model_id.startswith("groq:"):
+        # Check if it's a vision call (hacky way for now, but effective)
+        if "IMAGE_DATA:" in prompt:
+            parts = prompt.split("IMAGE_DATA:")
+            text_prompt = parts[0].strip()
+            image_b64 = parts[1].strip()
+            res = await call_groq_vision(text_prompt, image_b64, model=model_id[5:])
+        else:
+            res = await call_groq(prompt, model=model_id[5:])
         if not res and settings.gemini_api_key:
             print("DEBUG: Groq failed or returned empty. Trying Gemini fallback.")
             return await call_gemini(prompt, is_json=is_json)
             return await call_groq(prompt, "llama-3.3-70b-versatile")
         return res
+async def call_groq_vision(prompt: str, image_b64: str, model: str = "llama-3.2-11b-vision-preview") -> str:
+    """Send a text prompt plus one image to Groq's chat-completions endpoint.
+
+    Args:
+        prompt: Text part of the user message.
+        image_b64: The image, either as a full ``data:image...`` URL or as a
+            bare base64 payload (which is then wrapped as a JPEG data URL).
+        model: Groq model identifier placed in the request payload.
+
+    Returns:
+        The ``content`` of the first choice's message on an HTTP 200 response.
+        An empty string when no Groq API key is configured, when the response
+        status is not 200, or when any exception is raised while building,
+        sending, or decoding the request. Failures are printed, never raised.
+    """
+    # Without an API key there is nothing to call; signal "no result" to the caller.
+    if not settings.groq_api_key:
+        return ""
+    try:
+        # A new client is created per call, with a 60 second timeout.
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            # Ensure proper data URL format. A bare base64 string is always
+            # labelled image/jpeg here, regardless of the real image format.
+            if not image_b64.startswith("data:image"):
+                image_b64 = f"data:image/jpeg;base64,{image_b64}"
+            # Single user message whose content has two parts: the text prompt
+            # and the image passed as a data URL.
+            payload = {
+                "model": model,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {"url": image_b64}
+                            }
+                        ]
+                    }
+                ],
+                "temperature": 0.2
+            }
+            response = await client.post(
+                "https://api.groq.com/openai/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {settings.groq_api_key}",
+                    "Content-Type": "application/json"
+                },
+                json=payload
+            )
+            # Any non-200 status is logged with the response body and reported
+            # to the caller as an empty result.
+            if response.status_code != 200:
+                print(f"Groq Vision Error ({model}): {response.status_code} - {response.text}")
+                return ""
+            data = response.json()
+            # Only the first choice is read; a missing key or empty list raises
+            # and is handled by the except clause below.
+            return data["choices"][0]["message"]["content"]
+    except Exception as e:
+        # Best-effort: every failure (network, JSON decoding, unexpected
+        # response shape) is printed and turned into an empty string.
+        print(f"Error calling Groq Vision ({model}): {e}")
+        return ""
 def _parse_gemini_response(output: str) -> dict | None:
     if not output:
         return None

frontend/components/AgentChat.tsx CHANGED Viewed

@@ -20,10 +20,10 @@ const agents = [
   { id: "tech", name: "Ing. Tech", avatar: "👨‍💻", color: "text-cyan" },
   { id: "risk", name: "Sra. Estrategia", avatar: "🕵️‍♀️", color: "text-purple-400" },
 ];
 const models = [
   "Llama-3.3-70B (Groq)",
   "Llama-3.1-8B (Groq)",
   "Gemini 2.5 Flash",
   "Qwen-2.5 (Featherless)",
 ];
@@ -38,6 +38,7 @@ export default function AgentChat({ tender, companyProfile }: Props) {
   const [isUploading, setIsUploading] = useState(false);
   const [isListening, setIsListening] = useState(false);
   const [contextText, setContextText] = useState("");
   const scrollRef = useRef<HTMLDivElement>(null);
   const fileInputRef = useRef<HTMLInputElement>(null);
@@ -107,19 +108,40 @@ export default function AgentChat({ tender, companyProfile }: Props) {
     const messageToSend = overrideInput || input;
     if (!messageToSend.trim() || isLoading) return;
-    const userMsg: Message = { role: "user", content: messageToSend };
     setMessages(prev => [...prev, userMsg]);
     if (!overrideInput) setInput("");
     setIsLoading(true);
     try {
       const response = await fetch(`${getAPIBase()}/api/chat`, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body: JSON.stringify({
           tender,
           company_profile: companyProfile,
-          message: contextText ? `[DOC CONTEXT: ${contextText.slice(0, 3000)}]\n\nUSER QUESTION: ${messageToSend}` : messageToSend,
           agent: selectedAgent.id,
           model: selectedModel,
           history: messages.map(({role, content, agent}) => ({role, content, agent_name: agent})),
@@ -132,7 +154,7 @@ export default function AgentChat({ tender, companyProfile }: Props) {
       simulateTyping(data.response, selectedAgent.name);
     } catch (error) {
       console.error(error);
-      setMessages(prev => [...prev, { role: "assistant", content: "⚠️ Error connecting to the agent. Please try again." }]);
     } finally {
       setIsLoading(false);
     }
@@ -141,6 +163,13 @@ export default function AgentChat({ tender, companyProfile }: Props) {
   const handleFileUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
     if (e.target.files && e.target.files[0]) {
       const file = e.target.files[0];
       setIsUploading(true);
       try {
         const result = await uploadDocument(file);
@@ -167,7 +196,7 @@ export default function AgentChat({ tender, companyProfile }: Props) {
         ref={fileInputRef}
         onChange={handleFileUpload}
         className="hidden"
-        accept=".pdf,.docx,.doc,.txt"
       />
       {/* Chat Header */}

   { id: "tech", name: "Ing. Tech", avatar: "👨‍💻", color: "text-cyan" },
   { id: "risk", name: "Sra. Estrategia", avatar: "🕵️‍♀️", color: "text-purple-400" },
 ];
 const models = [
   "Llama-3.3-70B (Groq)",
   "Llama-3.1-8B (Groq)",
+  "Llama-3.2-11B-Vision (Groq)",
   "Gemini 2.5 Flash",
   "Qwen-2.5 (Featherless)",
 ];
   const [isUploading, setIsUploading] = useState(false);
   const [isListening, setIsListening] = useState(false);
   const [contextText, setContextText] = useState("");
+  const [attachedFile, setAttachedFile] = useState<File | null>(null);
   const scrollRef = useRef<HTMLDivElement>(null);
   const fileInputRef = useRef<HTMLInputElement>(null);
     const messageToSend = overrideInput || input;
     if (!messageToSend.trim() || isLoading) return;
+    let imageBase64 = "";
+    if (attachedFile && attachedFile.type.startsWith("image/")) {
+      setIsUploading(true);
+      try {
+        imageBase64 = await new Promise((resolve, reject) => {
+          const reader = new FileReader();
+          reader.onload = () => resolve(reader.result as string);
+          reader.onerror = reject;
+          reader.readAsDataURL(attachedFile);
+        });
+      } catch (err) {
+        console.error("Error converting image:", err);
+      }
+      setIsUploading(false);
+    }
+    const userMsg: Message = { role: "user", content: messageToSend, agent: "User" };
     setMessages(prev => [...prev, userMsg]);
     if (!overrideInput) setInput("");
+    setAttachedFile(null);
     setIsLoading(true);
     try {
+      const finalMessage = imageBase64
+        ? `${messageToSend}\n\nIMAGE_DATA:${imageBase64}`
+        : contextText ? `[DOC CONTEXT: ${contextText.slice(0, 3000)}]\n\nUSER QUESTION: ${messageToSend}` : messageToSend;
       const response = await fetch(`${getAPIBase()}/api/chat`, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body: JSON.stringify({
           tender,
           company_profile: companyProfile,
+          message: finalMessage,
           agent: selectedAgent.id,
           model: selectedModel,
           history: messages.map(({role, content, agent}) => ({role, content, agent_name: agent})),
       simulateTyping(data.response, selectedAgent.name);
     } catch (error) {
       console.error(error);
+      setMessages(prev => [...prev, { role: "assistant", content: "⚠️ Error connecting to the agent. Please try again.", agent: selectedAgent.name }]);
     } finally {
       setIsLoading(false);
     }
   const handleFileUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
     if (e.target.files && e.target.files[0]) {
       const file = e.target.files[0];
+      if (file.type.startsWith("image/")) {
+        setAttachedFile(file);
+        setMessages(prev => [...prev, { role: "user", content: `🖼️ Attached image: ${file.name}` }]);
+        return;
+      }
       setIsUploading(true);
       try {
         const result = await uploadDocument(file);
         ref={fileInputRef}
         onChange={handleFileUpload}
         className="hidden"
+        accept=".pdf,.docx,.doc,.txt,image/*"
       />
       {/* Chat Header */}