Álvaro Valenzuela Valdes committed on
Commit
46928cd
·
1 Parent(s): 080b51f

feat: implement Llama 3.2 Vision support in chatbot for image analysis

Browse files
backend/app/services/llm.py CHANGED
@@ -110,6 +110,7 @@ async def call_gemini_with_model(prompt: str, model_name: str | None = None, is_
110
  "Mixtral-8x7B (Groq)": "groq:mixtral-8x7b-32768",
111
  "Gemma-2-9B (Featherless)": "google/gemma-2-9b-it",
112
  "Llama-3.1-8B (Featherless)": "meta-llama/Meta-Llama-3.1-8B-Instruct",
 
113
  }
114
 
115
  model_id = model_map.get(model_name, "gemini")
@@ -127,7 +128,15 @@ async def call_gemini_with_model(prompt: str, model_name: str | None = None, is_
127
  return await call_groq(prompt, "llama-3.3-70b-versatile")
128
  return res
129
  elif model_id.startswith("groq:"):
130
- res = await call_groq(prompt, model=model_id[5:])
 
 
 
 
 
 
 
 
131
  if not res and settings.gemini_api_key:
132
  print("DEBUG: Groq failed or returned empty. Trying Gemini fallback.")
133
  return await call_gemini(prompt, is_json=is_json)
@@ -139,6 +148,50 @@ async def call_gemini_with_model(prompt: str, model_name: str | None = None, is_
139
  return await call_groq(prompt, "llama-3.3-70b-versatile")
140
  return res
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  def _parse_gemini_response(output: str) -> dict | None:
143
  if not output:
144
  return None
 
110
  "Mixtral-8x7B (Groq)": "groq:mixtral-8x7b-32768",
111
  "Gemma-2-9B (Featherless)": "google/gemma-2-9b-it",
112
  "Llama-3.1-8B (Featherless)": "meta-llama/Meta-Llama-3.1-8B-Instruct",
113
+ "Llama-3.2-11B-Vision (Groq)": "groq:llama-3.2-11b-vision-preview",
114
  }
115
 
116
  model_id = model_map.get(model_name, "gemini")
 
128
  return await call_groq(prompt, "llama-3.3-70b-versatile")
129
  return res
130
  elif model_id.startswith("groq:"):
131
+ # Check if it's a vision call (hacky way for now, but effective)
132
+ if "IMAGE_DATA:" in prompt:
133
+ parts = prompt.split("IMAGE_DATA:")
134
+ text_prompt = parts[0].strip()
135
+ image_b64 = parts[1].strip()
136
+ res = await call_groq_vision(text_prompt, image_b64, model=model_id[5:])
137
+ else:
138
+ res = await call_groq(prompt, model=model_id[5:])
139
+
140
  if not res and settings.gemini_api_key:
141
  print("DEBUG: Groq failed or returned empty. Trying Gemini fallback.")
142
  return await call_gemini(prompt, is_json=is_json)
 
148
  return await call_groq(prompt, "llama-3.3-70b-versatile")
149
  return res
150
 
151
async def call_groq_vision(prompt: str, image_b64: str, model: str = "llama-3.2-11b-vision-preview") -> str:
    """Send a text prompt together with one image to Groq's chat-completions endpoint.

    Args:
        prompt: Text part of the user message.
        image_b64: Either a complete ``data:image...`` URL or a bare base64
            payload. A bare payload is wrapped as a JPEG data URL.
        model: Groq model identifier placed in the request body.

    Returns:
        The message content of the first choice. An empty string is returned
        when no Groq API key is configured, when no image data was supplied,
        when the HTTP status is not 200, or when any exception is raised
        while sending the request or reading the response.
    """
    if not settings.groq_api_key:
        return ""

    # Without image data the request could only carry an empty data URL;
    # skip the network round trip and report "no result" like other failures.
    if not image_b64:
        return ""

    try:
        # Ensure proper data URL format
        if not image_b64.startswith("data:image"):
            image_b64 = f"data:image/jpeg;base64,{image_b64}"

        payload = {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_b64}
                        }
                    ]
                }
            ],
            "temperature": 0.2
        }

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.groq_api_key}",
                    "Content-Type": "application/json"
                },
                json=payload
            )

        if response.status_code != 200:
            print(f"Groq Vision Error ({model}): {response.status_code} - {response.text}")
            return ""

        data = response.json()
        # The content field may be JSON null; keep the declared ``str`` return.
        return data["choices"][0]["message"]["content"] or ""
    except Exception as e:
        # Best-effort call: any failure is logged and reported as an empty
        # result so the caller can fall back to another provider.
        print(f"Error calling Groq Vision ({model}): {e}")
        return ""
194
+
195
  def _parse_gemini_response(output: str) -> dict | None:
196
  if not output:
197
  return None
frontend/components/AgentChat.tsx CHANGED
@@ -20,10 +20,10 @@ const agents = [
20
  { id: "tech", name: "Ing. Tech", avatar: "👨‍💻", color: "text-cyan" },
21
  { id: "risk", name: "Sra. Estrategia", avatar: "🕵️‍♀️", color: "text-purple-400" },
22
  ];
23
-
24
  const models = [
25
  "Llama-3.3-70B (Groq)",
26
  "Llama-3.1-8B (Groq)",
 
27
  "Gemini 2.5 Flash",
28
  "Qwen-2.5 (Featherless)",
29
  ];
@@ -38,6 +38,7 @@ export default function AgentChat({ tender, companyProfile }: Props) {
38
  const [isUploading, setIsUploading] = useState(false);
39
  const [isListening, setIsListening] = useState(false);
40
  const [contextText, setContextText] = useState("");
 
41
  const scrollRef = useRef<HTMLDivElement>(null);
42
  const fileInputRef = useRef<HTMLInputElement>(null);
43
 
@@ -107,19 +108,40 @@ export default function AgentChat({ tender, companyProfile }: Props) {
107
  const messageToSend = overrideInput || input;
108
  if (!messageToSend.trim() || isLoading) return;
109
 
110
- const userMsg: Message = { role: "user", content: messageToSend };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  setMessages(prev => [...prev, userMsg]);
112
  if (!overrideInput) setInput("");
 
113
  setIsLoading(true);
114
 
115
  try {
 
 
 
 
116
  const response = await fetch(`${getAPIBase()}/api/chat`, {
117
  method: "POST",
118
  headers: { "Content-Type": "application/json" },
119
  body: JSON.stringify({
120
  tender,
121
  company_profile: companyProfile,
122
- message: contextText ? `[DOC CONTEXT: ${contextText.slice(0, 3000)}]\n\nUSER QUESTION: ${messageToSend}` : messageToSend,
123
  agent: selectedAgent.id,
124
  model: selectedModel,
125
  history: messages.map(({role, content, agent}) => ({role, content, agent_name: agent})),
@@ -132,7 +154,7 @@ export default function AgentChat({ tender, companyProfile }: Props) {
132
  simulateTyping(data.response, selectedAgent.name);
133
  } catch (error) {
134
  console.error(error);
135
- setMessages(prev => [...prev, { role: "assistant", content: "⚠️ Error connecting to the agent. Please try again." }]);
136
  } finally {
137
  setIsLoading(false);
138
  }
@@ -141,6 +163,13 @@ export default function AgentChat({ tender, companyProfile }: Props) {
141
  const handleFileUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
142
  if (e.target.files && e.target.files[0]) {
143
  const file = e.target.files[0];
 
 
 
 
 
 
 
144
  setIsUploading(true);
145
  try {
146
  const result = await uploadDocument(file);
@@ -167,7 +196,7 @@ export default function AgentChat({ tender, companyProfile }: Props) {
167
  ref={fileInputRef}
168
  onChange={handleFileUpload}
169
  className="hidden"
170
- accept=".pdf,.docx,.doc,.txt"
171
  />
172
 
173
  {/* Chat Header */}
 
20
  { id: "tech", name: "Ing. Tech", avatar: "👨‍💻", color: "text-cyan" },
21
  { id: "risk", name: "Sra. Estrategia", avatar: "🕵️‍♀️", color: "text-purple-400" },
22
  ];
 
23
  const models = [
24
  "Llama-3.3-70B (Groq)",
25
  "Llama-3.1-8B (Groq)",
26
+ "Llama-3.2-11B-Vision (Groq)",
27
  "Gemini 2.5 Flash",
28
  "Qwen-2.5 (Featherless)",
29
  ];
 
38
  const [isUploading, setIsUploading] = useState(false);
39
  const [isListening, setIsListening] = useState(false);
40
  const [contextText, setContextText] = useState("");
41
+ const [attachedFile, setAttachedFile] = useState<File | null>(null);
42
  const scrollRef = useRef<HTMLDivElement>(null);
43
  const fileInputRef = useRef<HTMLInputElement>(null);
44
 
 
108
  const messageToSend = overrideInput || input;
109
  if (!messageToSend.trim() || isLoading) return;
110
 
111
+ let imageBase64 = "";
112
+ if (attachedFile && attachedFile.type.startsWith("image/")) {
113
+ setIsUploading(true);
114
+ try {
115
+ imageBase64 = await new Promise((resolve, reject) => {
116
+ const reader = new FileReader();
117
+ reader.onload = () => resolve(reader.result as string);
118
+ reader.onerror = reject;
119
+ reader.readAsDataURL(attachedFile);
120
+ });
121
+ } catch (err) {
122
+ console.error("Error converting image:", err);
123
+ }
124
+ setIsUploading(false);
125
+ }
126
+
127
+ const userMsg: Message = { role: "user", content: messageToSend, agent: "User" };
128
  setMessages(prev => [...prev, userMsg]);
129
  if (!overrideInput) setInput("");
130
+ setAttachedFile(null);
131
  setIsLoading(true);
132
 
133
  try {
134
+ const finalMessage = imageBase64
135
+ ? `${messageToSend}\n\nIMAGE_DATA:${imageBase64}`
136
+ : contextText ? `[DOC CONTEXT: ${contextText.slice(0, 3000)}]\n\nUSER QUESTION: ${messageToSend}` : messageToSend;
137
+
138
  const response = await fetch(`${getAPIBase()}/api/chat`, {
139
  method: "POST",
140
  headers: { "Content-Type": "application/json" },
141
  body: JSON.stringify({
142
  tender,
143
  company_profile: companyProfile,
144
+ message: finalMessage,
145
  agent: selectedAgent.id,
146
  model: selectedModel,
147
  history: messages.map(({role, content, agent}) => ({role, content, agent_name: agent})),
 
154
  simulateTyping(data.response, selectedAgent.name);
155
  } catch (error) {
156
  console.error(error);
157
+ setMessages(prev => [...prev, { role: "assistant", content: "⚠️ Error connecting to the agent. Please try again.", agent: selectedAgent.name }]);
158
  } finally {
159
  setIsLoading(false);
160
  }
 
163
  const handleFileUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
164
  if (e.target.files && e.target.files[0]) {
165
  const file = e.target.files[0];
166
+
167
+ if (file.type.startsWith("image/")) {
168
+ setAttachedFile(file);
169
+ setMessages(prev => [...prev, { role: "user", content: `🖼️ Attached image: ${file.name}` }]);
170
+ return;
171
+ }
172
+
173
  setIsUploading(true);
174
  try {
175
  const result = await uploadDocument(file);
 
196
  ref={fileInputRef}
197
  onChange={handleFileUpload}
198
  className="hidden"
199
+ accept=".pdf,.docx,.doc,.txt,image/*"
200
  />
201
 
202
  {/* Chat Header */}