Fix: Permanent model override for decommissioned Groq models in AgentRunnerService (Phase 9)
Browse files
backend/services/agent_runner_service.py
CHANGED
|
@@ -63,11 +63,17 @@ class AgentRunnerService:
|
|
| 63 |
metadata={"project_id": project_id, "run_id": run_id, "status": "running"},
|
| 64 |
)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
agent = AgentFactory.get_agent(
|
| 67 |
provider=agent_data["api_provider"],
|
| 68 |
name=agent_data["name"],
|
| 69 |
role=agent_data["role"],
|
| 70 |
-
model=
|
| 71 |
system_prompt=agent_data.get("system_prompt")
|
| 72 |
)
|
| 73 |
|
|
@@ -214,18 +220,23 @@ class AgentRunnerService:
|
|
| 214 |
if pattern in raw_out:
|
| 215 |
logger.warning(f"SECURITY: Suspicious pattern '{pattern}' detected in agent output for task {task_id}.")
|
| 216 |
result["security_warning"] = f"Output sanitized: suspicious pattern '{pattern}' detected."
|
| 217 |
-
# We don't block yet, but we flag it.
|
| 218 |
|
| 219 |
quality_review = validate_output(quality_task, result)
|
| 220 |
result["quality_review"] = quality_review
|
| 221 |
claims_count = await evidence_service.replace_task_claims(task, result)
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
agent_data.get("api_provider"),
|
| 225 |
agent_data.get("model"),
|
| 226 |
-
|
| 227 |
-
|
| 228 |
)
|
|
|
|
| 229 |
budget_service.record_usage(
|
| 230 |
project_id=project_id,
|
| 231 |
task_id=task_id,
|
|
@@ -233,10 +244,10 @@ class AgentRunnerService:
|
|
| 233 |
agent_id=agent_data.get("id"),
|
| 234 |
provider=agent_data.get("api_provider"),
|
| 235 |
model=agent_data.get("model"),
|
| 236 |
-
prompt_tokens=
|
| 237 |
-
completion_tokens=
|
| 238 |
-
estimated_cost=
|
| 239 |
-
metadata={"duration_seconds": round(duration, 2), "claims_count": claims_count},
|
| 240 |
)
|
| 241 |
|
| 242 |
# 6. Save to Cache
|
|
|
|
| 63 |
metadata={"project_id": project_id, "run_id": run_id, "status": "running"},
|
| 64 |
)
|
| 65 |
|
| 66 |
+
# Permanent model override for decommissioned Groq models
|
| 67 |
+
model_to_use = agent_data["model"]
|
| 68 |
+
if "llama3-70b-8192" in model_to_use:
|
| 69 |
+
model_to_use = "llama-3.3-70b-versatile"
|
| 70 |
+
logger.warning(f"Overriding decommissioned model {agent_data['model']} with {model_to_use}")
|
| 71 |
+
|
| 72 |
agent = AgentFactory.get_agent(
|
| 73 |
provider=agent_data["api_provider"],
|
| 74 |
name=agent_data["name"],
|
| 75 |
role=agent_data["role"],
|
| 76 |
+
model=model_to_use,
|
| 77 |
system_prompt=agent_data.get("system_prompt")
|
| 78 |
)
|
| 79 |
|
|
|
|
| 220 |
if pattern in raw_out:
|
| 221 |
logger.warning(f"SECURITY: Suspicious pattern '{pattern}' detected in agent output for task {task_id}.")
|
| 222 |
result["security_warning"] = f"Output sanitized: suspicious pattern '{pattern}' detected."
|
|
|
|
| 223 |
|
| 224 |
quality_review = validate_output(quality_task, result)
|
| 225 |
result["quality_review"] = quality_review
|
| 226 |
claims_count = await evidence_service.replace_task_claims(task, result)
|
| 227 |
+
|
| 228 |
+
# Use actual usage if provided by agent, otherwise fallback to estimation
|
| 229 |
+
usage = result.get("usage") or {}
|
| 230 |
+
actual_prompt_tokens = usage.get("prompt_tokens") or prompt_tokens
|
| 231 |
+
actual_completion_tokens = usage.get("completion_tokens") or budget_service.estimate_completion_tokens(result)
|
| 232 |
+
|
| 233 |
+
actual_cost = budget_service.estimate_cost(
|
| 234 |
agent_data.get("api_provider"),
|
| 235 |
agent_data.get("model"),
|
| 236 |
+
actual_prompt_tokens,
|
| 237 |
+
actual_completion_tokens,
|
| 238 |
)
|
| 239 |
+
|
| 240 |
budget_service.record_usage(
|
| 241 |
project_id=project_id,
|
| 242 |
task_id=task_id,
|
|
|
|
| 244 |
agent_id=agent_data.get("id"),
|
| 245 |
provider=agent_data.get("api_provider"),
|
| 246 |
model=agent_data.get("model"),
|
| 247 |
+
prompt_tokens=actual_prompt_tokens,
|
| 248 |
+
completion_tokens=actual_completion_tokens,
|
| 249 |
+
estimated_cost=actual_cost,
|
| 250 |
+
metadata={"duration_seconds": round(duration, 2), "claims_count": claims_count, "usage_source": "api" if result.get("usage") else "estimation"},
|
| 251 |
)
|
| 252 |
|
| 253 |
# 6. Save to Cache
|