tazwarrrr committed
Commit 56b60d4 · Parent: 17497ec

fix render

backend/requirements.txt CHANGED
@@ -4,6 +4,7 @@ websockets==12.0
 pydantic>=2.8.0,<3.0.0
 python-multipart==0.0.6
 groq==0.9.0
+httpx==0.27.2
 openai==1.47.0
 crewai==0.55.2
 python-dotenv==1.0.0
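
The `httpx` pin is most likely what actually fixes the deploy (the commit message suggests a failing Render build): `openai==1.47.0` still forwards a `proxies` keyword to `httpx.Client`, and httpx 0.28.0 removed that argument, so any fresh environment that resolves an unpinned httpx to >= 0.28 crashes as soon as the OpenAI client is constructed. A minimal sketch of that failure mode, assuming this known incompatibility is the culprit:

# Sketch of the failure the pin avoids (assumption: the deploy hit the known
# openai<1.55.3 / httpx>=0.28.0 incompatibility).
from openai import OpenAI  # openai==1.47.0

# With httpx>=0.28.0 installed, constructing the client raises:
#   TypeError: Client.__init__() got an unexpected keyword argument 'proxies'
# because openai 1.47.0 still passes the removed `proxies` kwarg through to
# httpx.Client. Pinning httpx==0.27.2 keeps the constructor working.
client = OpenAI(api_key="dummy-key", base_url="http://localhost:8000")
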
backend/tools/llm_client.py CHANGED
@@ -1,45 +1,59 @@
+from openai import OpenAI
+from groq import Groq
+from typing import Optional, Dict, Any
 import os
 from dotenv import load_dotenv
 
+# pylint: disable=broad-exception-caught
+
 # Load environment variables
 load_dotenv()
 
-from typing import Optional, Dict, Any
-from groq import Groq
-from openai import OpenAI
 
 class LLMClient:
     """Unified LLM client supporting both Groq (local) and vLLM (AMD Cloud)"""
-    
+
     def __init__(self):
         self.use_vllm = os.getenv("USE_VLLM", "false").lower() == "true"
-        
+        self.client = None
+        self.model = "mock"
+        self.init_error: Optional[str] = None
+
         if self.use_vllm:
             # vLLM configuration for AMD Cloud
-            self.vllm_base_url = os.getenv("VLLM_BASE_URL", "http://localhost:8000")
+            self.vllm_base_url = os.getenv(
+                "VLLM_BASE_URL", "http://localhost:8000")
             self.vllm_api_key = os.getenv("VLLM_API_KEY", "dummy-key")
-            self.client = OpenAI(
-                base_url=self.vllm_base_url,
-                api_key=self.vllm_api_key
-            )
-            self.model = os.getenv("VLLM_MODEL", "amd/llama-3.3-70b")
+            try:
+                self.client = OpenAI(
+                    base_url=self.vllm_base_url,
+                    api_key=self.vllm_api_key
+                )
+                self.model = os.getenv("VLLM_MODEL", "amd/llama-3.3-70b")
+            except Exception as e:
+                self.init_error = f"vLLM client init failed: {str(e)}"
+                print(
+                    f"Warning: {self.init_error}. Falling back to mock mode.")
         else:
             # Groq configuration for local development
             self.groq_api_key = os.getenv("GROQ_API_KEY")
             if not self.groq_api_key:
                 print("Warning: GROQ_API_KEY not found. Using mock mode.")
-                self.client = None
-                self.model = "mock"
                 return
-            self.client = Groq(api_key=self.groq_api_key)
-            self.model = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
-    
+            try:
+                self.client = Groq(api_key=self.groq_api_key)
+                self.model = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
+            except Exception as e:
+                self.init_error = f"Groq client init failed: {str(e)}"
+                print(
+                    f"Warning: {self.init_error}. Falling back to mock mode.")
+
     def chat_completion(self, messages: list, temperature: float = 0.7, max_tokens: int = 4000) -> str:
         """Send chat completion request to the configured LLM"""
         if self.client is None:
             # Mock response when no API key is available
             return '{"kernels_found": ["mock_kernel"], "cuda_apis": ["cudaMalloc"], "warp_size_issue": true, "workload_type": "memory-bound", "sharding_detected": false, "difficulty": "Medium"}'
-        
+
         try:
             if self.use_vllm:
                 response = self.client.chat.completions.create(
@@ -57,10 +71,10 @@ class LLMClient:
                 max_tokens=max_tokens
             )
             return response.choices[0].message.content
-        
+
         except Exception as e:
-            raise Exception(f"LLM request failed: {str(e)}")
-        
+            raise RuntimeError(f"LLM request failed: {str(e)}") from e
+
     def get_model_info(self) -> Dict[str, Any]:
         """Get information about the current model configuration"""
         if self.use_vllm:
@@ -76,7 +90,7 @@
             'model': self.model,
             'platform': 'Local Development'
         }
-    
+
     def test_connection(self) -> bool:
         """Test if the LLM connection is working"""
         try:
@@ -85,5 +99,5 @@
             ]
             response = self.chat_completion(test_messages, max_tokens=10)
             return "OK" in response.upper()
-        except:
+        except Exception:
            return False
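
For reference, a minimal usage sketch of the class as it stands after this commit (illustrative only, not part of the commit; the import path assumes the repository root is on `sys.path`):

# Illustrative usage of LLMClient (not part of the commit).
# Without GROQ_API_KEY or USE_VLLM set, the client stays in mock mode:
# chat_completion() returns the canned JSON string, and test_connection()
# returns False because that string does not contain "OK".
from backend.tools.llm_client import LLMClient

client = LLMClient()
print(client.get_model_info())

if client.test_connection():
    reply = client.chat_completion(
        [{"role": "user", "content": "Reply with OK."}],
        temperature=0.0,
        max_tokens=10,
    )
    print(reply)
elif client.init_error:
    # Set by the new try/except blocks when Groq or vLLM init raises.
    print(f"Client unavailable: {client.init_error}")

Initialization now defaults `client`/`model` to mock values up front and downgrades any constructor failure to a warning plus `init_error`, so a bad key or unreachable endpoint no longer takes the whole backend down at import time.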