Elysiadev11 committed on
Commit
e2eb614
·
verified ·
1 Parent(s): 8c44b85

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +22 -17
app.py CHANGED
@@ -28,27 +28,27 @@ CEREBRAS_BASE_URL = os.getenv("CEREBRAS_BASE_URL", "https://api.cerebras.ai/v1")
28
  MAX_REQUEST_TOKENS = int(os.getenv("MAX_REQUEST_TOKENS", "30000"))
29
 
30
  # Default model for Cerebras
31
- DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "qwen-3-235b-a22b-instruct-2507")
32
 
33
  # Model mapping: incoming model name -> Cerebras model name
34
  DEFAULT_MODEL_MAPPING = {
35
  # Claude models -> Cerebras
36
- "claude-opus-4-7": "qwen-3-235b-a22b-instruct-2507",
37
- "claude-opus-4-6": "qwen-3-235b-a22b-instruct-2507",
38
- "claude-opus-4-5": "qwen-3-235b-a22b-instruct-2507",
39
- "claude-opus-4-1": "qwen-3-235b-a22b-instruct-2507",
40
- "claude-opus-4-20250514": "qwen-3-235b-a22b-instruct-2507",
41
- "claude-sonnet-4-6": "qwen-3-235b-a22b-instruct-2507",
42
- "claude-sonnet-4-5": "qwen-3-235b-a22b-instruct-2507",
43
- "claude-sonnet-4-20250514": "qwen-3-235b-a22b-instruct-2507",
44
- "claude-haiku-4-5": "qwen-3-235b-a22b-instruct-2507",
45
- "claude-haiku-4-5-20251001": "qwen-3-235b-a22b-instruct-2507",
46
  # GPT models -> Cerebras
47
- "gpt-4": "qwen-3-235b-a22b-instruct-2507",
48
- "gpt-4o": "qwen-3-235b-a22b-instruct-2507",
49
- "gpt-4o-mini": "qwen-3-235b-a22b-instruct-2507",
50
- "gpt-4-turbo": "qwen-3-235b-a22b-instruct-2507",
51
- "gpt-3.5-turbo": "qwen-3-235b-a22b-instruct-2507",
52
  }
53
 
54
  def load_model_mapping():
@@ -400,7 +400,12 @@ async def chat(req: Request):
400
  }
401
 
402
  # Forward optional parameters
403
- for param in ["max_tokens", "max_completion_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty"]:
 
 
 
 
 
404
  if param in body:
405
  cerebras_body[param] = body[param]
406
 
 
28
  MAX_REQUEST_TOKENS = int(os.getenv("MAX_REQUEST_TOKENS", "30000"))
29
 
30
  # Default model for Cerebras
31
+ DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "llama-4-scout-17b-16e-instruct")
32
 
33
  # Model mapping: incoming model name -> Cerebras model name
34
  DEFAULT_MODEL_MAPPING = {
35
  # Claude models -> Cerebras
36
+ "claude-opus-4-7": "llama-4-scout-17b-16e-instruct",
37
+ "claude-opus-4-6": "llama-4-scout-17b-16e-instruct",
38
+ "claude-opus-4-5": "llama-4-scout-17b-16e-instruct",
39
+ "claude-opus-4-1": "llama-4-scout-17b-16e-instruct",
40
+ "claude-opus-4-20250514": "llama-4-scout-17b-16e-instruct",
41
+ "claude-sonnet-4-6": "llama-4-scout-17b-16e-instruct",
42
+ "claude-sonnet-4-5": "llama-4-scout-17b-16e-instruct",
43
+ "claude-sonnet-4-20250514": "llama-4-scout-17b-16e-instruct",
44
+ "claude-haiku-4-5": "llama-4-scout-17b-16e-instruct",
45
+ "claude-haiku-4-5-20251001": "llama-4-scout-17b-16e-instruct",
46
  # GPT models -> Cerebras
47
+ "gpt-4": "llama-4-scout-17b-16e-instruct",
48
+ "gpt-4o": "llama-4-scout-17b-16e-instruct",
49
+ "gpt-4o-mini": "llama-4-scout-17b-16e-instruct",
50
+ "gpt-4-turbo": "llama-4-scout-17b-16e-instruct",
51
+ "gpt-3.5-turbo": "llama-4-scout-17b-16e-instruct",
52
  }
53
 
54
  def load_model_mapping():
 
400
  }
401
 
402
  # Forward optional parameters
403
+ forward_params = [
404
+ "max_tokens", "max_completion_tokens", "temperature", "top_p", "stop",
405
+ "frequency_penalty", "presence_penalty", "tools", "tool_choice",
406
+ "parallel_tool_calls", "response_format"
407
+ ]
408
+ for param in forward_params:
409
  if param in body:
410
  cerebras_body[param] = body[param]
411