Upload app.py with huggingface_hub
app.py CHANGED
|
@@ -28,27 +28,27 @@ CEREBRAS_BASE_URL = os.getenv("CEREBRAS_BASE_URL", "https://api.cerebras.ai/v1")
|
|
| 28 |
MAX_REQUEST_TOKENS = int(os.getenv("MAX_REQUEST_TOKENS", "30000"))
|
| 29 |
|
| 30 |
# Default model for Cerebras
|
| 31 |
-
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "
|
| 32 |
|
| 33 |
# Model mapping: incoming model name -> Cerebras model name
|
| 34 |
DEFAULT_MODEL_MAPPING = {
|
| 35 |
# Claude models -> Cerebras
|
| 36 |
-
"claude-opus-4-7": "
|
| 37 |
-
"claude-opus-4-6": "
|
| 38 |
-
"claude-opus-4-5": "
|
| 39 |
-
"claude-opus-4-1": "
|
| 40 |
-
"claude-opus-4-20250514": "
|
| 41 |
-
"claude-sonnet-4-6": "
|
| 42 |
-
"claude-sonnet-4-5": "
|
| 43 |
-
"claude-sonnet-4-20250514": "
|
| 44 |
-
"claude-haiku-4-5": "
|
| 45 |
-
"claude-haiku-4-5-20251001": "
|
| 46 |
# GPT models -> Cerebras
|
| 47 |
-
"gpt-4": "
|
| 48 |
-
"gpt-4o": "
|
| 49 |
-
"gpt-4o-mini": "
|
| 50 |
-
"gpt-4-turbo": "
|
| 51 |
-
"gpt-3.5-turbo": "
|
| 52 |
}
|
| 53 |
|
| 54 |
def load_model_mapping():
|
|
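This hunk routes every incoming Claude and GPT model alias to a single Cerebras model. The diff viewer truncates the old values and does not show the body of load_model_mapping(), so the following is only a minimal sketch of how such a proxy typically resolves model names; the MODEL_MAPPING environment variable and the resolve_model() helper are assumptions for illustration, not code from the repo:

import json
import os

DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "llama-4-scout-17b-16e-instruct")

# Abbreviated copy of the mapping from the diff; every alias points at
# the same Cerebras model.
DEFAULT_MODEL_MAPPING = {
    "claude-opus-4-5": "llama-4-scout-17b-16e-instruct",
    "gpt-4o": "llama-4-scout-17b-16e-instruct",
}

def load_model_mapping() -> dict:
    # Assumption: an optional MODEL_MAPPING env var holding a JSON object
    # can extend or override the defaults. The real body is not in the diff.
    mapping = dict(DEFAULT_MODEL_MAPPING)
    override = os.getenv("MODEL_MAPPING")
    if override:
        mapping.update(json.loads(override))
    return mapping

def resolve_model(requested: str) -> str:
    # Unknown model names fall back to DEFAULT_MODEL.
    return load_model_mapping().get(requested, DEFAULT_MODEL)

Under this sketch, a request for "claude-opus-4-5" resolves via the mapping, and any unmapped name still lands on llama-4-scout-17b-16e-instruct through the DEFAULT_MODEL fallback before the request is sent to the Cerebras API.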
@@ -400,7 +400,12 @@ async def chat(req: Request):
     }

     # Forward optional parameters
-
+    forward_params = [
+        "max_tokens", "max_completion_tokens", "temperature", "top_p", "stop",
+        "frequency_penalty", "presence_penalty", "tools", "tool_choice",
+        "parallel_tool_calls", "response_format"
+    ]
+    for param in forward_params:
         if param in body:
             cerebras_body[param] = body[param]

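The second hunk introduces a named forward_params whitelist (the removed line is truncated in this view), so only recognized OpenAI-style options are copied from the client request into the upstream Cerebras request. A self-contained sketch of that step; the build_cerebras_body() wrapper and the model/messages/stream keys are assumptions about the surrounding request construction, which the diff does not show:

FORWARD_PARAMS = [
    "max_tokens", "max_completion_tokens", "temperature", "top_p", "stop",
    "frequency_penalty", "presence_penalty", "tools", "tool_choice",
    "parallel_tool_calls", "response_format",
]

def build_cerebras_body(body: dict, model: str) -> dict:
    # Whitelist copy: anything the client sends that is not listed in
    # FORWARD_PARAMS is dropped rather than forwarded upstream.
    cerebras_body = {
        "model": model,                        # assumed surrounding keys,
        "messages": body.get("messages", []),  # not shown in the diff
        "stream": body.get("stream", False),
    }
    for param in FORWARD_PARAMS:
        if param in body:
            cerebras_body[param] = body[param]
    return cerebras_body

Keeping the whitelist in one list makes it easy to extend when new parameters need forwarding, without touching the copy loop itself.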