Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Aksel Joonas Reedi committed on
Title gen: swap Llama-3.1-8B for gpt-oss-120b via Cerebras (#58)
Browse files
- backend/routes/agent.py +8 -5
backend/routes/agent.py
CHANGED
|
@@ -166,9 +166,11 @@ async def generate_title(
|
|
| 166 |
) -> dict:
|
| 167 |
"""Generate a short title for a chat session based on the first user message.
|
| 168 |
|
| 169 |
-
Always uses
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
| 172 |
"""
|
| 173 |
api_key = (
|
| 174 |
os.environ.get("INFERENCE_TOKEN")
|
|
@@ -179,7 +181,7 @@ async def generate_title(
|
|
| 179 |
response = await acompletion(
|
| 180 |
# Double openai/ prefix: LiteLLM strips the first as its provider
|
| 181 |
# prefix, leaving the HF model id on the wire for the router.
|
| 182 |
-
model="openai/
|
| 183 |
api_base="https://router.huggingface.co/v1",
|
| 184 |
api_key=api_key,
|
| 185 |
messages=[
|
|
@@ -195,9 +197,10 @@ async def generate_title(
|
|
| 195 |
},
|
| 196 |
{"role": "user", "content": request.text[:500]},
|
| 197 |
],
|
| 198 |
-
max_tokens=
|
| 199 |
temperature=0.3,
|
| 200 |
timeout=10,
|
|
|
|
| 201 |
)
|
| 202 |
title = response.choices[0].message.content.strip().strip('"').strip("'")
|
| 203 |
title = title.translate(_TITLE_STRIP_CHARS).strip()
|
|
|
|
| 166 |
) -> dict:
|
| 167 |
"""Generate a short title for a chat session based on the first user message.
|
| 168 |
|
| 169 |
+
Always uses gpt-oss-120b via Cerebras on the HF router. The tab headline
|
| 170 |
+
renders as plain text, so the model is told to avoid markdown and any
|
| 171 |
+
stray formatting characters are stripped before returning. gpt-oss is a
|
| 172 |
+
reasoning model — reasoning_effort=low keeps the reasoning budget small
|
| 173 |
+
so the 60-token output budget isn't consumed before the title is written.
|
| 174 |
"""
|
| 175 |
api_key = (
|
| 176 |
os.environ.get("INFERENCE_TOKEN")
|
|
|
|
| 181 |
response = await acompletion(
|
| 182 |
# Double openai/ prefix: LiteLLM strips the first as its provider
|
| 183 |
# prefix, leaving the HF model id on the wire for the router.
|
| 184 |
+
model="openai/openai/gpt-oss-120b:cerebras",
|
| 185 |
api_base="https://router.huggingface.co/v1",
|
| 186 |
api_key=api_key,
|
| 187 |
messages=[
|
|
|
|
| 197 |
},
|
| 198 |
{"role": "user", "content": request.text[:500]},
|
| 199 |
],
|
| 200 |
+
max_tokens=60,
|
| 201 |
temperature=0.3,
|
| 202 |
timeout=10,
|
| 203 |
+
reasoning_effort="low",
|
| 204 |
)
|
| 205 |
title = response.choices[0].message.content.strip().strip('"').strip("'")
|
| 206 |
title = title.translate(_TITLE_STRIP_CHARS).strip()
|