Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Deploy 2026-04-22
Browse files- backend/routes/agent.py +24 -8
backend/routes/agent.py
CHANGED
|
@@ -157,39 +157,55 @@ async def set_model(body: dict, user: dict = Depends(get_current_user)) -> dict:
|
|
| 157 |
return {"model": model_id}
|
| 158 |
|
| 159 |
|
|
|
|
|
|
|
|
|
|
| 160 |
@router.post("/title")
|
| 161 |
async def generate_title(
|
| 162 |
request: SubmitRequest, user: dict = Depends(get_current_user)
|
| 163 |
) -> dict:
|
| 164 |
-
"""Generate a short title for a chat session based on the first user message.
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
try:
|
| 168 |
response = await acompletion(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
messages=[
|
| 170 |
{
|
| 171 |
"role": "system",
|
| 172 |
"content": (
|
| 173 |
"Generate a very short title (max 6 words) for a chat conversation "
|
| 174 |
"that starts with the following user message. "
|
| 175 |
-
"Reply with ONLY the title
|
|
|
|
|
|
|
| 176 |
),
|
| 177 |
},
|
| 178 |
{"role": "user", "content": request.text[:500]},
|
| 179 |
],
|
| 180 |
max_tokens=20,
|
| 181 |
temperature=0.3,
|
| 182 |
-
timeout=
|
| 183 |
-
**llm_params,
|
| 184 |
)
|
| 185 |
title = response.choices[0].message.content.strip().strip('"').strip("'")
|
| 186 |
-
|
| 187 |
if len(title) > 50:
|
| 188 |
title = title[:50].rstrip() + "…"
|
| 189 |
return {"title": title}
|
| 190 |
except Exception as e:
|
| 191 |
logger.warning(f"Title generation failed: {e}")
|
| 192 |
-
# Fallback: truncate the message
|
| 193 |
fallback = request.text.strip()
|
| 194 |
title = fallback[:40].rstrip() + "…" if len(fallback) > 40 else fallback
|
| 195 |
return {"title": title}
|
|
|
|
| 157 |
return {"model": model_id}
|
| 158 |
|
| 159 |
|
| 160 |
+
# Markdown/formatting characters removed from model-generated titles, because
# the tab headline renders titles as plain text.
_TITLE_STRIP_CHARS = str.maketrans("", "", "`*_~#[]()")


def _clean_generated_title(raw: str) -> str:
    """Strip formatting characters, then surrounding whitespace and quotes."""
    # Remove markdown characters FIRST: a reply such as **"My Title"** only
    # exposes its wrapping quotes once the asterisks are gone.
    return raw.translate(_TITLE_STRIP_CHARS).strip().strip("\"'").strip()


def _fallback_title_from_message(text: str) -> str:
    """Build a stand-in title from the user's message (at most 40 chars + "…")."""
    fallback = text.strip()
    return fallback[:40].rstrip() + "…" if len(fallback) > 40 else fallback


@router.post("/title")
async def generate_title(
    request: SubmitRequest, user: dict = Depends(get_current_user)
) -> dict:
    """Generate a short title for a chat session based on the first user message.

    Always uses Llama-3.1-8B-Instruct via Cerebras on the HF router. The tab
    headline renders as plain text, so the model is told to avoid markdown
    and any stray formatting characters are stripped before returning.

    The API key is resolved in this order: the ``INFERENCE_TOKEN`` environment
    variable, the caller's ``hf_token`` (when ``user`` is a dict), then the
    ``HF_TOKEN`` environment variable.

    Returns:
        ``{"title": <str>}``. If the LLM call fails for any reason, or the
        cleaned reply is empty, the title falls back to the user's message
        truncated to 40 characters. Model titles longer than 50 characters
        are truncated and suffixed with "…".
    """
    api_key = (
        os.environ.get("INFERENCE_TOKEN")
        or (user.get("hf_token") if isinstance(user, dict) else None)
        or os.environ.get("HF_TOKEN")
    )
    try:
        response = await acompletion(
            # The leading openai/ is LiteLLM's provider prefix: LiteLLM strips
            # it, leaving the HF model id on the wire for the router.
            model="openai/meta-llama/Llama-3.1-8B-Instruct:cerebras",
            api_base="https://router.huggingface.co/v1",
            api_key=api_key,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Generate a very short title (max 6 words) for a chat conversation "
                        "that starts with the following user message. "
                        "Reply with ONLY the title in plain text. "
                        "Do NOT use markdown, backticks, asterisks, quotes, brackets, or any "
                        "formatting characters. No punctuation at the end."
                    ),
                },
                # Only the first 500 characters of the message are sent.
                {"role": "user", "content": request.text[:500]},
            ],
            max_tokens=20,
            temperature=0.3,
            timeout=10,
        )
        title = _clean_generated_title(response.choices[0].message.content)
    except Exception as e:
        # Best-effort endpoint: any failure (network, auth, malformed or
        # missing content) degrades to the message-based fallback below.
        logger.warning(f"Title generation failed: {e}")
        title = ""

    if not title:
        # Either the call failed or the reply consisted solely of
        # formatting characters/whitespace; never return an empty title
        # when the user's message can provide one.
        return {"title": _fallback_title_from_message(request.text)}

    if len(title) > 50:
        title = title[:50].rstrip() + "…"
    return {"title": title}
|