akseljoonas committed on
Commit
fb89433
·
2 Parent(s): d03c5f6 849c88c

Deploy 2026-04-22

Browse files
Files changed (1) hide show
  1. backend/routes/agent.py +24 -8
backend/routes/agent.py CHANGED
@@ -157,39 +157,55 @@ async def set_model(body: dict, user: dict = Depends(get_current_user)) -> dict:
157
  return {"model": model_id}
158
 
159
 
 
 
 
160
  @router.post("/title")
161
  async def generate_title(
162
  request: SubmitRequest, user: dict = Depends(get_current_user)
163
  ) -> dict:
164
- """Generate a short title for a chat session based on the first user message."""
165
- model = session_manager.config.model_name
166
- llm_params = _resolve_llm_params(model, reasoning_effort="high")
 
 
 
 
 
 
 
 
167
  try:
168
  response = await acompletion(
 
 
 
 
 
169
  messages=[
170
  {
171
  "role": "system",
172
  "content": (
173
  "Generate a very short title (max 6 words) for a chat conversation "
174
  "that starts with the following user message. "
175
- "Reply with ONLY the title, no quotes, no punctuation at the end."
 
 
176
  ),
177
  },
178
  {"role": "user", "content": request.text[:500]},
179
  ],
180
  max_tokens=20,
181
  temperature=0.3,
182
- timeout=8,
183
- **llm_params,
184
  )
185
  title = response.choices[0].message.content.strip().strip('"').strip("'")
186
- # Safety: cap at 50 chars
187
  if len(title) > 50:
188
  title = title[:50].rstrip() + "…"
189
  return {"title": title}
190
  except Exception as e:
191
  logger.warning(f"Title generation failed: {e}")
192
- # Fallback: truncate the message
193
  fallback = request.text.strip()
194
  title = fallback[:40].rstrip() + "…" if len(fallback) > 40 else fallback
195
  return {"title": title}
 
157
  return {"model": model_id}
158
 
159
 
160
# Markdown/formatting characters removed from model-generated titles, because
# the tab headline renders as plain text.
_TITLE_STRIP_CHARS = str.maketrans("", "", "`*_~#[]()")


def _clean_title(raw):
    """Turn a raw model reply into a plain-text title (may return "")."""
    # Drop formatting characters *before* trimming quotes so that wrappers
    # such as **"Title"** lose both layers. A None reply is treated as empty.
    title = (raw or "").translate(_TITLE_STRIP_CHARS).strip()
    title = title.strip("\"'").strip()
    if len(title) > 50:
        title = title[:50].rstrip() + "…"
    return title


def _fallback_title(text):
    """Build a title by truncating the user's own message to 40 characters."""
    fallback = text.strip()
    return fallback[:40].rstrip() + "…" if len(fallback) > 40 else fallback


@router.post("/title")
async def generate_title(
    request: SubmitRequest, user: dict = Depends(get_current_user)
) -> dict:
    """Generate a short title for a chat session based on the first user message.

    Always uses Llama-3.1-8B-Instruct via Cerebras on the HF router. The tab
    headline renders as plain text, so the model is told to avoid markdown
    and any stray formatting characters are stripped before returning.

    The token is taken from ``INFERENCE_TOKEN``, then the user's ``hf_token``,
    then ``HF_TOKEN``. If no token is available, the request fails, or the
    model reply is empty after cleaning, the title falls back to the first
    40 characters of the user's message.

    Returns:
        A dict of the form ``{"title": <str>}``.
    """
    api_key = (
        os.environ.get("INFERENCE_TOKEN")
        or (user.get("hf_token") if isinstance(user, dict) else None)
        or os.environ.get("HF_TOKEN")
    )
    if not api_key:
        # Without a token the router call cannot authenticate.
        # NOTE(review): with api_key=None LiteLLM's OpenAI provider may fall
        # back to an ambient key (e.g. OPENAI_API_KEY) and send it to the HF
        # router -- confirm; skipping the call avoids that either way.
        logger.warning("Title generation skipped: no inference token available")
        return {"title": _fallback_title(request.text)}

    try:
        response = await acompletion(
            # The openai/ prefix selects LiteLLM's OpenAI-compatible provider;
            # LiteLLM strips it, leaving the HF model id on the wire for the
            # router.
            model="openai/meta-llama/Llama-3.1-8B-Instruct:cerebras",
            api_base="https://router.huggingface.co/v1",
            api_key=api_key,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Generate a very short title (max 6 words) for a chat conversation "
                        "that starts with the following user message. "
                        "Reply with ONLY the title in plain text. "
                        "Do NOT use markdown, backticks, asterisks, quotes, brackets, or any "
                        "formatting characters. No punctuation at the end."
                    ),
                },
                {"role": "user", "content": request.text[:500]},
            ],
            max_tokens=20,
            temperature=0.3,
            timeout=10,
        )
        title = _clean_title(response.choices[0].message.content)
    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to the
        # truncated-message fallback rather than failing the request.
        logger.warning("Title generation failed: %s", e)
        title = ""

    # An empty title (failure, or a reply that was only formatting characters)
    # is never returned; the user's message is used instead.
    return {"title": title or _fallback_title(request.text)}