sbeechoi committed
Commit a2ea50c · verified · 1 Parent(s): 011a7ed

Update app.py

Files changed (1): app.py (+3, -2)
app.py CHANGED

```diff
@@ -10,7 +10,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
 
 MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen3-0.6B")
-MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "256"))
+MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "4096"))
 MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "1536"))
 MAX_HISTORY_TURNS = int(os.getenv("MAX_HISTORY_TURNS", "3"))
 N_THREADS = int(os.getenv("N_THREADS", str(max(1, os.cpu_count() or 1))))
@@ -322,7 +322,8 @@ with gr.Blocks(title="Local CPU split-reasoning chat") as demo:
     preset.change(
         fn=load_preset,
         inputs=preset,
-        outputs=[system_prompt, user_input, thinking, sample_reasoning, sample_answer],
+        # outputs=[system_prompt, user_input, thinking, sample_reasoning, sample_answer],
+        outputs=[system_prompt, user_input, thinking],
     )
 
     send_btn.click(
```
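The first hunk raises the generation budget from 256 to 4096 new tokens by changing only the fallback value: every knob in app.py is read from an environment variable with a code default, so a deployment can still override it without editing the source. A minimal sketch of that pattern, runnable on its own (the `__main__` block and print line are illustrative, not from app.py):

```python
import os

# Pattern used in app.py: each setting is an env var with a code default,
# so a deployment overrides it at launch rather than in the source.
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "4096"))  # default was "256" before this commit

if __name__ == "__main__":
    # e.g. `MAX_NEW_TOKENS=512 python app.py` restores a smaller budget
    print(f"generation budget: {MAX_NEW_TOKENS} new tokens")
```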
 
10
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
11
 
12
  MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen3-0.6B")
13
+ MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "4096"))
14
  MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "1536"))
15
  MAX_HISTORY_TURNS = int(os.getenv("MAX_HISTORY_TURNS", "3"))
16
  N_THREADS = int(os.getenv("N_THREADS", str(max(1, os.cpu_count() or 1))))
 
322
  preset.change(
323
  fn=load_preset,
324
  inputs=preset,
325
+ # outputs=[system_prompt, user_input, thinking, sample_reasoning, sample_answer],
326
+ outputs=[system_prompt, user_input, thinking],
327
  )
328
 
329
  send_btn.click(
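The second hunk narrows `preset.change` from five output components to three. In Gradio, the handler passed as `fn` must return exactly one value per component listed in `outputs`, so this wiring change only works if `load_preset` now returns three values. A hedged sketch of that contract, assuming a trivial preset table; the preset data and component labels are hypothetical, not taken from app.py:

```python
import gradio as gr

# Hypothetical stand-in for app.py's preset data, for illustration only.
PRESETS = {
    "default": ("You are a helpful assistant.", "Hello!", True),
}

def load_preset(name: str):
    # Gradio matches return values to `outputs` positionally: with three
    # output components, the handler must return exactly three values.
    system, user, think = PRESETS.get(name, PRESETS["default"])
    return system, user, think

with gr.Blocks() as demo:
    preset = gr.Dropdown(choices=list(PRESETS), value="default", label="Preset")
    system_prompt = gr.Textbox(label="System prompt")
    user_input = gr.Textbox(label="User input")
    thinking = gr.Checkbox(label="Thinking")
    # Mirrors the post-commit wiring: three outputs, three returned values.
    preset.change(fn=load_preset, inputs=preset,
                  outputs=[system_prompt, user_input, thinking])

demo.launch()
```

Keeping the old five-component list (with `sample_reasoning` and `sample_answer`) while returning only three values would raise a Gradio error at event time, which is presumably why the old line is commented out rather than left in place.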