Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
|
| 10 |
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
|
| 11 |
|
| 12 |
MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen3-0.6B")
|
| 13 |
-
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "…"))  [original default value truncated in this diff extraction — only the new value "4096" is visible below]
|
| 14 |
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "1536"))
|
| 15 |
MAX_HISTORY_TURNS = int(os.getenv("MAX_HISTORY_TURNS", "3"))
|
| 16 |
N_THREADS = int(os.getenv("N_THREADS", str(max(1, os.cpu_count() or 1))))
|
|
@@ -322,7 +322,8 @@ with gr.Blocks(title="Local CPU split-reasoning chat") as demo:
|
|
| 322 |
preset.change(
|
| 323 |
fn=load_preset,
|
| 324 |
inputs=preset,
|
| 325 |
-
outputs=[system_prompt, user_input, thinking, sample_reasoning, sample_answer],
|
|
|
|
| 326 |
)
|
| 327 |
|
| 328 |
send_btn.click(
|
|
|
|
| 10 |
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
|
| 11 |
|
| 12 |
MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen3-0.6B")
|
| 13 |
+
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "4096"))
|
| 14 |
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "1536"))
|
| 15 |
MAX_HISTORY_TURNS = int(os.getenv("MAX_HISTORY_TURNS", "3"))
|
| 16 |
N_THREADS = int(os.getenv("N_THREADS", str(max(1, os.cpu_count() or 1))))
|
|
|
|
| 322 |
preset.change(
|
| 323 |
fn=load_preset,
|
| 324 |
inputs=preset,
|
| 325 |
+
# outputs=[system_prompt, user_input, thinking, sample_reasoning, sample_answer],
|
| 326 |
+
outputs=[system_prompt, user_input, thinking],
|
| 327 |
)
|
| 328 |
|
| 329 |
send_btn.click(
|