DedeProGames committed on
Commit
8980e37
·
verified ·
1 Parent(s): 99a0cab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -11,20 +11,21 @@ from transformers import (
11
  TextIteratorStreamer,
12
  )
13
 
14
- MODEL_ID = "Qwen/Qwen3.6-27B"
15
- TITLE = "Qwen3.6-27B Zero"
16
- SUBTITLE = "Text-only Qwen3.6 deployment for ZeroGPU with 4-bit loading, thinking controls, and streaming chat."
17
  DESCRIPTION = (
18
  "Optimized for ZeroGPU usage: text-only chat, NF4 4-bit quantization, bounded context, "
19
  "and shorter default generation lengths for better queue behavior."
20
  )
21
  SYSTEM_PROMPT = (
22
- "You are Qwen3.6-27B, a highly capable assistant for coding, research, and long-form reasoning. "
23
- "Be clear, accurate, and useful."
 
24
  )
25
  PLACEHOLDER = (
26
- "Ask for code, debugging, planning, long-form answers, or agentic workflows. "
27
- "Thinking mode is enabled by default."
28
  )
29
  MAX_INPUT_TOKENS = 16384
30
  DEFAULT_MAX_NEW_TOKENS = 4096
@@ -191,13 +192,13 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
191
  gr.Slider(minimum=1.0, maximum=1.5, step=0.05, value=1.0, label="Repetition penalty", render=False),
192
  ],
193
  examples=[
194
- ["Design a production-ready architecture for a SaaS analytics platform with clear tradeoffs."],
195
  ["Write a detailed debugging plan for a flaky async Python test suite."],
196
  ["Build a responsive landing page in React and Tailwind for a premium AI coding product."],
197
- ["Refactor this idea into a clear engineering plan: multi-tenant background job processing with retries and observability."],
198
  ],
199
  cache_examples=False,
200
  )
201
 
202
  if __name__ == "__main__":
203
- demo.launch()
 
11
  TextIteratorStreamer,
12
  )
13
 
14
+ MODEL_ID = "OrionLLM/GRM-2.6-Opus"
15
+ TITLE = "GRM-2.6-Opus Zero"
16
+ SUBTITLE = "Text-only GRM-2.6-Opus deployment for ZeroGPU with 4-bit loading, thinking controls, and streaming chat."
17
  DESCRIPTION = (
18
  "Optimized for ZeroGPU usage: text-only chat, NF4 4-bit quantization, bounded context, "
19
  "and shorter default generation lengths for better queue behavior."
20
  )
21
  SYSTEM_PROMPT = (
22
+ "You are GRM-2.6-Opus, an advanced reasoning assistant by OrionLLM for coding, research, "
23
+ "agentic workflows, terminal tasks, and long-form problem solving. Be clear, accurate, useful, "
24
+ "and think carefully before answering."
25
  )
26
  PLACEHOLDER = (
27
+ "Ask GRM-2.6-Opus for code, debugging, planning, research, long-form reasoning, "
28
+ "terminal-agent tasks, or complex multi-step workflows. Thinking mode is enabled by default."
29
  )
30
  MAX_INPUT_TOKENS = 16384
31
  DEFAULT_MAX_NEW_TOKENS = 4096
 
192
  gr.Slider(minimum=1.0, maximum=1.5, step=0.05, value=1.0, label="Repetition penalty", render=False),
193
  ],
194
  examples=[
195
+ ["Design a production-ready architecture for a local AI terminal-agent platform using GRM-2.6-Opus."],
196
  ["Write a detailed debugging plan for a flaky async Python test suite."],
197
  ["Build a responsive landing page in React and Tailwind for a premium AI coding product."],
198
+ ["Create an agentic workflow plan for solving a Terminal-Bench style task from scratch."],
199
  ],
200
  cache_examples=False,
201
  )
202
 
203
  if __name__ == "__main__":
204
+ demo.launch()