Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,20 +11,21 @@ from transformers import (
|
|
| 11 |
TextIteratorStreamer,
|
| 12 |
)
|
| 13 |
|
| 14 |
-
MODEL_ID = "
|
| 15 |
-
TITLE = "
|
| 16 |
-
SUBTITLE = "Text-only
|
| 17 |
DESCRIPTION = (
|
| 18 |
"Optimized for ZeroGPU usage: text-only chat, NF4 4-bit quantization, bounded context, "
|
| 19 |
"and shorter default generation lengths for better queue behavior."
|
| 20 |
)
|
| 21 |
SYSTEM_PROMPT = (
|
| 22 |
-
"You are
|
| 23 |
-
"Be clear, accurate,
|
|
|
|
| 24 |
)
|
| 25 |
PLACEHOLDER = (
|
| 26 |
-
"Ask for code, debugging, planning, long-form
|
| 27 |
-
"Thinking mode is enabled by default."
|
| 28 |
)
|
| 29 |
MAX_INPUT_TOKENS = 16384
|
| 30 |
DEFAULT_MAX_NEW_TOKENS = 4096
|
|
@@ -191,13 +192,13 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
| 191 |
gr.Slider(minimum=1.0, maximum=1.5, step=0.05, value=1.0, label="Repetition penalty", render=False),
|
| 192 |
],
|
| 193 |
examples=[
|
| 194 |
-
["Design a production-ready architecture for a
|
| 195 |
["Write a detailed debugging plan for a flaky async Python test suite."],
|
| 196 |
["Build a responsive landing page in React and Tailwind for a premium AI coding product."],
|
| 197 |
-
["
|
| 198 |
],
|
| 199 |
cache_examples=False,
|
| 200 |
)
|
| 201 |
|
| 202 |
if __name__ == "__main__":
|
| 203 |
-
demo.launch()
|
|
|
|
| 11 |
TextIteratorStreamer,
|
| 12 |
)
|
| 13 |
|
| 14 |
+
MODEL_ID = "OrionLLM/GRM-2.6-Opus"
|
| 15 |
+
TITLE = "GRM-2.6-Opus Zero"
|
| 16 |
+
SUBTITLE = "Text-only GRM-2.6-Opus deployment for ZeroGPU with 4-bit loading, thinking controls, and streaming chat."
|
| 17 |
DESCRIPTION = (
|
| 18 |
"Optimized for ZeroGPU usage: text-only chat, NF4 4-bit quantization, bounded context, "
|
| 19 |
"and shorter default generation lengths for better queue behavior."
|
| 20 |
)
|
| 21 |
SYSTEM_PROMPT = (
|
| 22 |
+
"You are GRM-2.6-Opus, an advanced reasoning assistant by OrionLLM for coding, research, "
|
| 23 |
+
"agentic workflows, terminal tasks, and long-form problem solving. Be clear, accurate, useful, "
|
| 24 |
+
"and think carefully before answering."
|
| 25 |
)
|
| 26 |
PLACEHOLDER = (
|
| 27 |
+
"Ask GRM-2.6-Opus for code, debugging, planning, research, long-form reasoning, "
|
| 28 |
+
"terminal-agent tasks, or complex multi-step workflows. Thinking mode is enabled by default."
|
| 29 |
)
|
| 30 |
MAX_INPUT_TOKENS = 16384
|
| 31 |
DEFAULT_MAX_NEW_TOKENS = 4096
|
|
|
|
| 192 |
gr.Slider(minimum=1.0, maximum=1.5, step=0.05, value=1.0, label="Repetition penalty", render=False),
|
| 193 |
],
|
| 194 |
examples=[
|
| 195 |
+
["Design a production-ready architecture for a local AI terminal-agent platform using GRM-2.6-Opus."],
|
| 196 |
["Write a detailed debugging plan for a flaky async Python test suite."],
|
| 197 |
["Build a responsive landing page in React and Tailwind for a premium AI coding product."],
|
| 198 |
+
["Create an agentic workflow plan for solving a Terminal-Bench style task from scratch."],
|
| 199 |
],
|
| 200 |
cache_examples=False,
|
| 201 |
)
|
| 202 |
|
| 203 |
if __name__ == "__main__":
|
| 204 |
+
demo.launch()
|