GRM-2.6-Opus

Running on Zero

App Files Files Community

DedeProGames committed on 16 days ago

Commit

8980e37

verified ·

1 Parent(s): 99a0cab

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -10

app.py CHANGED Viewed

@@ -11,20 +11,21 @@ from transformers import (
     TextIteratorStreamer,
 )
-MODEL_ID = "Qwen/Qwen3.6-27B"
-TITLE = "Qwen3.6-27B Zero"
-SUBTITLE = "Text-only Qwen3.6 deployment for ZeroGPU with 4-bit loading, thinking controls, and streaming chat."
 DESCRIPTION = (
     "Optimized for ZeroGPU usage: text-only chat, NF4 4-bit quantization, bounded context, "
     "and shorter default generation lengths for better queue behavior."
 )
 SYSTEM_PROMPT = (
-    "You are Qwen3.6-27B, a highly capable assistant for coding, research, and long-form reasoning. "
-    "Be clear, accurate, and useful."
 )
 PLACEHOLDER = (
-    "Ask for code, debugging, planning, long-form answers, or agentic workflows. "
-    "Thinking mode is enabled by default."
 )
 MAX_INPUT_TOKENS = 16384
 DEFAULT_MAX_NEW_TOKENS = 4096
@@ -191,13 +192,13 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
             gr.Slider(minimum=1.0, maximum=1.5, step=0.05, value=1.0, label="Repetition penalty", render=False),
         ],
         examples=[
-            ["Design a production-ready architecture for a SaaS analytics platform with clear tradeoffs."],
             ["Write a detailed debugging plan for a flaky async Python test suite."],
             ["Build a responsive landing page in React and Tailwind for a premium AI coding product."],
-            ["Refactor this idea into a clear engineering plan: multi-tenant background job processing with retries and observability."],
         ],
         cache_examples=False,
     )
 if __name__ == "__main__":
-    demo.launch()

     TextIteratorStreamer,
 )
+MODEL_ID = "OrionLLM/GRM-2.6-Opus"
+TITLE = "GRM-2.6-Opus Zero"
+SUBTITLE = "Text-only GRM-2.6-Opus deployment for ZeroGPU with 4-bit loading, thinking controls, and streaming chat."
 DESCRIPTION = (
     "Optimized for ZeroGPU usage: text-only chat, NF4 4-bit quantization, bounded context, "
     "and shorter default generation lengths for better queue behavior."
 )
 SYSTEM_PROMPT = (
+    "You are GRM-2.6-Opus, an advanced reasoning assistant by OrionLLM for coding, research, "
+    "agentic workflows, terminal tasks, and long-form problem solving. Be clear, accurate, useful, "
+    "and think carefully before answering."
 )
 PLACEHOLDER = (
+    "Ask GRM-2.6-Opus for code, debugging, planning, research, long-form reasoning, "
+    "terminal-agent tasks, or complex multi-step workflows. Thinking mode is enabled by default."
 )
 MAX_INPUT_TOKENS = 16384
 DEFAULT_MAX_NEW_TOKENS = 4096
             gr.Slider(minimum=1.0, maximum=1.5, step=0.05, value=1.0, label="Repetition penalty", render=False),
         ],
         examples=[
+            ["Design a production-ready architecture for a local AI terminal-agent platform using GRM-2.6-Opus."],
             ["Write a detailed debugging plan for a flaky async Python test suite."],
             ["Build a responsive landing page in React and Tailwind for a premium AI coding product."],
+            ["Create an agentic workflow plan for solving a Terminal-Bench style task from scratch."],
         ],
         cache_examples=False,
     )
 if __name__ == "__main__":
+    demo.launch()