Don Rishabh Claude Opus 4.7 (1M context) committed on
Commit
82e3e94
·
1 Parent(s): 5f71cca

demo: add 'Try a new task' tab

Browse files

Wraps the existing 3-column browse layout in a Tabs() and adds a second
tab where the user types a free-form task description + optional test
input. The trained agent compresses the description into a system
prompt; the target then runs that prompt against the input. First click
in the tab loads agent + LoRA on demand (~6 GB).

Reuses load_agents / _agent_generate / extract_prompt / run_target_batch
from the existing handlers — net add is ~75 lines of UI plus one
compress_and_run handler.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. space-demo/app.py +192 -82
space-demo/app.py CHANGED
@@ -445,6 +445,65 @@ def generate_three(verbose_prompt: str, base_prompt: str, trained_prompt: str,
445
  return outs[0], outs[1], outs[2], metrics
446
 
447
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  # ---------------------------------------------------------------------------
449
  # Build app
450
  # ---------------------------------------------------------------------------
@@ -461,94 +520,140 @@ def build_app() -> gr.Blocks:
461
  gr.Markdown(
462
  f"# Prompt Golf — Compression Demo\n"
463
  f"Compressed prompts from a Qwen3-1.7B agent (trained via GRPO), "
464
- f"scored against **`{DEFAULTS['target_model']}`** as the target. "
465
- f"Tasks ordered by reward gain (top = biggest improvement).\n\n"
466
- f"Three columns: **verbose** (the human-written task description), "
467
- f"**untrained** (raw Qwen3 output), and **trained** (after RL "
468
- f"fine-tuning). Pick a task, type a test input, watch the target "
469
- f"produce outputs with each prompt side by side."
470
  )
471
 
472
- with gr.Row():
473
- task_dd = gr.Dropdown(
474
- choices=task_choices(),
475
- value=initial,
476
- label="Task",
477
- scale=4,
478
- )
479
- cat = gr.Textbox(label="category", interactive=False, scale=1)
480
- scorer = gr.Textbox(label="scorer", interactive=False, scale=1)
481
-
482
- # Hidden state for live regen
483
- _task_id_state = gr.Textbox(visible=False)
484
- _budget_state = gr.Textbox(visible=False)
485
-
486
- with gr.Row():
487
- with gr.Column():
488
- gr.Markdown("### Verbose (human-written)")
489
- verbose_box = gr.Textbox(
490
- label="prompt", lines=8, interactive=True,
491
  )
492
  with gr.Row():
493
- v_tok = gr.Textbox(label="tokens", interactive=False)
494
- v_acc = gr.Textbox(label="accuracy", interactive=False)
495
- with gr.Column():
496
- gr.Markdown("### Untrained agent (base)")
497
- base_box = gr.Textbox(
498
- label="prompt", lines=8, interactive=True,
499
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  with gr.Row():
501
- b_tok = gr.Textbox(label="tokens", interactive=False)
502
- b_acc = gr.Textbox(label="accuracy", interactive=False)
503
- with gr.Column():
504
- gr.Markdown("### Trained agent (compressed)")
505
- trained_box = gr.Textbox(
506
- label="prompt", lines=8, interactive=True,
 
 
 
 
 
 
 
 
507
  )
 
508
  with gr.Row():
509
- t_tok = gr.Textbox(label="tokens", interactive=False)
510
- t_acc = gr.Textbox(label="accuracy", interactive=False)
511
-
512
- gr.Markdown("### Test input — edit to try your own")
513
- with gr.Row():
514
- sample_dd = gr.Dropdown(
515
- choices=[],
516
- label="Sample test inputs from this task (click to load)",
517
- interactive=True,
518
- allow_custom_value=False,
519
- scale=2,
520
- )
521
- test_input = gr.Textbox(
522
- label="input",
523
- lines=3,
524
- placeholder=("Type or paste a test input, or pick a sample "
525
- "from the dropdown above. The three prompts will "
526
- "each be prepended to it before the target "
527
- "generates."),
528
- )
529
 
530
- with gr.Row():
531
- regen_btn = gr.Button(
532
- "Regenerate prompts live (loads agent + LoRA)",
533
- variant="secondary",
534
- )
535
- run_btn = gr.Button(
536
- "Run target with all three prompts", variant="primary"
537
- )
538
- regen_status = gr.Textbox(label="agent status", interactive=False)
539
-
540
- with gr.Row():
541
- with gr.Column():
542
- gr.Markdown("### Target output — VERBOSE")
543
- out_v = gr.Textbox(label="output", lines=4, interactive=False)
544
- with gr.Column():
545
- gr.Markdown("### Target output — UNTRAINED")
546
- out_b = gr.Textbox(label="output", lines=4, interactive=False)
547
- with gr.Column():
548
- gr.Markdown("### Target output — TRAINED")
549
- out_t = gr.Textbox(label="output", lines=4, interactive=False)
550
-
551
- metrics = gr.Textbox(label="metrics", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
 
553
  gr.Markdown(
554
  "---\n"
@@ -557,8 +662,7 @@ def build_app() -> gr.Blocks:
557
  "an OpenEnv environment where the agent's *action* is a prompt "
558
  "and the *reward* is how well that prompt steers a frozen target "
559
  "LLM. The trained adapter shown here was fine-tuned with GRPO on "
560
- "a 90-task bank including 3 long-context policy-compression "
561
- "tasks (~700-token policies → ~25-token classifier prompts).\n"
562
  "- 📝 [Blog post](https://huggingface.co/spaces/rishabh16196/prompt_golf_env/blob/main/BLOG_POST.md)\n"
563
  "- 📊 [Demo CSV](https://huggingface.co/rishabh16196/prompt-golf-qwen-to-llama-nothink/blob/main/evals/qwen_to_llama_demo.csv)\n"
564
  "- 🤖 [Trained adapter](https://huggingface.co/rishabh16196/prompt-golf-qwen-to-llama-nothink)"
@@ -582,6 +686,12 @@ def build_app() -> gr.Blocks:
582
  inputs=[verbose_box, base_box, trained_box, test_input],
583
  outputs=[out_v, out_b, out_t, metrics],
584
  )
 
 
 
 
 
 
585
  app.load(select_task, inputs=[task_dd], outputs=select_outputs)
586
 
587
  return app
 
445
  return outs[0], outs[1], outs[2], metrics
446
 
447
 
448
+ def compress_and_run(description: str, budget_str: str, test_input: str):
449
+ """Custom-task tab: take a free-form task description + test input,
450
+ have the trained agent emit a compressed prompt, then run the target.
451
+ """
452
+ description = (description or "").strip()
453
+ test_input = (test_input or "").strip()
454
+ if not description:
455
+ return "", "", "", "(describe your task above)"
456
+ if not load_agents():
457
+ return "", "", "", ("agent loading disabled — set "
458
+ "DEMO_AGENT_ADAPTER to enable this tab")
459
+ try:
460
+ budget = int(budget_str)
461
+ except (ValueError, TypeError):
462
+ budget = 60
463
+
464
+ user_msg = build_user_message(
465
+ task_id="custom_task", category="custom",
466
+ description=description, budget=budget,
467
+ target_model_id=DEFAULTS["target_model"],
468
+ )
469
+ messages = [
470
+ {"role": "system", "content": SYSTEM_PROMPT},
471
+ {"role": "user", "content": user_msg},
472
+ ]
473
+ try:
474
+ chat_str = _AGENT_TOK.apply_chat_template(
475
+ messages, tokenize=False, add_generation_prompt=True,
476
+ enable_thinking=DEFAULTS["enable_thinking"],
477
+ )
478
+ except TypeError:
479
+ chat_str = _AGENT_TOK.apply_chat_template(
480
+ messages, tokenize=False, add_generation_prompt=True,
481
+ )
482
+
483
+ t0 = time.time()
484
+ raw = _agent_generate(
485
+ _AGENT_TRAINED, _AGENT_TOK, chat_str,
486
+ max_new_tokens=DEFAULTS["agent_max_new_tokens"],
487
+ )
488
+ t1 = time.time()
489
+ trained_prompt = extract_prompt(raw)
490
+ trained_tok = count_tokens(trained_prompt)
491
+
492
+ if test_input:
493
+ outs = run_target_batch([trained_prompt], test_input)
494
+ target_output = outs[0]
495
+ t2 = time.time()
496
+ msg = (
497
+ f"agent: {t1-t0:.1f}s | target: {t2-t1:.1f}s | "
498
+ f"trained prompt: {trained_tok} tok"
499
+ )
500
+ else:
501
+ target_output = "(enter a test input to run the target)"
502
+ msg = f"agent: {t1-t0:.1f}s | trained prompt: {trained_tok} tok"
503
+
504
+ return trained_prompt, str(trained_tok), target_output, msg
505
+
506
+
507
  # ---------------------------------------------------------------------------
508
  # Build app
509
  # ---------------------------------------------------------------------------
 
520
  gr.Markdown(
521
  f"# Prompt Golf — Compression Demo\n"
522
  f"Compressed prompts from a Qwen3-1.7B agent (trained via GRPO), "
523
+ f"scored against **`{DEFAULTS['target_model']}`** as the target."
 
 
 
 
 
524
  )
525
 
526
+ with gr.Tabs():
527
+ with gr.TabItem("Browse trained-vs-untrained"):
528
+ gr.Markdown(
529
+ "Tasks ordered by reward gain (top = biggest "
530
+ "improvement). Three columns: **verbose** (human-"
531
+ "written), **untrained** (raw Qwen3), and **trained** "
532
+ "(after RL fine-tuning). Pick a task, type a test "
533
+ "input, watch the target produce outputs side by side."
 
 
 
 
 
 
 
 
 
 
 
534
  )
535
  with gr.Row():
536
+ task_dd = gr.Dropdown(
537
+ choices=task_choices(),
538
+ value=initial,
539
+ label="Task",
540
+ scale=4,
541
+ )
542
+ cat = gr.Textbox(label="category", interactive=False, scale=1)
543
+ scorer = gr.Textbox(label="scorer", interactive=False, scale=1)
544
+
545
+ # Hidden state for live regen
546
+ _task_id_state = gr.Textbox(visible=False)
547
+ _budget_state = gr.Textbox(visible=False)
548
+
549
+ with gr.Row():
550
+ with gr.Column():
551
+ gr.Markdown("### Verbose (human-written)")
552
+ verbose_box = gr.Textbox(
553
+ label="prompt", lines=8, interactive=True,
554
+ )
555
+ with gr.Row():
556
+ v_tok = gr.Textbox(label="tokens", interactive=False)
557
+ v_acc = gr.Textbox(label="accuracy", interactive=False)
558
+ with gr.Column():
559
+ gr.Markdown("### Untrained agent (base)")
560
+ base_box = gr.Textbox(
561
+ label="prompt", lines=8, interactive=True,
562
+ )
563
+ with gr.Row():
564
+ b_tok = gr.Textbox(label="tokens", interactive=False)
565
+ b_acc = gr.Textbox(label="accuracy", interactive=False)
566
+ with gr.Column():
567
+ gr.Markdown("### Trained agent (compressed)")
568
+ trained_box = gr.Textbox(
569
+ label="prompt", lines=8, interactive=True,
570
+ )
571
+ with gr.Row():
572
+ t_tok = gr.Textbox(label="tokens", interactive=False)
573
+ t_acc = gr.Textbox(label="accuracy", interactive=False)
574
+
575
+ gr.Markdown("### Test input — edit to try your own")
576
  with gr.Row():
577
+ sample_dd = gr.Dropdown(
578
+ choices=[],
579
+ label="Sample test inputs from this task (click to load)",
580
+ interactive=True,
581
+ allow_custom_value=False,
582
+ scale=2,
583
+ )
584
+ test_input = gr.Textbox(
585
+ label="input",
586
+ lines=3,
587
+ placeholder=("Type or paste a test input, or pick a sample "
588
+ "from the dropdown above. The three prompts will "
589
+ "each be prepended to it before the target "
590
+ "generates."),
591
  )
592
+
593
  with gr.Row():
594
+ regen_btn = gr.Button(
595
+ "Regenerate prompts live (loads agent + LoRA)",
596
+ variant="secondary",
597
+ )
598
+ run_btn = gr.Button(
599
+ "Run target with all three prompts", variant="primary"
600
+ )
601
+ regen_status = gr.Textbox(label="agent status", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
602
 
603
+ with gr.Row():
604
+ with gr.Column():
605
+ gr.Markdown("### Target output — VERBOSE")
606
+ out_v = gr.Textbox(label="output", lines=4, interactive=False)
607
+ with gr.Column():
608
+ gr.Markdown("### Target output — UNTRAINED")
609
+ out_b = gr.Textbox(label="output", lines=4, interactive=False)
610
+ with gr.Column():
611
+ gr.Markdown("### Target output — TRAINED")
612
+ out_t = gr.Textbox(label="output", lines=4, interactive=False)
613
+
614
+ metrics = gr.Textbox(label="metrics", interactive=False)
615
+
616
+ with gr.TabItem("Try a new task"):
617
+ gr.Markdown(
618
+ "Describe a brand-new task, set a token budget, and "
619
+ "(optionally) a test input. The trained agent will "
620
+ "compress your description into a short system prompt, "
621
+ "then the target runs it on your input. First click "
622
+ "loads the agent + LoRA (~6 GB)."
623
+ )
624
+ custom_desc = gr.Textbox(
625
+ label="Describe your task",
626
+ lines=4,
627
+ placeholder=("e.g. Classify the input email as urgent, "
628
+ "normal, or spam. Output one word."),
629
+ )
630
+ with gr.Row():
631
+ custom_budget = gr.Textbox(
632
+ label="Token budget", value="60", scale=1,
633
+ )
634
+ custom_input = gr.Textbox(
635
+ label="Test input (optional)", lines=2, scale=4,
636
+ placeholder="Leave blank to just see the prompt.",
637
+ )
638
+ custom_btn = gr.Button(
639
+ "Compress with trained agent + run target",
640
+ variant="primary",
641
+ )
642
+ with gr.Row():
643
+ with gr.Column(scale=2):
644
+ gr.Markdown("### Trained agent prompt")
645
+ custom_prompt_out = gr.Textbox(
646
+ label="prompt", lines=6, interactive=False,
647
+ )
648
+ custom_tok = gr.Textbox(
649
+ label="tokens", interactive=False,
650
+ )
651
+ with gr.Column(scale=2):
652
+ gr.Markdown("### Target output")
653
+ custom_target_out = gr.Textbox(
654
+ label="output", lines=6, interactive=False,
655
+ )
656
+ custom_status = gr.Textbox(label="status", interactive=False)
657
 
658
  gr.Markdown(
659
  "---\n"
 
662
  "an OpenEnv environment where the agent's *action* is a prompt "
663
  "and the *reward* is how well that prompt steers a frozen target "
664
  "LLM. The trained adapter shown here was fine-tuned with GRPO on "
665
+ "a 90-task bank.\n"
 
666
  "- 📝 [Blog post](https://huggingface.co/spaces/rishabh16196/prompt_golf_env/blob/main/BLOG_POST.md)\n"
667
  "- 📊 [Demo CSV](https://huggingface.co/rishabh16196/prompt-golf-qwen-to-llama-nothink/blob/main/evals/qwen_to_llama_demo.csv)\n"
668
  "- 🤖 [Trained adapter](https://huggingface.co/rishabh16196/prompt-golf-qwen-to-llama-nothink)"
 
686
  inputs=[verbose_box, base_box, trained_box, test_input],
687
  outputs=[out_v, out_b, out_t, metrics],
688
  )
689
+ custom_btn.click(
690
+ compress_and_run,
691
+ inputs=[custom_desc, custom_budget, custom_input],
692
+ outputs=[custom_prompt_out, custom_tok,
693
+ custom_target_out, custom_status],
694
+ )
695
  app.load(select_task, inputs=[task_dd], outputs=select_outputs)
696
 
697
  return app