File size: 2,620 Bytes
e8f2f91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""Tiny Gradio landing page for the OpenSleuth Colab notebook Space.

The actual training happens in the notebook (`train_opensleuth_grpo.ipynb` in
this same repo, downloadable from the Files tab). This app just renders a
clickable Open-In-Colab card so visitors can launch it in one click.
"""

from __future__ import annotations

import gradio as gr

# File name of the training notebook, as it appears in the repo URLs below.
NOTEBOOK_PATH = "train_opensleuth_grpo.ipynb"

# Identifier of the Hugging Face Space (owner/name) used to build repo URLs.
SPACE_ID = "anugrah55/opensleuth-colab"

# Colab deep link: the `#fileId=` fragment carries the notebook's Hugging Face
# URL, with the scheme colon written percent-encoded (`%3A`).
COLAB_URL = "".join(
    (
        "https://colab.research.google.com/#fileId=",
        "https%3A//huggingface.co/spaces/",
        SPACE_ID,
        "/blob/main/",
        NOTEBOOK_PATH,
    )
)

# Markdown body shown by the landing page (passed to `gr.Markdown` below).
# It is an f-string: `COLAB_URL` is interpolated into the badge link and
# `NOTEBOOK_PATH` into step 1 of the instructions. Every other URL and table
# value in the text is a hard-coded literal and must be updated by hand.
LANDING_MD = f"""
# OpenSleuth — Colab quickstart

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]({COLAB_URL})

OpenSleuth is an *Algorithmic Detective* RL environment. An LLM agent reverse-engineers an unknown black-box Python function by probing it and then submitting a Python replica. The env fuzz-tests the submission against the hidden reference (with a complexity penalty) and returns a scalar reward.

This Space hosts the **minimum reproducible Colab notebook** for training an
agent against the live env Space using **HF TRL's `GRPOTrainer`** + **bnb-4bit**
+ **LoRA** on a free-tier Colab T4. End-to-end runtime: ~15 – 25 minutes.

### One-click training

1. Click the **Open in Colab** badge above (or grab `{NOTEBOOK_PATH}` from the **Files** tab and upload it to Colab manually).
2. In Colab: `Runtime → Change runtime type → GPU → T4`.
3. `Runtime → Run all`.

### Defaults

| Knob | Value |
|------|-------|
| Model | `Qwen/Qwen2.5-0.5B-Instruct` |
| Quant | bnb-4bit (nf4 + double-quant) |
| LoRA  | r=16, alpha=32, q/k/v/o |
| Tasks | all 15 from `anugrah55/opensleuth-tasks` |
| GRPO `num_generations` | 4 |
| Epochs | 1 |

### Links

- **Env Space (REST API the notebook calls):** https://huggingface.co/spaces/anugrah55/opensleuth-env-gemini-cli
- **Training Space (full 3B retrain):** https://huggingface.co/spaces/anugrah55/opensleuth-training-gemini-cli
- **Open-ended task catalog:** https://huggingface.co/datasets/anugrah55/opensleuth-tasks
"""


def _open_colab() -> str:
    """Return a status message that embeds the module-level ``COLAB_URL``.

    NOTE(review): nothing in the visible part of this file calls this helper;
    confirm whether it is still needed before relying on it.
    """
    prefix = "Opening Colab: "
    return f"{prefix}{COLAB_URL}"


# Assemble the page: the Markdown card first, then a single row with two
# buttons configured with `link=` URLs. No event callbacks are attached.
demo = gr.Blocks(title="OpenSleuth — Colab quickstart")
with demo:
    gr.Markdown(LANDING_MD)
    with gr.Row():
        # Primary action: the Colab deep link.
        gr.Button(
            variant="primary",
            link=COLAB_URL,
            value="Open in Google Colab",
        )
        # Secondary action: the notebook's page in this Space's repository.
        gr.Button(
            link=f"https://huggingface.co/spaces/{SPACE_ID}/blob/main/{NOTEBOOK_PATH}",
            value="View notebook in Files tab",
        )


# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()