File size: 3,828 Bytes
a15535e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""Pre-create the trained-model repo on the Hub with a placeholder card.

After GRPO training in the Colab notebook, the same repo just receives the
LoRA `adapter_config.json` + `adapter_model.safetensors` and the curated
`repair_library.json` — no extra setup needed there.
"""
from __future__ import annotations

import os
import sys
from pathlib import Path
from textwrap import dedent

from huggingface_hub import HfApi


# Target Hub repo; override with the MODEL_REPO environment variable.
REPO_ID = os.getenv("MODEL_REPO", "akhiilll/forgeenv-repair-agent")

# Accept either conventional token variable. An unset or empty token is a
# hard error at import time — nothing below can authenticate without it.
TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
if TOKEN is None or TOKEN == "":
    sys.exit("set HF_TOKEN or run `huggingface-cli login` first.")


# Placeholder README.md content pushed to the repo before training finishes.
# The leading YAML front matter (license / base_model / tags) is what the Hub
# indexes for search and model-page metadata — keep it first and well-formed.
CARD: str = dedent(
    """\
    ---
    license: apache-2.0
    base_model: Qwen/Qwen2.5-3B-Instruct
    library_name: peft
    pipeline_tag: text-generation
    tags:
      - openenv
      - self-improvement
      - code-repair
      - schema-drift
      - reinforcement-learning
      - huggingface
      - lora
    ---

    # ForgeEnv Repair Agent (LoRA)

    > **Status: training-in-progress.** The LoRA adapter weights and
    > `repair_library.json` will be pushed here once the Colab training
    > notebook finishes warm-start SFT + GRPO. The repo is created up
    > front so all the project links resolve.

    A LoRA adapter on top of
    [`Qwen/Qwen2.5-3B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct),
    trained inside [`akhiilll/forgeenv`](https://huggingface.co/spaces/akhiilll/forgeenv) —
    a self-improving OpenEnv environment for HuggingFace ecosystem repair
    under library version drift. Training pipeline: warm-start SFT (1k
    steps) + GRPO (TRL + Unsloth) with R-Zero-style Challenger / Solver
    co-evolution.

    ## Files (after training pushes)

    | File                          | Purpose                                  |
    | ----------------------------- | ---------------------------------------- |
    | `adapter_config.json`         | LoRA adapter configuration               |
    | `adapter_model.safetensors`   | LoRA adapter weights                     |
    | `tokenizer*`                  | Tokenizer files (Qwen2.5)                |
    | `repair_library.json`         | Curated successful repair patterns       |

    ## Usage (post-training)

    ```python
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen2.5-3B-Instruct", torch_dtype="auto", device_map="auto"
    )
    model = PeftModel.from_pretrained(base, "akhiilll/forgeenv-repair-agent")
    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    ```

    ## Live demo

    Try it in a browser at
    [`akhiilll/forgeenv-demo`](https://huggingface.co/spaces/akhiilll/forgeenv-demo)
    (Gradio + ZeroGPU).

    ## Citations

    - Huang et al., *R-Zero: Self-Evolving Reasoning LLM From Zero Data* (2025)
    - Zhao et al., *Absolute Zero: Reinforced Self-play Reasoning with Zero Data* (2025)
    - Liu et al., *SPIRAL: Self-Play on Zero-Sum Games* (2025)
    """
)


def main() -> None:
    """Create the model repo (idempotent) and upload a placeholder card.

    Reads the module-level ``REPO_ID``, ``TOKEN`` and ``CARD`` constants.
    Hub client errors (auth/network) propagate to the caller.
    """
    api = HfApi()
    # exist_ok=True makes the bootstrap safe to re-run.
    api.create_repo(
        repo_id=REPO_ID,
        repo_type="model",
        token=TOKEN,
        exist_ok=True,
        private=False,
    )
    print(f"[bootstrap] repo ready: {REPO_ID}")

    # upload_file accepts in-memory bytes for path_or_fileobj, so the card
    # is pushed directly. The previous implementation wrote a fixed-name
    # temp file (.model_card_tmp.md) into the CWD, which risked collisions
    # between concurrent runs and a leftover file on a hard kill before the
    # finally-cleanup could run.
    api.upload_file(
        path_or_fileobj=CARD.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=REPO_ID,
        repo_type="model",
        token=TOKEN,
        commit_message="Initial placeholder model card",
    )
    print(f"[bootstrap] model card uploaded: https://huggingface.co/{REPO_ID}")


if __name__ == "__main__":
    # Script entry point; not executed when the module is imported.
    main()