| """Pre-create the trained-model repo on the Hub with a placeholder card. |
| |
| After GRPO training in the Colab notebook, the same repo just receives the |
| LoRA `adapter_config.json` + `adapter_model.safetensors` and the curated |
| `repair_library.json` — no extra setup needed there. |
| """ |
from __future__ import annotations

import os
import sys
from pathlib import Path
from textwrap import dedent

from huggingface_hub import HfApi, get_token

REPO_ID = os.environ.get("MODEL_REPO", "akhiilll/forgeenv-repair-agent")
TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
    or get_token()  # falls back to the token cached by `huggingface-cli login`
)
if not TOKEN:
    sys.exit("Set HF_TOKEN or run `huggingface-cli login` first.")

CARD = dedent(
    """\
    ---
    license: apache-2.0
    base_model: Qwen/Qwen2.5-3B-Instruct
    library_name: peft
    pipeline_tag: text-generation
    tags:
    - openenv
    - self-improvement
    - code-repair
    - schema-drift
    - reinforcement-learning
    - huggingface
    - lora
    ---

    # ForgeEnv Repair Agent (LoRA)

    > **Status: training in progress.** The LoRA adapter weights and
    > `repair_library.json` will be pushed here once the Colab training
    > notebook finishes warm-start SFT + GRPO. The repo is created up
    > front so all the project links resolve.

    A LoRA adapter on top of
    [`Qwen/Qwen2.5-3B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct),
    trained inside [`akhiilll/forgeenv`](https://huggingface.co/spaces/akhiilll/forgeenv) —
    a self-improving OpenEnv environment for repairing HuggingFace-ecosystem
    code under library version drift. Training pipeline: warm-start SFT (1k
    steps) + GRPO (TRL + Unsloth) with R-Zero-style Challenger / Solver
    co-evolution.
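
    The GRPO stage runs on TRL's `GRPOTrainer`. Below is a minimal sketch of
    that loop, assuming TRL's documented API and a *hypothetical* stand-in
    reward; the real reward comes from executing candidate repairs inside
    ForgeEnv:

    ```python
    from datasets import Dataset
    from trl import GRPOConfig, GRPOTrainer

    def reward_patch_formatted(completions, **kwargs):
        # Hypothetical shaping reward: favor completions that emit a fenced
        # patch block. ForgeEnv's actual reward is execution-based.
        return [1.0 if "```" in c else 0.0 for c in completions]

    trainer = GRPOTrainer(
        model="Qwen/Qwen2.5-3B-Instruct",
        reward_funcs=reward_patch_formatted,
        args=GRPOConfig(output_dir="grpo-sketch", num_generations=4),
        train_dataset=Dataset.from_dict({"prompt": ["Repair this snippet: ..."]}),
    )
    trainer.train()
    ```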

    ## Files (after training pushes)

    | File                        | Purpose                             |
    | --------------------------- | ----------------------------------- |
    | `adapter_config.json`       | LoRA adapter configuration          |
    | `adapter_model.safetensors` | LoRA adapter weights                |
    | `tokenizer*`                | Tokenizer files (Qwen2.5)           |
    | `repair_library.json`       | Curated successful repair patterns  |

    ## Usage (post-training)

    ```python
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen2.5-3B-Instruct", torch_dtype="auto", device_map="auto"
    )
    model = PeftModel.from_pretrained(base, "akhiilll/forgeenv-repair-agent")
    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    ```
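
    After training pushes `repair_library.json`, it can be pulled like any
    other repo file. A minimal sketch; the JSON's exact schema is whatever
    the notebook curates:

    ```python
    import json

    from huggingface_hub import hf_hub_download

    path = hf_hub_download("akhiilll/forgeenv-repair-agent", "repair_library.json")
    with open(path, encoding="utf-8") as f:
        repair_library = json.load(f)
    ```

    For merged-weights inference, `model.merge_and_unload()` folds the LoRA
    adapter into the base model after loading.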

    ## Live demo

    Try it in a browser at
    [`akhiilll/forgeenv-demo`](https://huggingface.co/spaces/akhiilll/forgeenv-demo)
    (Gradio + ZeroGPU).

    ## Citations

    - Huang et al., *R-Zero: Self-Evolving Reasoning LLM From Zero Data* (2025)
    - Zhao et al., *Absolute Zero: Reinforced Self-play Reasoning with Zero Data* (2025)
    - Liu et al., *SPIRAL: Self-Play on Zero-Sum Games* (2025)
    """
)


def main() -> None:
    api = HfApi()
    api.create_repo(
        repo_id=REPO_ID,
        repo_type="model",
        token=TOKEN,
        exist_ok=True,
        private=False,
    )
    print(f"[bootstrap] repo ready: {REPO_ID}")

    # Stage the card in a temp file so `upload_file` gets a real path, and
    # clean it up whether or not the upload succeeds.
    tmp = Path(".model_card_tmp.md")
    tmp.write_text(CARD, encoding="utf-8")
    try:
        api.upload_file(
            path_or_fileobj=str(tmp),
            path_in_repo="README.md",
            repo_id=REPO_ID,
            repo_type="model",
            token=TOKEN,
            commit_message="Initial placeholder model card",
        )
    finally:
        tmp.unlink(missing_ok=True)
    print(f"[bootstrap] model card uploaded: https://huggingface.co/{REPO_ID}")


if __name__ == "__main__":
    main()