"""Pre-create the trained-model repo on the Hub with a placeholder card. After GRPO training in the Colab notebook, the same repo just receives the LoRA `adapter_config.json` + `adapter_model.safetensors` and the curated `repair_library.json` — no extra setup needed there. """ from __future__ import annotations import os import sys from pathlib import Path from textwrap import dedent from huggingface_hub import HfApi REPO_ID = os.environ.get("MODEL_REPO", "akhiilll/forgeenv-repair-agent") TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") if not TOKEN: sys.exit("set HF_TOKEN or run `huggingface-cli login` first.") CARD = dedent( """\ --- license: apache-2.0 base_model: Qwen/Qwen2.5-3B-Instruct library_name: peft pipeline_tag: text-generation tags: - openenv - self-improvement - code-repair - schema-drift - reinforcement-learning - huggingface - lora --- # ForgeEnv Repair Agent (LoRA) > **Status: training-in-progress.** The LoRA adapter weights and > `repair_library.json` will be pushed here once the Colab training > notebook finishes warm-start SFT + GRPO. The repo is created up > front so all the project links resolve. A LoRA adapter on top of [`Qwen/Qwen2.5-3B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct), trained inside [`akhiilll/forgeenv`](https://huggingface.co/spaces/akhiilll/forgeenv) — a self-improving OpenEnv environment for HuggingFace ecosystem repair under library version drift. Training pipeline: warm-start SFT (1k steps) + GRPO (TRL + Unsloth) with R-Zero-style Challenger / Solver co-evolution. ## Files (after training pushes) | File | Purpose | | ----------------------------- | ---------------------------------------- | | `adapter_config.json` | LoRA adapter configuration | | `adapter_model.safetensors` | LoRA adapter weights | | `tokenizer*` | Tokenizer files (Qwen2.5) | | `repair_library.json` | Curated successful repair patterns | ## Usage (post-training) ```python from peft import PeftModel from transformers import AutoModelForCausalLM, AutoTokenizer base = AutoModelForCausalLM.from_pretrained( "Qwen/Qwen2.5-3B-Instruct", torch_dtype="auto", device_map="auto" ) model = PeftModel.from_pretrained(base, "akhiilll/forgeenv-repair-agent") tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct") ``` ## Live demo Try it in a browser at [`akhiilll/forgeenv-demo`](https://huggingface.co/spaces/akhiilll/forgeenv-demo) (Gradio + ZeroGPU). ## Citations - Huang et al., *R-Zero: Self-Evolving Reasoning LLM From Zero Data* (2025) - Zhao et al., *Absolute Zero: Reinforced Self-play Reasoning with Zero Data* (2025) - Liu et al., *SPIRAL: Self-Play on Zero-Sum Games* (2025) """ ) def main() -> None: api = HfApi() api.create_repo( repo_id=REPO_ID, repo_type="model", token=TOKEN, exist_ok=True, private=False, ) print(f"[bootstrap] repo ready: {REPO_ID}") tmp = Path(".model_card_tmp.md") tmp.write_text(CARD, encoding="utf-8") try: api.upload_file( path_or_fileobj=str(tmp), path_in_repo="README.md", repo_id=REPO_ID, repo_type="model", token=TOKEN, commit_message="Initial placeholder model card", ) finally: tmp.unlink(missing_ok=True) print(f"[bootstrap] model card uploaded: https://huggingface.co/{REPO_ID}") if __name__ == "__main__": main()