| """Pre-create the trained-model repo on the Hub with a placeholder card. | |
| After GRPO training in the Colab notebook, the same repo just receives the | |
| LoRA `adapter_config.json` + `adapter_model.safetensors` and the curated | |
| `repair_library.json` — no extra setup needed there. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from textwrap import dedent | |
| from huggingface_hub import HfApi | |
# Hub coordinates and credentials, both overridable from the environment.
REPO_ID = os.getenv("MODEL_REPO", "akhiilll/forgeenv-repair-agent")
# Accept either conventional env var name for the write token.
TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
if not TOKEN:
    # Abort early with a hint rather than failing later inside the API call.
    sys.exit("set HF_TOKEN or run `huggingface-cli login` first.")
# Placeholder model card (YAML front matter + markdown body) pushed to the
# repo as README.md.  The front matter keys (license, base_model, tags, ...)
# drive Hub metadata, so this is runtime content — edit deliberately.
CARD = dedent(
    """\
---
license: apache-2.0
base_model: Qwen/Qwen2.5-3B-Instruct
library_name: peft
pipeline_tag: text-generation
tags:
- openenv
- self-improvement
- code-repair
- schema-drift
- reinforcement-learning
- huggingface
- lora
---
# ForgeEnv Repair Agent (LoRA)
> **Status: training-in-progress.** The LoRA adapter weights and
> `repair_library.json` will be pushed here once the Colab training
> notebook finishes warm-start SFT + GRPO. The repo is created up
> front so all the project links resolve.
A LoRA adapter on top of
[`Qwen/Qwen2.5-3B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct),
trained inside [`akhiilll/forgeenv`](https://huggingface.co/spaces/akhiilll/forgeenv) —
a self-improving OpenEnv environment for HuggingFace ecosystem repair
under library version drift. Training pipeline: warm-start SFT (1k
steps) + GRPO (TRL + Unsloth) with R-Zero-style Challenger / Solver
co-evolution.
## Files (after training pushes)
| File | Purpose |
| ----------------------------- | ---------------------------------------- |
| `adapter_config.json` | LoRA adapter configuration |
| `adapter_model.safetensors` | LoRA adapter weights |
| `tokenizer*` | Tokenizer files (Qwen2.5) |
| `repair_library.json` | Curated successful repair patterns |
## Usage (post-training)
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-3B-Instruct", torch_dtype="auto", device_map="auto"
)
model = PeftModel.from_pretrained(base, "akhiilll/forgeenv-repair-agent")
tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
```
## Live demo
Try it in a browser at
[`akhiilll/forgeenv-demo`](https://huggingface.co/spaces/akhiilll/forgeenv-demo)
(Gradio + ZeroGPU).
## Citations
- Huang et al., *R-Zero: Self-Evolving Reasoning LLM From Zero Data* (2025)
- Zhao et al., *Absolute Zero: Reinforced Self-play Reasoning with Zero Data* (2025)
- Liu et al., *SPIRAL: Self-Play on Zero-Sum Games* (2025)
"""
)
def main() -> None:
    """Create (or reuse) the model repo and upload the placeholder card.

    Idempotent: ``exist_ok=True`` makes repo creation a no-op on reruns,
    and the README upload just adds a new commit each time.
    """
    api = HfApi()
    # Ensure the target repo exists; safe to call repeatedly.
    api.create_repo(
        repo_id=REPO_ID,
        repo_type="model",
        token=TOKEN,
        exist_ok=True,
        private=False,
    )
    print(f"[bootstrap] repo ready: {REPO_ID}")
    # upload_file accepts raw bytes for path_or_fileobj, so push the card
    # straight from memory — no temp file in the CWD, no cleanup that can
    # leave residue behind on a hard crash.
    api.upload_file(
        path_or_fileobj=CARD.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=REPO_ID,
        repo_type="model",
        token=TOKEN,
        commit_message="Initial placeholder model card",
    )
    print(f"[bootstrap] model card uploaded: https://huggingface.co/{REPO_ID}")
| if __name__ == "__main__": | |
| main() | |