"""Pre-create the trained-model repo on the Hub with a placeholder card.



After GRPO training in the Colab notebook, the same repo just receives the

LoRA `adapter_config.json` + `adapter_model.safetensors` and the curated

`repair_library.json` — no extra setup needed there.

"""
from __future__ import annotations

import os
import sys
from pathlib import Path
from textwrap import dedent

from huggingface_hub import HfApi


REPO_ID = os.environ.get("MODEL_REPO", "akhiilll/forgeenv-repair-agent")
TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
if not TOKEN:
    sys.exit("Set HF_TOKEN (or HUGGINGFACE_TOKEN) in the environment first.")


CARD = dedent(
    """\

    ---

    license: apache-2.0

    base_model: Qwen/Qwen2.5-3B-Instruct

    library_name: peft

    pipeline_tag: text-generation

    tags:

      - openenv

      - self-improvement

      - code-repair

      - schema-drift

      - reinforcement-learning

      - huggingface

      - lora

    ---

# ForgeEnv Repair Agent (LoRA)

> **Status: training-in-progress.** The LoRA adapter weights and
> `repair_library.json` will be pushed here once the Colab training
> notebook finishes warm-start SFT + GRPO. The repo is created up
> front so all the project links resolve.

A LoRA adapter on top of
[`Qwen/Qwen2.5-3B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct),
trained inside [`akhiilll/forgeenv`](https://huggingface.co/spaces/akhiilll/forgeenv) —
a self-improving OpenEnv environment for HuggingFace ecosystem repair
under library version drift. Training pipeline: warm-start SFT (1k
steps) + GRPO (TRL + Unsloth) with R-Zero-style Challenger / Solver
co-evolution.

## Files (after training pushes)

| File                        | Purpose                            |
| --------------------------- | ---------------------------------- |
| `adapter_config.json`       | LoRA adapter configuration         |
| `adapter_model.safetensors` | LoRA adapter weights               |
| `tokenizer*`                | Tokenizer files (Qwen2.5)          |
| `repair_library.json`       | Curated successful repair patterns |

## Usage (post-training)

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-3B-Instruct", torch_dtype="auto", device_map="auto"
)
model = PeftModel.from_pretrained(base, "akhiilll/forgeenv-repair-agent")
tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
```
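
Once training pushes it, the curated `repair_library.json` can be pulled
from the same repo without loading the model at all. A minimal sketch,
assuming the file exists on the Hub and is plain JSON (its exact schema is
whatever the training run writes):

```python
import json

from huggingface_hub import hf_hub_download

# Download (or reuse the cached copy of) the curated repair library.
path = hf_hub_download(
    repo_id="akhiilll/forgeenv-repair-agent",
    filename="repair_library.json",
)
with open(path, encoding="utf-8") as f:
    repair_library = json.load(f)
```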

## Live demo

Try it in a browser at
[`akhiilll/forgeenv-demo`](https://huggingface.co/spaces/akhiilll/forgeenv-demo)
(Gradio + ZeroGPU).

## Citations

- Huang et al., *R-Zero: Self-Evolving Reasoning LLM From Zero Data* (2025)
- Zhao et al., *Absolute Zero: Reinforced Self-play Reasoning with Zero Data* (2025)
- Liu et al., *SPIRAL: Self-Play on Zero-Sum Games* (2025)

    """
)


def main() -> None:
    api = HfApi()
    api.create_repo(
        repo_id=REPO_ID,
        repo_type="model",
        token=TOKEN,
        exist_ok=True,
        private=False,
    )
    print(f"[bootstrap] repo ready: {REPO_ID}")

    tmp = Path(".model_card_tmp.md")
    tmp.write_text(CARD, encoding="utf-8")
    try:
        api.upload_file(
            path_or_fileobj=str(tmp),
            path_in_repo="README.md",
            repo_id=REPO_ID,
            repo_type="model",
            token=TOKEN,
            commit_message="Initial placeholder model card",
        )
    finally:
        tmp.unlink(missing_ok=True)
    print(f"[bootstrap] model card uploaded: https://huggingface.co/{REPO_ID}")


if __name__ == "__main__":
    main()