| """Pre-create the trained-model repo on the Hub with a placeholder card. |
| |
| After GRPO training in the Colab notebook, the same repo just receives the |
| LoRA `adapter_config.json` + `adapter_model.safetensors` and the curated |
| `repair_library.json` — no extra setup needed there. |
| """ |
from __future__ import annotations

import os
import sys
from pathlib import Path
from textwrap import dedent

from huggingface_hub import HfApi, get_token

REPO_ID = os.environ.get("MODEL_REPO", "akhiilll/forgeenv-repair-agent")
TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
    or get_token()  # falls back to the token cached by `huggingface-cli login`
)
if not TOKEN:
    sys.exit("Set HF_TOKEN or run `huggingface-cli login` first.")

CARD = dedent(
    """\
    ---
    license: apache-2.0
    base_model: Qwen/Qwen2.5-3B-Instruct
    library_name: peft
    pipeline_tag: text-generation
    tags:
    - openenv
    - self-improvement
    - code-repair
    - schema-drift
    - reinforcement-learning
    - huggingface
    - lora
    ---

    # ForgeEnv Repair Agent (LoRA)

    > **Status: training in progress.** The LoRA adapter weights and
    > `repair_library.json` will be pushed here once the Colab training
    > notebook finishes warm-start SFT + GRPO. The repo is created up
    > front so all the project links resolve.

    A LoRA adapter on top of
    [`Qwen/Qwen2.5-3B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct),
    trained inside [`akhiilll/forgeenv`](https://huggingface.co/spaces/akhiilll/forgeenv) —
    a self-improving OpenEnv environment for repairing HuggingFace-ecosystem
    code under library version drift. Training pipeline: warm-start SFT (1k
    steps) + GRPO (TRL + Unsloth) with R-Zero-style Challenger / Solver
    co-evolution.
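
    The GRPO stage runs on TRL's `GRPOTrainer`. Below is a minimal sketch of
    that loop, assuming TRL's documented API and a *hypothetical* stand-in
    reward; the real reward comes from executing candidate repairs inside
    ForgeEnv:

    ```python
    from datasets import Dataset
    from trl import GRPOConfig, GRPOTrainer

    def reward_patch_formatted(completions, **kwargs):
        # Hypothetical shaping reward: favor completions that emit a fenced
        # patch block. ForgeEnv's actual reward is execution-based.
        return [1.0 if "```" in c else 0.0 for c in completions]

    trainer = GRPOTrainer(
        model="Qwen/Qwen2.5-3B-Instruct",
        reward_funcs=reward_patch_formatted,
        args=GRPOConfig(output_dir="grpo-sketch", num_generations=4),
        train_dataset=Dataset.from_dict({"prompt": ["Repair this snippet: ..."]}),
    )
    trainer.train()
    ```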

    ## Files (after training pushes)

    | File                        | Purpose                             |
    | --------------------------- | ----------------------------------- |
    | `adapter_config.json`       | LoRA adapter configuration          |
    | `adapter_model.safetensors` | LoRA adapter weights                |
    | `tokenizer*`                | Tokenizer files (Qwen2.5)           |
    | `repair_library.json`       | Curated successful repair patterns  |

    ## Usage (post-training)

    ```python
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen2.5-3B-Instruct", torch_dtype="auto", device_map="auto"
    )
    model = PeftModel.from_pretrained(base, "akhiilll/forgeenv-repair-agent")
    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    ```
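
    After training pushes `repair_library.json`, it can be pulled like any
    other repo file. A minimal sketch; the JSON's exact schema is whatever
    the notebook curates:

    ```python
    import json

    from huggingface_hub import hf_hub_download

    path = hf_hub_download("akhiilll/forgeenv-repair-agent", "repair_library.json")
    with open(path, encoding="utf-8") as f:
        repair_library = json.load(f)
    ```

    For merged-weights inference, `model.merge_and_unload()` folds the LoRA
    adapter into the base model after loading.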

    ## Live demo

    Try it in a browser at
    [`akhiilll/forgeenv-demo`](https://huggingface.co/spaces/akhiilll/forgeenv-demo)
    (Gradio + ZeroGPU).

    ## Citations

    - Huang et al., *R-Zero: Self-Evolving Reasoning LLM From Zero Data* (2025)
    - Zhao et al., *Absolute Zero: Reinforced Self-play Reasoning with Zero Data* (2025)
    - Liu et al., *SPIRAL: Self-Play on Zero-Sum Games* (2025)
    """
)


def main() -> None:
    api = HfApi()
    api.create_repo(
        repo_id=REPO_ID,
        repo_type="model",
        token=TOKEN,
        exist_ok=True,
        private=False,
    )
    print(f"[bootstrap] repo ready: {REPO_ID}")

    # Stage the card in a temp file so `upload_file` gets a real path, and
    # clean it up whether or not the upload succeeds.
    tmp = Path(".model_card_tmp.md")
    tmp.write_text(CARD, encoding="utf-8")
    try:
        api.upload_file(
            path_or_fileobj=str(tmp),
            path_in_repo="README.md",
            repo_id=REPO_ID,
            repo_type="model",
            token=TOKEN,
            commit_message="Initial placeholder model card",
        )
    finally:
        tmp.unlink(missing_ok=True)
    print(f"[bootstrap] model card uploaded: https://huggingface.co/{REPO_ID}")


if __name__ == "__main__":
    main()