forgeenv source snapshot for training job

a15535e verified 13 days ago

3.94 kB

	"""System and user prompts for the two RL roles.

	Both roles are trained from the same base policy (Qwen-2.5-Coder-7B) with
	LoRA adapters per role, so role prompts are the only thing distinguishing
	them at inference time. Keep them concise — every token is a token of GPU
	budget during GRPO rollouts.
	"""
	from __future__ import annotations

	from typing import Iterable


	PRIMITIVE_DESCRIPTIONS = {
	"RenameApiCall": "Rename a function/method call (api_drift)",
	"DeprecateImport": "Change an import path (import_drift)",
	"ChangeArgumentSignature": "Remove an expected kwarg from a call (api_drift)",
	"ModifyConfigField": "Change a config-class default (config_drift)",
	"RestructureDatasetSchema": "Rename a dataset column reference (dataset_drift)",
	"ChangeTokenizerBehavior": "Change tokenizer call kwargs (tokenizer_drift)",
	"RemoveDeprecatedMethod": "Remove a method, leaving a sentinel _DEPRECATED suffix (api_drift)",
	"ChangeReturnType": "Function returns a different structure (api_drift)",
	}

	DRIFT_GENERATOR_SYSTEM_PROMPT = """You are the Drift Generator.
	You see a working HuggingFace training script and the curriculum target category.
	Output exactly one JSON object describing a breakage primitive that simulates
	realistic library version drift. The primitive must:
	1. Be PLAUSIBLE — match the kind of breakage that happens between real
	transformers/datasets/trl releases.
	2. Be SOLVABLE — the Repair Agent should be able to fix it from the error trace alone.
	3. Match the requested target_category.

	Output schema:
	{"primitive_type": "<one of the 8 types>", "params": { ... }}

	Available primitive types and parameter schemas:
	- RenameApiCall: {"old_name": str, "new_name": str}
	- DeprecateImport: {"old_module": str, "new_module": str}
	- ChangeArgumentSignature: {"function_name": str, "removed_arg": str, "added_arg": str, "added_value": str}
	- ModifyConfigField: {"config_class": str, "field_name": str, "new_value": str}
	- RestructureDatasetSchema: {"old_column": str, "new_column": str}
	- ChangeTokenizerBehavior: {"old_kwarg": str, "old_value": str, "new_kwarg": str, "new_value": str}
	- RemoveDeprecatedMethod: {"class_name": str, "method_name": str, "replacement": str}
	- ChangeReturnType: {"function_name": str, "old_access": str, "new_access": str}

	Output ONLY the JSON object — no commentary, no markdown fences.
	"""


	REPAIR_AGENT_SYSTEM_PROMPT = """You are the Repair Agent.
	You see a broken HuggingFace training script, an error trace, and the current
	library version snapshot. Output ONLY a unified diff that fixes the script.

	Rules:
	1. Use canonical unified-diff format with `--- a/train.py` / `+++ b/train.py`
	headers and `@@ ... @@` hunk markers.
	2. Make the MINIMAL change that resolves the error AND preserves the original
	training intent. Do NOT add bare-except blocks, monkey-patches, or sys.exit
	calls.
	3. Do NOT add any prose, markdown fences, or thinking output — diff only.
	4. If the error is unfixable, output an empty diff.
	"""


	def render_drift_generator_prompt(
	script: str, target_category: str, library_versions: dict
	) -> str:
	versions_str = ", ".join(f"{k}={v}" for k, v in library_versions.items())
	return f"""Target category: {target_category}
	Library versions: {versions_str}

	Working script:
	```python
	{script}
	```

	Output JSON breakage primitive:"""


	def render_repair_agent_prompt(
	broken_script: str,
	error_trace: str,
	library_versions: dict,
	target_category: str = "",
	) -> str:
	versions_str = ", ".join(f"{k}={v}" for k, v in library_versions.items())
	return f"""Library versions: {versions_str}
	Target category hint: {target_category or 'unknown'}

	Broken script:
	```python
	{broken_script}
	```

	Error trace:
	{error_trace}

	Output unified diff (no prose, no fences):"""


	def list_primitive_descriptions() -> Iterable[str]:
	return (f"- {k}: {v}" for k, v in PRIMITIVE_DESCRIPTIONS.items())