| """Fast simulation executor for development.
|
|
|
| Static-analysis-based execution simulator. Sub-100ms per call. No Docker
|
| required. The success probability of a simulated run depends on whether
|
| the script contains expected HF training markers (model imports, training
|
| calls, save calls). When the simulation succeeds, a synthetic decreasing
|
| loss curve is emitted; when it fails, a representative HF error is raised.
|
| """
|
from __future__ import annotations

import random
import re
import time
from typing import Optional

from forgeenv.sandbox.ast_validator import validate_script
from forgeenv.tasks.models import ExecutionResult, Task
|
|
|
|
|
class SimulationExecutor:
    """Simulates script execution via static analysis of the script text.

    Use this throughout development phases. Real Docker execution is added
    later for grounded final-stage verification.

    A simulated run "succeeds" with a probability derived from how plausible
    the script looks as a Hugging Face training script (see
    :meth:`_success_probability`). Successful runs emit a synthetic
    decreasing loss curve; failed runs emit a representative HF error.
    """

    # Marker groups that raise the simulated success probability when found.
    _MODEL_IMPORT_MARKERS = ("from transformers", "import torch", "from datasets")
    _TRAINING_CALL_MARKERS = ("trainer.train()", ".fit(", "train_loop", "for epoch")
    _SAVE_MARKERS = ("save_pretrained", "save_model", "torch.save")

    # Known-broken / deprecated API markers: any hit forces guaranteed failure.
    _BROKEN_MARKERS = (
        "_DEPRECATED(",
        "transformers.legacy",
        "from transformers.training import",
        ".start_training(",
        "load_from_hub(",
        "save_to_hub(",
        "pad_to_max_length=",
        "evaluation_loop(",
    )

    # Usages that almost always fail in practice; a match caps the success
    # probability at 5%. Compiled once here instead of on every execute() call.
    _SUSPECT_PATTERNS = (
        re.compile(r"['\"]input_text['\"]\s*[]:),]"),
        re.compile(r"['\"]words['\"]\s*[]:),]"),
        re.compile(r"\btruncate\s*="),
    )

    # Representative HF errors surfaced on a simulated runtime failure.
    _ERROR_TYPES = (
        "ImportError: cannot import name 'OldTrainer' from 'transformers'",
        "AttributeError: 'Trainer' object has no attribute 'evaluate_model'",
        "KeyError: 'text' column not found in dataset",
        "TypeError: __init__() got an unexpected keyword argument 'num_epochs'",
        "RuntimeError: Expected input batch_size (16) to match target batch_size (32)",
        "ModuleNotFoundError: No module named 'transformers.legacy'",
    )

    def __init__(self, seed: Optional[int] = None) -> None:
        """Create an executor.

        Args:
            seed: Optional RNG seed for reproducible simulations. When
                ``None``, the shared module-level ``random`` generator is
                used (not reproducible across runs).
        """
        self._rng = random.Random(seed) if seed is not None else random

    def execute(
        self, script_content: str, task: Optional[Task] = None
    ) -> ExecutionResult:
        """Simulate running ``script_content`` and return an ExecutionResult.

        Args:
            script_content: Source text of the candidate training script.
            task: Reserved for task-specific simulation; currently unused.

        Returns:
            ExecutionResult with exit_code 0 and a synthetic loss curve on
            simulated success, or exit_code 1 and an error message on
            validation failure, syntax error, or simulated runtime failure.
        """
        # monotonic clock: immune to wall-clock adjustments during the call
        start = time.monotonic()

        validation = validate_script(script_content)
        if not validation.is_valid:
            return self._error_result(
                f"Validation failed: {'; '.join(validation.violations)}",
                script_content,
                start,
            )

        try:
            compile(script_content, "<forge_script>", "exec")
        except SyntaxError as e:
            return self._error_result(f"SyntaxError: {e}", script_content, start)

        if self._rng.random() < self._success_probability(script_content):
            return self._success_result(script_content, start)
        return self._failure_result(script_content, start)

    def _success_probability(self, script_content: str) -> float:
        """Score the script's chance of success from static markers.

        Base 0.3, plus 0.3/0.2/0.1 for model-import / training-call / save
        markers respectively; forced to 0.0 by any broken-API marker and
        capped at 0.05 by any suspect usage pattern.
        """
        prob = 0.3
        if any(kw in script_content for kw in self._MODEL_IMPORT_MARKERS):
            prob += 0.3
        if any(kw in script_content for kw in self._TRAINING_CALL_MARKERS):
            prob += 0.2
        if any(kw in script_content for kw in self._SAVE_MARKERS):
            prob += 0.1
        if any(marker in script_content for marker in self._BROKEN_MARKERS):
            prob = 0.0
        if any(p.search(script_content) for p in self._SUSPECT_PATTERNS):
            prob = min(prob, 0.05)
        return prob

    @staticmethod
    def _elapsed_ms(start: float) -> int:
        """Milliseconds elapsed since ``start`` (a time.monotonic() value)."""
        return int((time.monotonic() - start) * 1000)

    def _error_result(
        self, stderr: str, script_content: str, start: float
    ) -> ExecutionResult:
        """Build a failed result for validation/compile errors (no synthetic latency)."""
        return ExecutionResult(
            exit_code=1,
            stdout="",
            stderr=stderr,
            wall_time_ms=self._elapsed_ms(start),
            script_content=script_content,
        )

    def _success_result(self, script_content: str, start: float) -> ExecutionResult:
        """Build a successful result with a synthetic decreasing loss curve."""
        steps = self._rng.randint(20, 50)
        log_lines: list[str] = []
        loss = self._rng.uniform(2.0, 4.0)
        for step in range(1, steps + 1):
            # Geometric decay per step yields a plausible training curve.
            loss *= self._rng.uniform(0.92, 0.99)
            log_lines.append(f"step={step} loss={loss:.4f}")
        log_lines.append("eval_accuracy=0.78")
        log_lines.append("TRAINING_COMPLETE")
        return ExecutionResult(
            exit_code=0,
            stdout="\n".join(log_lines),
            stderr="",
            # Add 1-5 s of simulated training latency on top of real elapsed time.
            wall_time_ms=self._elapsed_ms(start) + self._rng.randint(1000, 5000),
            checkpoint_exists=True,
            peak_memory_mb=self._rng.uniform(500, 2000),
            script_content=script_content,
        )

    def _failure_result(self, script_content: str, start: float) -> ExecutionResult:
        """Build a failed result with a representative HF runtime error."""
        return ExecutionResult(
            exit_code=1,
            stdout="",
            stderr=self._rng.choice(self._ERROR_TYPES),
            # Failures surface faster than successes: add only 100-500 ms.
            wall_time_ms=self._elapsed_ms(start) + self._rng.randint(100, 500),
            script_content=script_content,
        )
|
|
|