Spaces:

Pratyush-01
/

physix-live

Sleeping

App Files Community

Pratyush-01 committed on 29 days ago

Commit

0b8f87b

verified ·

1 Parent(s): c59b8f5

cleanup: strip verbose comments from physix/training/sft.py

Browse files

Files changed (1) hide show

physix/training/sft.py +2 -20

physix/training/sft.py CHANGED Viewed

@@ -48,22 +48,10 @@ from physix.models import DEFAULT_MAX_TURNS, PhysiXObservation
 _log = logging.getLogger(__name__)
-# ─── Dataset ──────────────────────────────────────────────────────────────────
 def _gt_completion(system: PhysicalSystem) -> str:
-    """Build the ground-truth completion JSON for one system.
-    We include the system's sampled parameters so the model learns that the
-    ``params`` field must contain the symbols it references in the equation.
-    The SFT target is the *exact* JSON string the env's verifier accepts;
-    GRPO will later teach the model to refine parameter values per trajectory.
-    """
     import re as _re
     eq = system.ground_truth_equation()
-    # Extract all identifier tokens that appear in the equation, then keep
-    # only those that are declared as system parameters. We use a proper
-    # identifier regex (not split-on-whitespace) so symbols inside function
-    # calls like sin(theta) and fractions like -(g/L) are caught.
     reserved = set(system.state_variables) | {"dt", "d", "t", "sin", "cos",
                                                "tan", "exp", "log", "sqrt", "abs"}
     eq_tokens = set(_re.findall(r'\b([A-Za-z_][A-Za-z0-9_]*)\b', eq))
@@ -130,8 +118,6 @@ def _build_obs(system: PhysicalSystem, trajectory: TrajectoryData) -> PhysiXObse
     )
-# ─── Training ─────────────────────────────────────────────────────────────────
 def train_sft(
     model_name: str = "Qwen/Qwen2.5-1.5B-Instruct",
     output_dir: str = "runs/physix-1.5b-sft",
@@ -151,15 +137,11 @@ def train_sft(
 ) -> None:
     _configure_logging()
-    # Heavy imports: only available in [train] env.
     import wandb
     from unsloth import FastLanguageModel
     from trl import SFTTrainer, SFTConfig
-    # Force a fresh W&B run for SFT regardless of any inherited WANDB_RUN_ID
-    # / WANDB_RESUME env vars (those are intended for the GRPO stage). If we
-    # let wandb.init() try to resume a foreign run id it will block for ~90s
-    # fetching that run's history before giving up.
     for stale in ("WANDB_RUN_ID", "WANDB_RESUME"):
         os.environ.pop(stale, None)

 _log = logging.getLogger(__name__)
 def _gt_completion(system: PhysicalSystem) -> str:
+    """Return the ground-truth completion JSON for one system."""
     import re as _re
     eq = system.ground_truth_equation()
     reserved = set(system.state_variables) | {"dt", "d", "t", "sin", "cos",
                                                "tan", "exp", "log", "sqrt", "abs"}
     eq_tokens = set(_re.findall(r'\b([A-Za-z_][A-Za-z0-9_]*)\b', eq))
     )
 def train_sft(
     model_name: str = "Qwen/Qwen2.5-1.5B-Instruct",
     output_dir: str = "runs/physix-1.5b-sft",
 ) -> None:
     _configure_logging()
     import wandb
     from unsloth import FastLanguageModel
     from trl import SFTTrainer, SFTConfig
+    # Clear stale resume vars so SFT starts a fresh W&B run.
     for stale in ("WANDB_RUN_ID", "WANDB_RESUME"):
         os.environ.pop(stale, None)