Humanlearning committed on
Commit 1544ce8 · 1 Parent(s): 60f97ab

fix: update README with SFT training configuration details, modify Modal training scripts to disable assistant-only loss and packing for compatibility, and adjust test assertions to reflect these changes
README.md CHANGED
@@ -331,11 +331,14 @@ uv run --extra modal modal run --detach scripts/modal_train_sft.py \
 `scripts/modal_train_sft.py` re-checks the JSONL reward metadata locally before
 upload and again inside Modal before loading the model. It refuses to start SFT
 unless all required curriculum difficulties are represented and the verifier
-reward metadata passes. The default SFT config trains one full epoch
-(`--max-steps -1`) with packed assistant-only loss, bf16/tf32, LoRA rank 32,
-and Modal GPU fallback `H200 -> H100 -> A100-80GB -> L40S`. A warm run for the
-300-episode dataset should usually finish in about 15-45 minutes; first image
-or model-cache builds can push that closer to 35-75 minutes.
+reward metadata passes. The default SFT config trains the full dataset
+(`--max-steps -1`) with bf16/tf32, LoRA rank 32, and Modal GPU fallback
+`H200 -> H100 -> A100-80GB -> L40S`. TRL does not support packing or
+assistant-only loss for the Gemma 4 vision-language loader, so both remain
+disabled for this model. Dataset preprocessing disables multiprocessing because
+the Gemma/Unsloth config is not pickle-safe under TRL dataset workers. A warm run
+for the 300-400 episode dataset should usually finish in about 20-60 minutes;
+first image or model-cache builds can push that closer to 45-90 minutes.

 Continue GRPO from the SFT LoRA:
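Pulled out of the diff above, a minimal sketch of the new defaults as a TRL `SFTConfig`; the `output_dir` and surrounding wiring are placeholder assumptions, not copied from the script:

```python
# Minimal sketch of the SFT defaults described above, assuming TRL's
# SFTConfig field names; output_dir is a placeholder, and the LoRA rank 32
# adapter is configured in the (omitted) Unsloth/PEFT setup, not here.
from trl import SFTConfig

sft_config = SFTConfig(
    output_dir="outputs/sft",    # placeholder path
    max_steps=-1,                # -1 = train the full dataset
    packing=False,               # unsupported for the Gemma 4 VL loader
    assistant_only_loss=False,   # likewise unsupported for this model
    dataset_num_proc=None,       # no worker processes: the Gemma/Unsloth
                                 # config cannot be pickled to subprocesses
    bf16=True,
    tf32=True,
    gradient_checkpointing=True,
)
```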
 
scripts/modal_train_grpo.py CHANGED
@@ -1088,7 +1088,11 @@ def train_cybersecurity_owasp_grpo(
     from peft import PeftModel
     from transformers import TrainerCallback
     from trl import GRPOConfig, GRPOTrainer, clone_chat_template
-    from trl.chat_template_utils import add_response_schema
+    try:
+        from trl.chat_template_utils import add_response_schema
+    except ImportError:
+        def add_response_schema(tokenizer):
+            return tokenizer

     import trackio
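The guarded import above is a standard pattern for optional APIs: newer TRL releases expose `add_response_schema`, older ones do not, and the no-op fallback keeps the module importable either way. The call site below is an illustrative assumption, not taken from the script:

```python
# Guarded import: fall back to a pass-through when the helper is missing.
try:
    from trl.chat_template_utils import add_response_schema
except ImportError:
    def add_response_schema(tokenizer):
        # Older TRL: return the tokenizer unchanged.
        return tokenizer

# Hypothetical call site: behaves identically whether or not the real
# helper was found, since the fallback is a pass-through.
# tokenizer = add_response_schema(tokenizer)
```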
 
scripts/modal_train_sft.py CHANGED
@@ -376,7 +376,11 @@ def train_cybersecurity_owasp_sft(
     from datasets import load_dataset
     from huggingface_hub import snapshot_download
     from trl import SFTConfig, SFTTrainer
-    from trl.chat_template_utils import add_response_schema
+    try:
+        from trl.chat_template_utils import add_response_schema
+    except ImportError:
+        def add_response_schema(tokenizer):
+            return tokenizer
     from unsloth import FastVisionModel

     model_name = _ensure_gemma4_model(model_name)
@@ -478,6 +482,7 @@ def train_cybersecurity_owasp_sft(
         "gradient_accumulation_steps": gradient_accumulation_steps,
         "learning_rate": learning_rate,
         "optim": "adamw_8bit",
+        "dataset_num_proc": None,
         "logging_steps": 1,
         "logging_first_step": True,
         "save_steps": max(10, max_steps) if max_steps > 0 else 100,
@@ -485,9 +490,8 @@
         "project": trackio_project,
         "trackio_space_id": trackio_space_id,
         "run_name": run_name,
-        "assistant_only_loss": True,
-        "packing": True,
-        "packing_strategy": "bfd",
+        "assistant_only_loss": False,
+        "packing": False,
         "bf16": True,
         "tf32": True,
         "gradient_checkpointing": True,
tests/test_modal_scenario_cache_static.py CHANGED
@@ -55,9 +55,10 @@ def test_modal_sft_defaults_match_300_episode_fast_handoff_plan():
     assert source.count("max_steps: int = -1") >= 2
     assert source.count("per_device_train_batch_size: int = 4") >= 2
     assert source.count("gradient_accumulation_steps: int = 4") >= 2
-    assert '"assistant_only_loss": True' in source
-    assert '"packing": True' in source
-    assert '"packing_strategy": "bfd"' in source
+    assert '"assistant_only_loss": False' in source
+    assert '"packing": False' in source
+    assert '"packing_strategy": "bfd"' not in source
+    assert '"dataset_num_proc": None' in source
     assert '"bf16": True' in source
     assert '"tf32": True' in source
     assert '"hub_strategy": "every_save"' in source
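These assertions scan the raw script text rather than executing it, so a drive-by edit to the defaults fails CI without launching a Modal run. A sketch of the pattern, where the test body and the way `source` is loaded are assumptions based on the diff:

```python
# Hypothetical sketch of the static-check pattern this test uses: read the
# training script's source and assert on literal config strings.
from pathlib import Path

def test_sft_defaults_stay_disabled():
    source = Path("scripts/modal_train_sft.py").read_text()  # assumed loading
    assert '"assistant_only_loss": False' in source
    assert '"packing": False' in source
    assert '"packing_strategy": "bfd"' not in source
    assert '"dataset_num_proc": None' in source
```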