Humanlearning committed on
Commit
e5fe6f5
·
1 Parent(s): 1544ce8

feat: enhance SFT training process with new tokenization method, implement custom trainer class for loss computation, and update README with GRPO launcher details for Unsloth LoRA integration

Browse files
README.md CHANGED
@@ -335,13 +335,20 @@ reward metadata passes. The default SFT config trains the full dataset
335
  (`--max-steps -1`) with bf16/tf32, LoRA rank 32, and Modal GPU fallback
336
  `H200 -> H100 -> A100-80GB -> L40S`. TRL does not support packing or
337
  assistant-only loss for the Gemma 4 vision-language loader, so both remain
338
- disabled for this model. Dataset preprocessing disables multiprocessing because
339
- the Gemma/Unsloth config is not pickle-safe under TRL dataset workers. A warm run
340
- for the 300-400 episode dataset should usually finish in about 20-60 minutes;
341
- first image or model-cache builds can push that closer to 45-90 minutes.
 
 
 
342
 
343
  Continue GRPO from the SFT LoRA:
344
 
 
 
 
 
345
  ```bash
346
  uv run --extra modal modal run --detach scripts/modal_train_grpo.py \
347
  --initial-adapter-repo-id Humanlearning/CyberSecurity_OWASP-unsloth-gemma-4-e2b-it-sft-lora \
 
335
  (`--max-steps -1`) with bf16/tf32, LoRA rank 32, and Modal GPU fallback
336
  `H200 -> H100 -> A100-80GB -> L40S`. TRL does not support packing or
337
  assistant-only loss for the Gemma 4 vision-language loader, so both remain
338
+ disabled for this model. The script pre-tokenizes the small JSONL dataset
339
+ serially before constructing `SFTTrainer`, which avoids TRL multiprocessing
340
+ around the Gemma/Unsloth config object. It also uses the base Transformers loss
341
+ path to avoid a TRL entropy-metric incompatibility with Gemma 4 lazy logits. A
342
+ warm run for the 300-400 episode dataset should usually finish in about 20-60
343
+ minutes; first image or model-cache builds can push that closer to 45-90
344
+ minutes.
345
 
346
  Continue GRPO from the SFT LoRA:
347
 
348
+ The GRPO launcher downloads the Hub adapter, attaches a matching trainable
349
+ Unsloth LoRA to Gemma 4, and then loads the adapter safetensors. This keeps the
350
+ SFT handoff compatible with Gemma 4's Unsloth linear wrappers.
351
+
352
  ```bash
353
  uv run --extra modal modal run --detach scripts/modal_train_grpo.py \
354
  --initial-adapter-repo-id Humanlearning/CyberSecurity_OWASP-unsloth-gemma-4-e2b-it-sft-lora \
scripts/modal_train_grpo.py CHANGED
@@ -1081,11 +1081,12 @@ def train_cybersecurity_owasp_grpo(
1081
  trace_log_every = max(0, int(trace_log_every))
1082
 
1083
  import torch
 
1084
  from unsloth import FastVisionModel
1085
  import transformers.utils.hub as transformers_hub
1086
  from datasets import Dataset
1087
  from huggingface_hub import snapshot_download, whoami
1088
- from peft import PeftModel
1089
  from transformers import TrainerCallback
1090
  from trl import GRPOConfig, GRPOTrainer, clone_chat_template
1091
  try:
@@ -1869,7 +1870,61 @@ def train_cybersecurity_owasp_grpo(
1869
  cache_volume.commit()
1870
  if adapter_source:
1871
  print(f"Loading initial SFT adapter for trainable GRPO continuation: {adapter_source}")
1872
- model = PeftModel.from_pretrained(model, adapter_source, is_trainable=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1873
  if hasattr(model, "print_trainable_parameters"):
1874
  model.print_trainable_parameters()
1875
  else:
 
1081
  trace_log_every = max(0, int(trace_log_every))
1082
 
1083
  import torch
1084
+ from safetensors.torch import load_file as load_safetensors_file
1085
  from unsloth import FastVisionModel
1086
  import transformers.utils.hub as transformers_hub
1087
  from datasets import Dataset
1088
  from huggingface_hub import snapshot_download, whoami
1089
+ from peft import set_peft_model_state_dict
1090
  from transformers import TrainerCallback
1091
  from trl import GRPOConfig, GRPOTrainer, clone_chat_template
1092
  try:
 
1870
  cache_volume.commit()
1871
  if adapter_source:
1872
  print(f"Loading initial SFT adapter for trainable GRPO continuation: {adapter_source}")
1873
# Rebuild a trainable Unsloth LoRA on the freshly loaded Gemma model, then
# copy the saved SFT adapter tensors into it so GRPO continues from the SFT
# state while staying compatible with Unsloth's linear wrappers.
adapter_dir = pathlib.Path(adapter_source)
adapter_config_file = adapter_dir / "adapter_config.json"
if not adapter_config_file.exists():
    raise RuntimeError(f"Initial SFT adapter config not found: {adapter_config_file}")
adapter_config = json.loads(adapter_config_file.read_text(encoding="utf-8"))
# Mirror the saved adapter's hyperparameters; fall back to launcher defaults.
adapter_rank = int(adapter_config.get("r") or lora_rank)
adapter_alpha = int(adapter_config.get("lora_alpha") or adapter_rank * 2)
default_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
adapter_target_modules = list(
    adapter_config.get("target_modules") or default_target_modules
)
print(
    "Attaching Unsloth LoRA before loading SFT weights: "
    f"rank={adapter_rank}, alpha={adapter_alpha}, targets={adapter_target_modules}"
)
model = model_api.get_peft_model(
    model,
    r=adapter_rank,
    target_modules=adapter_target_modules,
    lora_alpha=adapter_alpha,
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)
adapter_weights_path = adapter_dir / "adapter_model.safetensors"
if not adapter_weights_path.exists():
    raise RuntimeError(f"Initial SFT adapter weights not found: {adapter_weights_path}")
adapter_state = load_safetensors_file(str(adapter_weights_path), device="cpu")
adapter_load_result = set_peft_model_state_dict(
    model,
    adapter_state,
    adapter_name="default",
)

def _adapter_key_subset(key_names) -> list:
    # Only LoRA/modules_to_save keys matter for the match check.
    return sorted(k for k in key_names if "lora_" in k or "modules_to_save" in k)

unexpected_adapter_keys = _adapter_key_subset(
    getattr(adapter_load_result, "unexpected_keys", [])
)
if unexpected_adapter_keys:
    raise RuntimeError(
        "Initial SFT adapter keys do not match the trainable Unsloth LoRA. "
        f"Unexpected adapter keys: {unexpected_adapter_keys[:10]}"
    )
missing_lora_keys = _adapter_key_subset(
    getattr(adapter_load_result, "missing_keys", [])
)
if missing_lora_keys:
    print(f"Missing LoRA keys while loading SFT adapter: {missing_lora_keys[:10]}")
1928
  if hasattr(model, "print_trainable_parameters"):
1929
  model.print_trainable_parameters()
1930
  else:
scripts/modal_train_sft.py CHANGED
@@ -373,8 +373,9 @@ def train_cybersecurity_owasp_sft(
373
  ) -> dict[str, Any]:
374
  import inspect
375
 
376
- from datasets import load_dataset
377
  from huggingface_hub import snapshot_download
 
378
  from trl import SFTConfig, SFTTrainer
379
  try:
380
  from trl.chat_template_utils import add_response_schema
@@ -454,6 +455,47 @@ def train_cybersecurity_owasp_sft(
454
  except Exception as exc:
455
  print(f"Tokenizer response schema add skipped: {exc!r}")
456
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  model = model_api.get_peft_model(
458
  model,
459
  r=lora_rank,
@@ -522,7 +564,20 @@ def train_cybersecurity_owasp_sft(
522
  )
523
  if skipped_trainer:
524
  print(f"Skipping unsupported SFTTrainer keys: {skipped_trainer}")
525
- trainer = SFTTrainer(
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  **{
527
  key: value
528
  for key, value in trainer_values.items()
 
373
  ) -> dict[str, Any]:
374
  import inspect
375
 
376
+ from datasets import Dataset, load_dataset
377
  from huggingface_hub import snapshot_download
378
+ from transformers import Trainer
379
  from trl import SFTConfig, SFTTrainer
380
  try:
381
  from trl.chat_template_utils import add_response_schema
 
455
  except Exception as exc:
456
  print(f"Tokenizer response schema add skipped: {exc!r}")
457
 
458
def _tokenize_sft_split(split_name: str, split_dataset) -> Dataset:
    """Serially tokenize one chat split into input_ids/labels rows.

    Runs in-process (no dataset workers), which keeps the Gemma/Unsloth
    config away from TRL's multiprocessing pickling.
    """
    tokenized_rows: list[dict[str, list[int]]] = []
    total_rows = len(split_dataset)
    for row_number, example in enumerate(split_dataset, start=1):
        messages = example["messages"]
        if isinstance(messages, str):
            messages = json.loads(messages)
        chat_text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False,
        )
        encode_kwargs = {
            "add_special_tokens": False,
            "truncation": True,
            "max_length": max_seq_length,
        }
        try:
            encoded = tokenizer(chat_text, **encode_kwargs)
        except TypeError:
            # Some processors require the text as a keyword argument.
            encoded = tokenizer(text=chat_text, **encode_kwargs)
        input_ids = encoded["input_ids"]
        if input_ids and isinstance(input_ids[0], list):
            # Unwrap a batched (list-of-lists) encoding.
            input_ids = input_ids[0]
        input_ids = [int(token_id) for token_id in input_ids[:max_seq_length]]
        if not input_ids:
            raise RuntimeError(f"{split_name} row {row_number} produced no tokens.")
        # Full-sequence loss: labels duplicate input_ids (no assistant-only mask).
        tokenized_rows.append({"input_ids": input_ids, "labels": list(input_ids)})
        if row_number % 500 == 0 or row_number == total_rows:
            print(f"Tokenized {split_name} rows: {row_number}/{total_rows}")
    return Dataset.from_list(tokenized_rows)

dataset["train"] = _tokenize_sft_split("train", dataset["train"])
if has_validation:
    dataset["validation"] = _tokenize_sft_split("validation", dataset["validation"])
+
499
  model = model_api.get_peft_model(
500
  model,
501
  r=lora_rank,
 
564
  )
565
  if skipped_trainer:
566
  print(f"Skipping unsupported SFTTrainer keys: {skipped_trainer}")
567
class CyberSecurityOWASPSFTTrainer(SFTTrainer):
    """SFTTrainer variant that routes loss through the base Transformers path.

    Bypasses TRL's SFTTrainer loss override and calls Trainer.compute_loss
    directly (per the README note, this avoids a TRL entropy-metric
    incompatibility with Gemma 4 lazy logits).
    """

    def compute_loss(
        self,
        model,
        inputs,
        return_outputs: bool = False,
        num_items_in_batch=None,
    ):
        # Older transformers releases lack num_items_in_batch; forward it
        # only when the base signature actually accepts it.
        base_params = inspect.signature(Trainer.compute_loss).parameters
        if "num_items_in_batch" in base_params:
            return Trainer.compute_loss(
                self,
                model,
                inputs,
                return_outputs=return_outputs,
                num_items_in_batch=num_items_in_batch,
            )
        return Trainer.compute_loss(self, model, inputs, return_outputs=return_outputs)
579
+
580
+ trainer = CyberSecurityOWASPSFTTrainer(
581
  **{
582
  key: value
583
  for key, value in trainer_values.items()
tests/test_modal_scenario_cache_static.py CHANGED
@@ -59,6 +59,10 @@ def test_modal_sft_defaults_match_300_episode_fast_handoff_plan():
59
  assert '"packing": False' in source
60
  assert '"packing_strategy": "bfd"' not in source
61
  assert '"dataset_num_proc": None' in source
 
 
 
 
62
  assert '"bf16": True' in source
63
  assert '"tf32": True' in source
64
  assert '"hub_strategy": "every_save"' in source
@@ -74,4 +78,7 @@ def test_modal_grpo_loads_sft_adapter_from_hub_as_trainable_lora():
74
  assert "initial_adapter_repo_id" in source
75
  assert "Downloading initial SFT adapter" in source
76
  assert "snapshot_download(" in source
77
- assert "PeftModel.from_pretrained(model, adapter_source, is_trainable=True)" in source
 
 
 
 
59
  assert '"packing": False' in source
60
  assert '"packing_strategy": "bfd"' not in source
61
  assert '"dataset_num_proc": None' in source
62
+ assert "Dataset.from_list(tokenized_rows)" in source
63
+ assert "tokenizer.apply_chat_template" in source
64
+ assert "class CyberSecurityOWASPSFTTrainer(SFTTrainer)" in source
65
+ assert "Trainer.compute_loss(self, model, inputs" in source
66
  assert '"bf16": True' in source
67
  assert '"tf32": True' in source
68
  assert '"hub_strategy": "every_save"' in source
 
78
  assert "initial_adapter_repo_id" in source
79
  assert "Downloading initial SFT adapter" in source
80
  assert "snapshot_download(" in source
81
+ assert "Attaching Unsloth LoRA before loading SFT weights" in source
82
+ assert "load_safetensors_file(str(adapter_weights_path), device=\"cpu\")" in source
83
+ assert "set_peft_model_state_dict(" in source
84
+ assert "unexpected_adapter_keys" in source