YashashMathur committed on
Commit
c51cef7
·
verified ·
1 Parent(s): 4981837

Upload hf_training/train.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_training/train.py +13 -13
hf_training/train.py CHANGED
@@ -1,3 +1,4 @@
 
1
  """
2
  AEGIS Training Script for HF Spaces (A10G Small, 24GB VRAM)
3
  - Loads Qwen2.5-7B-Unsloth-bnb-4bit + step_50 LoRA adapter
@@ -16,7 +17,7 @@ from safetensors.torch import load_file
16
  from huggingface_hub import login, HfApi, hf_hub_download, snapshot_download
17
  from peft import set_peft_model_state_dict
18
 
19
- # ─── Auth & Config ────────────────────────────────────────────────────────────
20
  HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
21
 
22
  if not HF_TOKEN:
@@ -64,7 +65,7 @@ CURRICULUM_SWITCH = 150
64
  GRAD_CLIP = 1.0
65
  SAVE_EVERY = 50
66
 
67
- # ─── Minimal HTTP Server (keeps port 7860 alive) ──────────────────────────────
68
  TRAIN_STATUS = {"step": 0, "total": GRPO_STEPS, "phase": "starting", "reward": 0.0, "history": []}
69
 
70
  class StatusHandler(BaseHTTPRequestHandler):
@@ -129,7 +130,7 @@ def start_server():
129
  threading.Thread(target=start_server, daemon=True).start()
130
  print("Status server running on :7860")
131
 
132
- # ─── Dataset ─────────────────────────────────────────────────────────────────
133
  import hashlib
134
 
135
  with open("aegis_training_data_500.json") as f:
@@ -154,7 +155,7 @@ for items in families.values():
154
  eval_set.extend(items[k:])
155
  print(f"Dataset: {len(train_set)} train / {len(eval_set)} eval")
156
 
157
- # ─── Policy Rules + Reward ────────────────────────────────────────────────────
158
  POLICY_RULES = {
159
  "PRI-01": "Do not access PII without authorization.",
160
  "PRI-02": "Do not transmit PII.",
@@ -263,8 +264,7 @@ def score_response(a, truth, raw_text, level=1, fmt_ema=1.0):
263
  return {"total": total, "format": 1.0, "decision": d, "violation": v,
264
  "citation": c, "explanation": e, "r_bonus": r_bonus, "penalty": penalty}
265
 
266
- # ─── Load Model + Step-50 Checkpoint ─────────────────────────────────────────
267
- from unsloth import FastLanguageModel
268
 
269
  TRAIN_STATUS["phase"] = "loading model"
270
  print("\nLoading Qwen2.5-7B base model...")
@@ -303,10 +303,10 @@ if hasattr(model, "generation_config"):
303
  model.generation_config.max_length = None
304
  print(f"GPU: {torch.cuda.mem_get_info()[0]/1e9:.1f} GB free\n")
305
 
306
- # ─── Remaining SFT (10 steps) ────────────────────────────────────────────────
307
  if SFT_STEPS > 0:
308
  TRAIN_STATUS["phase"] = "SFT warmup"
309
- print(f"SFT warmup — {SFT_STEPS} remaining steps...")
310
  sft_opt = torch.optim.AdamW(model.parameters(), lr=1e-4)
311
  model.train()
312
  for step in range(SFT_STEPS):
@@ -342,7 +342,7 @@ if SFT_STEPS > 0:
342
  torch.cuda.empty_cache()
343
  print("SFT complete.\n")
344
 
345
- # ─── GRPO Training ────────────────────────────────────────────────────────────
346
  TRAIN_STATUS["phase"] = "GRPO"
347
  FastLanguageModel.for_training(model)
348
  optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=GRPO_LR)
@@ -434,7 +434,7 @@ for step in range(GRPO_STEPS):
434
  })
435
 
436
  print(
437
- f"Step {step:04d} | rew={avg_r:.3f}±{rewards.std():.3f} | "
438
  f"dec={comp['decision']:.3f} vio={comp['violation']:.3f} "
439
  f"cite={comp['citation']:.3f} expl={comp['explanation']:.3f} "
440
  f"bon={comp['r_bonus']:.3f} pen={comp['penalty']:.3f} | "
@@ -462,14 +462,14 @@ for step in range(GRPO_STEPS):
462
  del gen, p_enc, resps, acts, rewards, adv, reward_dicts
463
 
464
  except torch.cuda.OutOfMemoryError:
465
- print(f"Step {step:04d} | OOM — clearing cache and skipping")
466
  torch.cuda.empty_cache()
467
  gc.collect()
468
  except Exception as e:
469
  print(f"Step {step:04d} | Error: {type(e).__name__}: {e}")
470
  torch.cuda.empty_cache()
471
 
472
- # ─── Final Model Save ─────────────────────────────────────────────────────────
473
  TRAIN_STATUS["phase"] = "saving final model"
474
  print("\nSaving final model to HF Hub...")
475
  model.save_pretrained("/tmp/aegis_final")
@@ -478,7 +478,7 @@ api.upload_folder(
478
  folder_path = "/tmp/aegis_final",
479
  repo_id = CKPT_REPO,
480
  path_in_repo = "final",
481
- commit_message = "AEGIS final — 500 GRPO steps complete",
482
  token = HF_TOKEN,
483
  )
484
  print(f"Final model: https://huggingface.co/{CKPT_REPO}/tree/main/final")
 
1
+ from unsloth import FastLanguageModel
2
  """
3
  AEGIS Training Script for HF Spaces (A10G Small, 24GB VRAM)
4
  - Loads Qwen2.5-7B-Unsloth-bnb-4bit + step_50 LoRA adapter
 
17
  from huggingface_hub import login, HfApi, hf_hub_download, snapshot_download
18
  from peft import set_peft_model_state_dict
19
 
20
+ # ─── Auth & Config ────────────────────────────────────────────────────────────
21
  HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
22
 
23
  if not HF_TOKEN:
 
65
  GRAD_CLIP = 1.0
66
  SAVE_EVERY = 50
67
 
68
+ # ─── Minimal HTTP Server (keeps port 7860 alive) ──────────────────────────────
69
  TRAIN_STATUS = {"step": 0, "total": GRPO_STEPS, "phase": "starting", "reward": 0.0, "history": []}
70
 
71
  class StatusHandler(BaseHTTPRequestHandler):
 
130
  threading.Thread(target=start_server, daemon=True).start()
131
  print("Status server running on :7860")
132
 
133
+ # ─── Dataset ─────────────────────────────────────────────────────────────────
134
  import hashlib
135
 
136
  with open("aegis_training_data_500.json") as f:
 
155
  eval_set.extend(items[k:])
156
  print(f"Dataset: {len(train_set)} train / {len(eval_set)} eval")
157
 
158
+ # ─── Policy Rules + Reward ────────────────────────────────────────────────────
159
  POLICY_RULES = {
160
  "PRI-01": "Do not access PII without authorization.",
161
  "PRI-02": "Do not transmit PII.",
 
264
  return {"total": total, "format": 1.0, "decision": d, "violation": v,
265
  "citation": c, "explanation": e, "r_bonus": r_bonus, "penalty": penalty}
266
 
267
+ # ─── Load Model + Step-50 Checkpoint ─────────────────────────────────────────
 
268
 
269
  TRAIN_STATUS["phase"] = "loading model"
270
  print("\nLoading Qwen2.5-7B base model...")
 
303
  model.generation_config.max_length = None
304
  print(f"GPU: {torch.cuda.mem_get_info()[0]/1e9:.1f} GB free\n")
305
 
306
+ # ─── Remaining SFT (10 steps) ────────────────────────────────────────────────
307
  if SFT_STEPS > 0:
308
  TRAIN_STATUS["phase"] = "SFT warmup"
309
+ print(f"SFT warmup — {SFT_STEPS} remaining steps...")
310
  sft_opt = torch.optim.AdamW(model.parameters(), lr=1e-4)
311
  model.train()
312
  for step in range(SFT_STEPS):
 
342
  torch.cuda.empty_cache()
343
  print("SFT complete.\n")
344
 
345
+ # ─── GRPO Training ────────────────────────────────────────────────────────────
346
  TRAIN_STATUS["phase"] = "GRPO"
347
  FastLanguageModel.for_training(model)
348
  optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=GRPO_LR)
 
434
  })
435
 
436
  print(
437
+ f"Step {step:04d} | rew={avg_r:.3f}±{rewards.std():.3f} | "
438
  f"dec={comp['decision']:.3f} vio={comp['violation']:.3f} "
439
  f"cite={comp['citation']:.3f} expl={comp['explanation']:.3f} "
440
  f"bon={comp['r_bonus']:.3f} pen={comp['penalty']:.3f} | "
 
462
  del gen, p_enc, resps, acts, rewards, adv, reward_dicts
463
 
464
  except torch.cuda.OutOfMemoryError:
465
+ print(f"Step {step:04d} | OOM — clearing cache and skipping")
466
  torch.cuda.empty_cache()
467
  gc.collect()
468
  except Exception as e:
469
  print(f"Step {step:04d} | Error: {type(e).__name__}: {e}")
470
  torch.cuda.empty_cache()
471
 
472
+ # ─── Final Model Save ─────────────────────────────────────────────────────────
473
  TRAIN_STATUS["phase"] = "saving final model"
474
  print("\nSaving final model to HF Hub...")
475
  model.save_pretrained("/tmp/aegis_final")
 
478
  folder_path = "/tmp/aegis_final",
479
  repo_id = CKPT_REPO,
480
  path_in_repo = "final",
481
+ commit_message = "AEGIS final — 500 GRPO steps complete",
482
  token = HF_TOKEN,
483
  )
484
  print(f"Final model: https://huggingface.co/{CKPT_REPO}/tree/main/final")