Upload hf_training/train.py with huggingface_hub
hf_training/train.py (+13 -13)
CHANGED
@@ -1,3 +1,4 @@
+from unsloth import FastLanguageModel
 """
 AEGIS Training Script for HF Spaces (A10G Small, 24GB VRAM)
 - Loads Qwen2.5-7B-Unsloth-bnb-4bit + step_50 LoRA adapter
@@ -16,7 +17,7 @@ from safetensors.torch import load_file
 from huggingface_hub import login, HfApi, hf_hub_download, snapshot_download
 from peft import set_peft_model_state_dict
 
-#
+# ─── Auth & Config ──────────────────────────────────────────
 HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 
 if not HF_TOKEN:
@@ -64,7 +65,7 @@ CURRICULUM_SWITCH = 150
 GRAD_CLIP = 1.0
 SAVE_EVERY = 50
 
-#
+# ─── Minimal HTTP Server (keeps port 7860 alive) ────────────
 TRAIN_STATUS = {"step": 0, "total": GRPO_STEPS, "phase": "starting", "reward": 0.0, "history": []}
 
 class StatusHandler(BaseHTTPRequestHandler):
@@ -129,7 +130,7 @@ def start_server():
 threading.Thread(target=start_server, daemon=True).start()
 print("Status server running on :7860")
 
-#
+# ─── Dataset ────────────────────────────────────────────────
 import hashlib
 
 with open("aegis_training_data_500.json") as f:
@@ -154,7 +155,7 @@ for items in families.values():
     eval_set.extend(items[k:])
 print(f"Dataset: {len(train_set)} train / {len(eval_set)} eval")
 
-#
+# ─── Policy Rules + Reward ──────────────────────────────────
 POLICY_RULES = {
     "PRI-01": "Do not access PII without authorization.",
     "PRI-02": "Do not transmit PII.",
@@ -263,8 +264,7 @@ def score_response(a, truth, raw_text, level=1, fmt_ema=1.0):
     return {"total": total, "format": 1.0, "decision": d, "violation": v,
             "citation": c, "explanation": e, "r_bonus": r_bonus, "penalty": penalty}
 
-#
-from unsloth import FastLanguageModel
+# ─── Load Model + Step-50 Checkpoint ────────────────────────
 
 TRAIN_STATUS["phase"] = "loading model"
 print("\nLoading Qwen2.5-7B base model...")
@@ -303,10 +303,10 @@ if hasattr(model, "generation_config"):
     model.generation_config.max_length = None
 print(f"GPU: {torch.cuda.mem_get_info()[0]/1e9:.1f} GB free\n")
 
-#
+# ─── Remaining SFT (10 steps) ───────────────────────────────
 if SFT_STEPS > 0:
     TRAIN_STATUS["phase"] = "SFT warmup"
-    print(f"SFT warmup
+    print(f"SFT warmup — {SFT_STEPS} remaining steps...")
     sft_opt = torch.optim.AdamW(model.parameters(), lr=1e-4)
     model.train()
     for step in range(SFT_STEPS):
@@ -342,7 +342,7 @@ if SFT_STEPS > 0:
     torch.cuda.empty_cache()
     print("SFT complete.\n")
 
-#
+# ─── GRPO Training ──────────────────────────────────────────
 TRAIN_STATUS["phase"] = "GRPO"
 FastLanguageModel.for_training(model)
 optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=GRPO_LR)
@@ -434,7 +434,7 @@ for step in range(GRPO_STEPS):
         })
 
         print(
-            f"Step {step:04d} | rew={avg_r:.3f}Â±{rewards.std():.3f} | "
+            f"Step {step:04d} | rew={avg_r:.3f}±{rewards.std():.3f} | "
            f"dec={comp['decision']:.3f} vio={comp['violation']:.3f} "
            f"cite={comp['citation']:.3f} expl={comp['explanation']:.3f} "
            f"bon={comp['r_bonus']:.3f} pen={comp['penalty']:.3f} | "
@@ -462,14 +462,14 @@ for step in range(GRPO_STEPS):
         del gen, p_enc, resps, acts, rewards, adv, reward_dicts
 
     except torch.cuda.OutOfMemoryError:
-        print(f"Step {step:04d} | OOM
+        print(f"Step {step:04d} | OOM — clearing cache and skipping")
         torch.cuda.empty_cache()
         gc.collect()
     except Exception as e:
         print(f"Step {step:04d} | Error: {type(e).__name__}: {e}")
         torch.cuda.empty_cache()
 
-#
+# ─── Final Model Save ───────────────────────────────────────
 TRAIN_STATUS["phase"] = "saving final model"
 print("\nSaving final model to HF Hub...")
 model.save_pretrained("/tmp/aegis_final")
@@ -478,7 +478,7 @@ api.upload_folder(
     folder_path = "/tmp/aegis_final",
     repo_id = CKPT_REPO,
     path_in_repo = "final",
-    commit_message = "AEGIS final
+    commit_message = "AEGIS final — 500 GRPO steps complete",
    token = HF_TOKEN,
 )
 print(f"Final model: https://huggingface.co/{CKPT_REPO}/tree/main/final")
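
Review notes

The one structural change in this commit is hoisting `from unsloth import FastLanguageModel` to line 1, presumably because Unsloth patches transformers and peft at import time and should be loaded before either of them. A side effect worth noting: once an import precedes it, the triple-quoted block on the old line 1 is no longer a module docstring. A minimal sketch of the resulting import order; only the unsloth, peft, and huggingface_hub imports appear in this diff, the rest is assumed context:

    # Unsloth first, so its runtime patches apply before transformers/peft
    # load (assumed rationale; the diff only shows the import being hoisted).
    from unsloth import FastLanguageModel

    import os     # assumed: used later for os.environ.get("HF_TOKEN")
    import torch  # assumed: used for the optimizers and CUDA cache calls
    from peft import set_peft_model_state_dict
    from huggingface_hub import login, HfApi, hf_hub_download, snapshot_download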
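For context on the "Minimal HTTP Server" section: a Space has to answer on port 7860 or the runtime marks it unhealthy, so the script serves TRAIN_STATUS from a daemon thread while training runs in the foreground. Only TRAIN_STATUS, StatusHandler, start_server, the port, and the two closing lines are visible in the diff; the handler body below is a sketch of what such a server plausibly looks like:

    import json, threading
    from http.server import BaseHTTPRequestHandler, HTTPServer

    # Shape taken from the diff; GRPO_STEPS is defined earlier in the real
    # script, so a literal 500 stands in for it here.
    TRAIN_STATUS = {"step": 0, "total": 500, "phase": "starting",
                    "reward": 0.0, "history": []}

    class StatusHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            # Assumed handler body: report current training status as JSON.
            body = json.dumps(TRAIN_STATUS).encode()
            self.send_response(200)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, *args):
            pass  # silence per-request logging so training output stays readable

    def start_server():
        HTTPServer(("0.0.0.0", 7860), StatusHandler).serve_forever()

    # These two lines appear verbatim in the diff.
    threading.Thread(target=start_server, daemon=True).start()
    print("Status server running on :7860")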
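Finally, the imports (`load_file` from safetensors.torch, `set_peft_model_state_dict`, `hf_hub_download`) suggest the step_50 LoRA adapter named in the docstring is restored by downloading its safetensors file and injecting the state dict into the PEFT model. None of that code is visible in this diff, so the following is purely a sketch of that common pattern, with placeholder names throughout:

    import os
    from safetensors.torch import load_file
    from peft import set_peft_model_state_dict
    from huggingface_hub import hf_hub_download

    CKPT_REPO = "user/aegis-checkpoints"  # placeholder, not from the diff
    HF_TOKEN = os.environ.get("HF_TOKEN")

    path = hf_hub_download(
        repo_id=CKPT_REPO,
        filename="step_50/adapter_model.safetensors",  # placeholder path
        token=HF_TOKEN,
    )
    state_dict = load_file(path)                  # LoRA tensors from safetensors
    set_peft_model_state_dict(model, state_dict)  # model: the PEFT model built earlier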