CLIWorks committed on
Commit
d2893d4
·
verified ·
1 Parent(s): 737e1e7

Upload mythos-fineweb-moe.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. mythos-fineweb-moe.py +2 -1
mythos-fineweb-moe.py CHANGED
@@ -1035,6 +1035,7 @@ def main():
1035
  cfg.vocab_size = vocab_size
1036
 
1037
  bf16_ok = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
 
1038
  amp_dtype = torch.bfloat16 if bf16_ok else torch.float16
1039
 
1040
  model = SpiderPortalForConditionalGeneration(cfg).to(torch.bfloat16)
@@ -1058,7 +1059,7 @@ def main():
1058
  model = model.to(device)
1059
 
1060
  if master:
1061
- logger.info("MoE mode: using native bf16 (MXFP8 disabled)")
1062
 
1063
  # MoE init checkpoint (skip dense conversion, load MoE weights directly)
1064
  moe_init_ckpt = os.environ.get("MOE_INIT_CKPT", "")
 
1035
  cfg.vocab_size = vocab_size
1036
 
1037
  bf16_ok = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
1038
+ use_mxfp8 = os.environ.get("MXFP8", "0") == "1"
1039
  amp_dtype = torch.bfloat16 if bf16_ok else torch.float16
1040
 
1041
  model = SpiderPortalForConditionalGeneration(cfg).to(torch.bfloat16)
 
1059
  model = model.to(device)
1060
 
1061
  if master:
1062
+ logger.info(f"MoE mode: {amp_dtype} | MXFP8 hardware acceleration: {'ENABLED' if use_mxfp8 else 'disabled (set MXFP8=1)'}")
1063
 
1064
  # MoE init checkpoint (skip dense conversion, load MoE weights directly)
1065
  moe_init_ckpt = os.environ.get("MOE_INIT_CKPT", "")