asdf98 committed on
Commit
4aebda2
·
verified ·
1 Parent(s): e90110a

Fix bf16 AMP detection for T4 in train_production.py

Browse files
Files changed (1) hide show
  1. iris/train_production.py +7 -1
iris/train_production.py CHANGED
@@ -38,7 +38,13 @@ def main():
38
  torch.manual_seed(args.seed)
39
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
40
  use_amp = device.type == "cuda"
41
- amp_dtype = torch.bfloat16 if (use_amp and torch.cuda.is_bf16_supported()) else torch.float16 if use_amp else torch.float32
 
 
 
 
 
 
42
 
43
  print(f"IRIS Training - {args.config} | Device: {device}, AMP: {amp_dtype}")
44
  model_cfg = get_model_config(args.config)
 
38
  torch.manual_seed(args.seed)
39
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
40
  use_amp = device.type == "cuda"
41
+ # T4 (compute cap 7.5) reports bf16 supported but cuDNN conv kernels crash.
42
+ # Force fp16 on GPUs below Ampere (compute cap < 8.0).
43
+ if use_amp:
44
+ cc = torch.cuda.get_device_capability(0)
45
+ amp_dtype = torch.float16 if cc[0] < 8 else torch.bfloat16
46
+ else:
47
+ amp_dtype = torch.float32
48
 
49
  print(f"IRIS Training - {args.config} | Device: {device}, AMP: {amp_dtype}")
50
  model_cfg = get_model_config(args.config)