tazwarrrr committed on
Commit
5c0d4c4
·
1 Parent(s): 984e3c2

fix: ROCm compile flags for gfx942 MI300X - verified on AMD DevCloud

Browse files
backend/tools/hipify_wrapper.py CHANGED
@@ -44,14 +44,14 @@ class HipifyWrapper:
44
  # Use -- separator to pass compiler flags to the internal Clang parser
45
  # This is critical for Clang-based tools to distinguish tool flags from compiler flags.
46
  cmd = ["hipify-clang", tmp_path, "--",
47
- "-nocudalib", "-nocudainc", "-arch=sm_60"]
48
 
49
  # Debug log for build engineering
50
  print(f"DEBUG: Running hipify-clang command: {' '.join(cmd)}")
51
 
52
  # Set environment variable just in case hipify-clang invokes nvcc internally
53
  env = os.environ.copy()
54
- env['NVCC_APPEND_FLAGS'] = '-nocudalib -arch=sm_60'
55
 
56
  result = subprocess.run(
57
  cmd,
 
44
  # Use -- separator to pass compiler flags to the internal Clang parser
45
  # This is critical for Clang-based tools to distinguish tool flags from compiler flags.
46
  cmd = ["hipify-clang", tmp_path, "--",
47
+ "-nocudalib", "-nocudainc"]
48
 
49
  # Debug log for build engineering
50
  print(f"DEBUG: Running hipify-clang command: {' '.join(cmd)}")
51
 
52
  # Set environment variable just in case hipify-clang invokes nvcc internally
53
  env = os.environ.copy()
54
+ env['NVCC_APPEND_FLAGS'] = '-nocudalib'
55
 
56
  result = subprocess.run(
57
  cmd,
backend/tools/rocprof_wrapper.py CHANGED
@@ -27,14 +27,14 @@ class RocprofWrapper:
27
  if output_file is None:
28
  output_file = temp_file.replace('.hip', '.out')
29
 
30
- # Add -nocudalib and -arch=sm_60 to solve "Cannot find libdevice for sm_52" error
31
  # This ensures compilation works even if CUDA device libraries are missing.
32
  cmd = [self.hipcc_path, '-o', output_file,
33
- temp_file, '-nocudalib', '-arch=sm_60']
34
 
35
  # Set environment variable just in case hipcc invokes nvcc internally
36
  env = os.environ.copy()
37
- env['NVCC_APPEND_FLAGS'] = '-nocudalib -arch=sm_60'
38
 
39
  result = subprocess.run(
40
  cmd, capture_output=True, text=True, timeout=60, env=env, check=False)
 
27
  if output_file is None:
28
  output_file = temp_file.replace('.hip', '.out')
29
 
30
+ # Add --offload-arch=gfx942 to solve "Cannot find libdevice for sm_52" error
31
  # This ensures compilation works even if CUDA device libraries are missing.
32
  cmd = [self.hipcc_path, '-o', output_file,
33
+ temp_file, '--offload-arch=gfx942']
34
 
35
  # Set environment variable just in case hipcc invokes nvcc internally
36
  env = os.environ.copy()
37
+ env['NVCC_APPEND_FLAGS'] = ' --offload-arch=gfx942'
38
 
39
  result = subprocess.run(
40
  cmd, capture_output=True, text=True, timeout=60, env=env, check=False)