decula committed on
Commit
43735ce
·
1 Parent(s): 4c45781

changed 2 gpu t4

Browse files
Files changed (1) hide show
  1. 7b_rag.py +30 -3
7b_rag.py CHANGED
@@ -22,6 +22,12 @@ try:
22
  if GPU_COUNT > 0:
23
  HAS_GPU = True
24
  gpu_h = nvmlDeviceGetHandleByIndex(0)
 
 
 
 
 
 
25
  except NVMLError as error:
26
  print(error)
27
 
@@ -33,13 +39,20 @@ MODEL_STRAT="cpu bf16"
33
  os.environ["RWKV_CUDA_ON"] = '0' # if '1' then use CUDA kernel for seq mode (much faster)
34
 
35
  # Switch to GPU mode
36
- if HAS_GPU == True :
37
  os.environ["RWKV_CUDA_ON"] = '1'
38
- MODEL_STRAT = "cuda bf16"
 
 
 
 
 
 
39
 
40
  # Load the model accordingly
41
  from rwkv.model import RWKV
42
  model_path = hf_hub_download(repo_id="a686d380/rwkv-5-h-world", filename=f"{model_file}.pth")
 
43
  model = RWKV(model=model_path, strategy=MODEL_STRAT)
44
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
45
  pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
@@ -200,7 +213,21 @@ def evaluate(
200
  gc.collect()
201
 
202
  if HAS_GPU == True :
203
- torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  yield out_str.strip()
206
 
 
22
  if GPU_COUNT > 0:
23
  HAS_GPU = True
24
  gpu_h = nvmlDeviceGetHandleByIndex(0)
25
+ print(f"检测到 {GPU_COUNT} 个GPU设备")
26
+ for i in range(GPU_COUNT):
27
+ handle = nvmlDeviceGetHandleByIndex(i)
28
+ info = nvmlDeviceGetMemoryInfo(handle)
29
+ name = nvmlDeviceGetName(handle)
30
+ print(f"GPU {i}: {name}, 总内存: {info.total / 1024**3:.2f} GB")
31
  except NVMLError as error:
32
  print(error)
33
 
 
39
  os.environ["RWKV_CUDA_ON"] = '0' # if '1' then use CUDA kernel for seq mode (much faster)
40
 
41
  # Switch to GPU mode
42
+ if HAS_GPU == True:
43
  os.environ["RWKV_CUDA_ON"] = '1'
44
+ if GPU_COUNT >= 2:
45
+ # 使用两块GPU进行模型加载
46
+ MODEL_STRAT = "cuda:0 fp16 *10 -> cuda:1 fp16"
47
+ print(f"使用多GPU策略: {MODEL_STRAT}")
48
+ else:
49
+ MODEL_STRAT = "cuda fp16"
50
+ print(f"使用单GPU策略: {MODEL_STRAT}")
51
 
52
  # Load the model accordingly
53
  from rwkv.model import RWKV
54
  model_path = hf_hub_download(repo_id="a686d380/rwkv-5-h-world", filename=f"{model_file}.pth")
55
+ print(f"加载模型: {model_path}")
56
  model = RWKV(model=model_path, strategy=MODEL_STRAT)
57
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
58
  pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
 
213
  gc.collect()
214
 
215
  if HAS_GPU == True :
216
+ # 在evaluate函数结束部分添加GPU内存清理
217
+ if HAS_GPU == True:
218
+ if GPU_COUNT >= 2:
219
+ # 清理两块GPU的缓存
220
+ for i in range(GPU_COUNT):
221
+ with torch.cuda.device(f"cuda:{i}"):
222
+ torch.cuda.empty_cache()
223
+ if i < 2: # 只显示前两块GPU的信息
224
+ handle = nvmlDeviceGetHandleByIndex(i)
225
+ gpu_info = nvmlDeviceGetMemoryInfo(handle)
226
+ print(f'GPU {i} VRAM: 总计 {gpu_info.total/(1024**3):.2f}GB, 已用 {gpu_info.used/(1024**3):.2f}GB, 空闲 {gpu_info.free/(1024**3):.2f}GB')
227
+ else:
228
+ gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
229
+ print(f'GPU VRAM: 总计 {gpu_info.total/(1024**3):.2f}GB, 已用 {gpu_info.used/(1024**3):.2f}GB, 空闲 {gpu_info.free/(1024**3):.2f}GB')
230
+ torch.cuda.empty_cache()
231
 
232
  yield out_str.strip()
233