decula committed on
Commit ·
43735ce
1
Parent(s): 4c45781
changed 2 gpu t4
Browse files
7b_rag.py
CHANGED
|
@@ -22,6 +22,12 @@ try:
|
|
| 22 |
if GPU_COUNT > 0:
|
| 23 |
HAS_GPU = True
|
| 24 |
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
except NVMLError as error:
|
| 26 |
print(error)
|
| 27 |
|
|
@@ -33,13 +39,20 @@ MODEL_STRAT="cpu bf16"
|
|
| 33 |
os.environ["RWKV_CUDA_ON"] = '0' # if '1' then use CUDA kernel for seq mode (much faster)
|
| 34 |
|
| 35 |
# Switch to GPU mode
|
| 36 |
-
if HAS_GPU == True
|
| 37 |
os.environ["RWKV_CUDA_ON"] = '1'
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# Load the model accordingly
|
| 41 |
from rwkv.model import RWKV
|
| 42 |
model_path = hf_hub_download(repo_id="a686d380/rwkv-5-h-world", filename=f"{model_file}.pth")
|
|
|
|
| 43 |
model = RWKV(model=model_path, strategy=MODEL_STRAT)
|
| 44 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
| 45 |
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
|
|
@@ -200,7 +213,21 @@ def evaluate(
|
|
| 200 |
gc.collect()
|
| 201 |
|
| 202 |
if HAS_GPU == True :
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
yield out_str.strip()
|
| 206 |
|
|
|
|
| 22 |
if GPU_COUNT > 0:
|
| 23 |
HAS_GPU = True
|
| 24 |
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
| 25 |
+
print(f"检测到 {GPU_COUNT} 个GPU设备")
|
| 26 |
+
for i in range(GPU_COUNT):
|
| 27 |
+
handle = nvmlDeviceGetHandleByIndex(i)
|
| 28 |
+
info = nvmlDeviceGetMemoryInfo(handle)
|
| 29 |
+
name = nvmlDeviceGetName(handle)
|
| 30 |
+
print(f"GPU {i}: {name}, 总内存: {info.total / 1024**3:.2f} GB")
|
| 31 |
except NVMLError as error:
|
| 32 |
print(error)
|
| 33 |
|
|
|
|
| 39 |
os.environ["RWKV_CUDA_ON"] = '0' # if '1' then use CUDA kernel for seq mode (much faster)
|
| 40 |
|
| 41 |
# Switch to GPU mode
|
| 42 |
+
if HAS_GPU == True:
|
| 43 |
os.environ["RWKV_CUDA_ON"] = '1'
|
| 44 |
+
if GPU_COUNT >= 2:
|
| 45 |
+
# 使用两块GPU进行模型加载
|
| 46 |
+
MODEL_STRAT = "cuda:0 fp16 *10 -> cuda:1 fp16"
|
| 47 |
+
print(f"使用多GPU策略: {MODEL_STRAT}")
|
| 48 |
+
else:
|
| 49 |
+
MODEL_STRAT = "cuda fp16"
|
| 50 |
+
print(f"使用单GPU策略: {MODEL_STRAT}")
|
| 51 |
|
| 52 |
# Load the model accordingly
|
| 53 |
from rwkv.model import RWKV
|
| 54 |
model_path = hf_hub_download(repo_id="a686d380/rwkv-5-h-world", filename=f"{model_file}.pth")
|
| 55 |
+
print(f"加载模型: {model_path}")
|
| 56 |
model = RWKV(model=model_path, strategy=MODEL_STRAT)
|
| 57 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
| 58 |
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
|
|
|
|
| 213 |
gc.collect()
|
| 214 |
|
| 215 |
if HAS_GPU == True :
|
| 216 |
+
# 在evaluate函数结束部分添加GPU内存清理
|
| 217 |
+
if HAS_GPU == True:
|
| 218 |
+
if GPU_COUNT >= 2:
|
| 219 |
+
# 清理两块GPU的缓存
|
| 220 |
+
for i in range(GPU_COUNT):
|
| 221 |
+
with torch.cuda.device(f"cuda:{i}"):
|
| 222 |
+
torch.cuda.empty_cache()
|
| 223 |
+
if i < 2: # 只显示前两块GPU的信息
|
| 224 |
+
handle = nvmlDeviceGetHandleByIndex(i)
|
| 225 |
+
gpu_info = nvmlDeviceGetMemoryInfo(handle)
|
| 226 |
+
print(f'GPU {i} VRAM: 总计 {gpu_info.total/(1024**3):.2f}GB, 已用 {gpu_info.used/(1024**3):.2f}GB, 空闲 {gpu_info.free/(1024**3):.2f}GB')
|
| 227 |
+
else:
|
| 228 |
+
gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
|
| 229 |
+
print(f'GPU VRAM: 总计 {gpu_info.total/(1024**3):.2f}GB, 已用 {gpu_info.used/(1024**3):.2f}GB, 空闲 {gpu_info.free/(1024**3):.2f}GB')
|
| 230 |
+
torch.cuda.empty_cache()
|
| 231 |
|
| 232 |
yield out_str.strip()
|
| 233 |
|