# MaskEdit/instruct-pix2pix-BioMedCLIP-concat-newdata-data1/data_json/metadata/clip_-1000_1000.py
| import os | |
| import nibabel as nib | |
| import pandas as pd | |
| import numpy as np | |
| from tqdm import tqdm | |
| from concurrent.futures import ProcessPoolExecutor, as_completed | |
# ===== Paths =====
DATA_ROOT = "/home/shuhan/blobdata/CT-RATE/dataset/"
TRAIN_META = "/home/shuhan/blobdata/CT-RATE/metadata/train_metadata.csv"
VALID_META = "/home/shuhan/blobdata/CT-RATE/metadata/validation_metadata.csv"
# HU clipping window (min, max) applied to every volume.
CLIP_RANGE = (-1000, 1000)
# Worker count for the process pool; leave one core free for the parent process.
NUM_WORKERS = max(1, (os.cpu_count() or 8) - 1)
def build_full_path(basename, root=None):
    """Build the full on-disk path for a CT-RATE volume from its basename.

    e.g. ``train_1661_b_1.nii.gz`` maps to
    ``<root>/train_fixed/train_1661/train_1661_b/train_1661_b_1.nii.gz``.

    Args:
        basename: Volume file name, e.g. ``"train_1661_b_1.nii.gz"``.
        root: Dataset root directory; defaults to the module-level
            ``DATA_ROOT`` (kept as a keyword default for backward
            compatibility with existing callers).

    Returns:
        The full path to the ``.nii.gz`` file under ``root``.

    Raises:
        ValueError: If ``basename`` does not have at least three
            underscore-separated parts before the extension.
    """
    if root is None:
        root = DATA_ROOT
    stem = basename.replace(".nii.gz", "")
    parts = stem.split("_")  # e.g. ['train', '1661', 'b', '1']
    if len(parts) < 3:
        raise ValueError(f"basename格式异常: {basename}")
    folder0 = f"{parts[0]}_fixed"                  # e.g. train_fixed
    folder1 = f"{parts[0]}_{parts[1]}"             # e.g. train_1661
    folder2 = f"{parts[0]}_{parts[1]}_{parts[2]}"  # e.g. train_1661_b
    return os.path.join(root, folder0, folder1, folder2, basename)
def process_one(row):
    """Clip one volume's voxel values into CLIP_RANGE, overwriting the file.

    Args:
        row: A metadata record with a ``"VolumeName"`` entry (e.g. a pandas
            row) naming the ``.nii.gz`` file to process.

    Returns:
        A human-readable status string tagged ``[WARN]`` (file missing),
        ``[SKIP]`` (already in range), ``[OK]`` (clipped and saved), or
        ``[ERROR]``.  Exceptions are caught and reported in the string so a
        failing volume never kills the worker pool.
    """
    basename = row["VolumeName"]
    nii_path = build_full_path(basename)
    if not os.path.exists(nii_path):
        return f"[WARN] 文件不存在: {nii_path}"
    try:
        img = nib.load(nii_path)
        data = img.get_fdata().astype(np.float32)
        lo = float(data.min())
        hi = float(data.max())
        # Nothing to do when all voxels already fall inside the window.
        if CLIP_RANGE[0] <= lo and hi <= CLIP_RANGE[1]:
            return f"[SKIP] {basename} 已在范围内 ({lo:.1f},{hi:.1f})"
        # Clip and overwrite the original file in place.
        clipped = np.clip(data, CLIP_RANGE[0], CLIP_RANGE[1])
        nib.save(nib.Nifti1Image(clipped, img.affine, img.header), nii_path)
        return f"[OK] {basename} -> ({lo:.1f},{hi:.1f}) clipped"
    except Exception as e:
        return f"[ERROR] {nii_path}: {e}"
def process_meta(meta_csv):
    """Run :func:`process_one` over every row of a metadata CSV in parallel.

    Args:
        meta_csv: Path to a CSV file with a ``VolumeName`` column.

    Returns:
        The list of per-volume status strings, in completion order.
    """
    frame = pd.read_csv(meta_csv)
    desc = f"Processing {os.path.basename(meta_csv)}"
    with ProcessPoolExecutor(max_workers=NUM_WORKERS) as pool:
        pending = [pool.submit(process_one, record) for _, record in frame.iterrows()]
        return [
            done.result()
            for done in tqdm(as_completed(pending), total=len(pending), desc=desc)
        ]
if __name__ == "__main__":
    # NOTE(review): the training split and log writing are deliberately
    # commented out — only the validation split is processed on this run.
    # res_train = process_meta(TRAIN_META)
    res_valid = process_meta(VALID_META)
    # # Save the log
    # with open("clip_log.txt", "w") as f:
    #     f.write("\n".join(res_train + res_valid))
    # print("Done. Log saved to clip_log.txt")