| import os |
| import numpy as np |
| import dask.array as da |
| import xarray as xr |
|
|
def load_all_file(data_dir="", prefix="202306"):
    """Load every array file in ``data_dir`` whose name starts with ``prefix``.

    The directory is scanned non-recursively with ``os.listdir``. Matching
    names are sorted lexicographically before loading, so the returned list
    follows file-name order.

    Args:
        data_dir: Directory that holds the files to load.
        prefix: File-name prefix used to select files. Defaults to
            ``"202306"``.

    Returns:
        A list with one ``np.load`` result per matching file, in sorted
        file-name order. The list is empty when no name matches.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``data_dir`` cannot be
            listed.
    """
    matching_names = sorted(
        name for name in os.listdir(data_dir) if name.startswith(prefix)
    )
    # os.path.join rather than manual "/" concatenation, so the separator is
    # handled by the standard library.
    return [np.load(os.path.join(data_dir, name)) for name in matching_names]
|
|
def preprocess_data(data_list, out_dir="", patch_size=32, grid_size=640):
    """Cut each array into square patches and write them to a NetCDF file.

    Every entry of ``data_list`` is sliced into non-overlapping
    ``patch_size`` x ``patch_size`` tiles covering its top-left
    ``grid_size`` x ``grid_size`` region. Tiles are emitted array by array,
    row offset first, then column offset. The tiles are stored as ``uint8``
    in the ``patches`` variable of an xarray dataset, together with
    placeholder metadata variables, and written to
    ``<out_dir>/RZC/patches_RV_202306.nc``.

    Side effects: prints the total number of patches, creates the output
    directory if needed, and writes the NetCDF file.

    Args:
        data_list: Sequence of arrays supporting 2-D slicing. Each is
            presumably at least ``grid_size`` x ``grid_size`` — TODO confirm
            against the input files.
        out_dir: Parent directory for the ``RZC`` output folder. An empty
            string means the current working directory.
        patch_size: Edge length of each square patch. Defaults to 32.
        grid_size: Edge length of the region that is tiled. Defaults to 640.

    Returns:
        The number of arrays in ``data_list``.

    Raises:
        ValueError: If ``data_list`` is empty, or if ``patch_size`` is not a
            positive divisor of ``grid_size``.
    """
    # Fail early with a clear message instead of deep inside dask/xarray,
    # and before anything is written to disk.
    if len(data_list) == 0:
        raise ValueError("data_list is empty; there are no patches to write")
    if patch_size <= 0 or grid_size % patch_size != 0:
        raise ValueError(
            "patch_size must be a positive divisor of grid_size "
            f"(got patch_size={patch_size}, grid_size={grid_size})"
        )

    offsets = range(0, grid_size, patch_size)
    patch_list = []
    for frame in data_list:
        for row in offsets:
            for col in offsets:
                patch_list.append(
                    frame[row:row + patch_size, col:col + patch_size]
                )

    num_patches = len(patch_list)
    print(num_patches)

    patches_array = np.array(patch_list, dtype=np.uint8)

    # NOTE(review): the metadata below is placeholder content (all-zero
    # coordinates, sequential indices as times, 0..255 as scale), not values
    # derived from the input data — confirm this is intended.
    coords_placeholder = np.zeros((num_patches, 2), dtype=np.uint16)
    times_placeholder = np.arange(num_patches, dtype=np.int64)
    scale_values = np.arange(256, dtype=np.float32)

    # Each variable is wrapped as a single-chunk dask array; the chunk shape
    # follows patch_size so the two cannot drift apart.
    patches_da = da.from_array(
        patches_array, chunks=(num_patches, patch_size, patch_size)
    )
    coords_da = da.from_array(coords_placeholder, chunks=(num_patches, 2))
    times_da = da.from_array(times_placeholder, chunks=(num_patches,))
    scale_da = da.from_array(scale_values, chunks=(256,))

    # NOTE(review): "dim_heigh" looks like a typo for "dim_height"; it is kept
    # unchanged because it is part of the written file's schema.
    ds = xr.DataArray(
        patches_da, dims=("dim_patch", "dim_heigh", "dim_width")
    ).to_dataset(name="patches")
    ds["patch_coords"] = xr.DataArray(
        coords_da, dims=("dim_patch1", "dim_coord")
    )
    ds["patch_times"] = xr.DataArray(times_da, dims=("dim_patch2",))
    ds["zero_patch_coords"] = xr.DataArray(
        coords_da, dims=("dim_zero_patch", "dim_coord")
    )
    ds["zero_patch_times"] = xr.DataArray(
        times_da, dims=("dim_zero_patch1",)
    )
    ds["scale"] = xr.DataArray(scale_da, dims=("dim_scale",))
    ds.attrs["zero_value"] = 1

    # os.path.join keeps an empty out_dir relative ("RZC") instead of
    # producing the filesystem-root path "/RZC".
    target_dir = os.path.join(out_dir, "RZC")
    os.makedirs(target_dir, exist_ok=True)
    ds.to_netcdf(os.path.join(target_dir, "patches_RV_202306.nc"))

    return len(data_list)
|
|
| |
# Load the arrays from the test directory, write their patches, and print the
# number of arrays that were processed.
# NOTE(review): these statements run whenever the module is imported, not
# only when it is executed as a script — consider a __main__ guard.
loaded_arrays = load_all_file(data_dir="/data/data_WF/ldcast_precipitation/test")
print(
    preprocess_data(
        loaded_arrays,
        out_dir="/data/data_WF/ldcast_precipitation/preprocess_data_test",
    )
)
|
|