File size: 4,577 Bytes
48be8c8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | """Bake DEP scenarios + Sandy extent to compact GeoTIFFs.
For each DEP scenario we produce a uint8 raster keyed by max
Flooding_Category (0=outside, 1/2/3 = depth class). Sandy is a uint8
0/1 mask. CRS is EPSG:2263 (feet) so callers project once and sample
at native units.
Resolution defaults to 10 ft. At that resolution a single pixel is
~smaller than a building footprint, which is more than fine for
point-in-polygon queries. NYC bbox at 10 ft fits comfortably in a
~12k x 16k uint8 array — a few hundred MB uncompressed but DEFLATE
compresses these heavily because most pixels are 0.
Run:
uv run python experiments/22_cornerstone_optim/bake_rasters.py
"""
from __future__ import annotations
import sys
import time
from pathlib import Path
import numpy as np
import rasterio
from rasterio import features
from rasterio.transform import from_origin
REPO = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(REPO))
from app.flood_layers import dep_stormwater, sandy_inundation # noqa: E402
NYC_CRS = "EPSG:2263"
RES_FT = 10.0 # raster cell size in feet
OUT_DIR = REPO / "experiments" / "22_cornerstone_optim" / "baked"
def nyc_grid(res_ft: float = RES_FT):
"""Return (transform, width, height) covering all of NYC + harbor.
Bounds chosen wide enough to cover every Cornerstone source.
"""
minx, miny = 910_000.0, 110_000.0 # SW of Staten Island
maxx, maxy = 1_080_000.0, 280_000.0 # NE of Bronx
width = int(np.ceil((maxx - minx) / res_ft))
height = int(np.ceil((maxy - miny) / res_ft))
transform = from_origin(minx, maxy, res_ft, res_ft)
return transform, width, height
def burn(gdf, value_col_or_const, out_path: Path, transform, width, height):
if isinstance(value_col_or_const, str):
shapes = ((geom, int(val)) for geom, val
in zip(gdf.geometry, gdf[value_col_or_const]))
else:
v = int(value_col_or_const)
shapes = ((geom, v) for geom in gdf.geometry)
arr = features.rasterize(
shapes=shapes,
out_shape=(height, width),
transform=transform,
fill=0,
dtype="uint8",
merge_alg=rasterio.enums.MergeAlg.replace,
)
out_path.parent.mkdir(parents=True, exist_ok=True)
profile = {
"driver": "GTiff",
"dtype": "uint8",
"count": 1,
"width": width,
"height": height,
"transform": transform,
"crs": NYC_CRS,
"compress": "deflate",
"predictor": 2,
"tiled": True,
"blockxsize": 512,
"blockysize": 512,
"nodata": 0,
}
with rasterio.open(out_path, "w", **profile) as dst:
dst.write(arr, 1)
return arr
def bake_dep(scenario: str, transform, width, height) -> dict:
print(f" baking {scenario}...", end=" ", flush=True)
t0 = time.perf_counter()
g = dep_stormwater.load(scenario).copy()
g["Flooding_Category"] = g["Flooding_Category"].astype(int)
# rasterize lowest first so highest category wins at overlaps
g = g.sort_values("Flooding_Category", ascending=True)
out = OUT_DIR / f"{scenario}.tif"
arr = burn(g, "Flooding_Category", out, transform, width, height)
dt = time.perf_counter() - t0
size_mb = out.stat().st_size / 1e6
nz = int((arr > 0).sum())
print(f"{dt:5.1f}s {size_mb:5.1f} MB on disk nonzero={nz:,}")
return {"path": str(out), "elapsed_s": dt, "size_mb": size_mb, "nonzero_px": nz}
def bake_sandy(transform, width, height) -> dict:
print(" baking sandy...", end=" ", flush=True)
t0 = time.perf_counter()
g = sandy_inundation.load().copy()
out = OUT_DIR / "sandy.tif"
arr = burn(g, 1, out, transform, width, height)
dt = time.perf_counter() - t0
size_mb = out.stat().st_size / 1e6
nz = int((arr > 0).sum())
print(f"{dt:5.1f}s {size_mb:5.1f} MB on disk nonzero={nz:,}")
return {"path": str(out), "elapsed_s": dt, "size_mb": size_mb, "nonzero_px": nz}
def main():
transform, width, height = nyc_grid(RES_FT)
print(f"Grid: {width} x {height} px @ {RES_FT} ft/px (~{width*height/1e6:.0f} M cells)")
print(f"Output: {OUT_DIR}")
print()
bake_dep("dep_extreme_2080", transform, width, height)
bake_dep("dep_moderate_2050", transform, width, height)
bake_dep("dep_moderate_current", transform, width, height)
bake_sandy(transform, width, height)
total_mb = sum(p.stat().st_size for p in OUT_DIR.glob("*.tif")) / 1e6
print(f"\nTotal baked: {total_mb:.1f} MB")
if __name__ == "__main__":
main()
|