Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| AlgaeGuard — Autonomous HAB Detection from Orbit | |
| HuggingFace Spaces Gradio Application | |
| Tabs: | |
| 1. Live Demo — preset NDCI colormaps + optional VLM inference | |
| 2. HAB Timeline — historical bloom severity charts for 6 water bodies | |
| 3. Custom Inference — upload image or fetch live from SimSat by coordinates | |
| 4. About — project summary and links | |
| """ | |
| from __future__ import annotations | |
| import base64 | |
| import csv | |
| import json | |
| import os | |
| import re | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Optional | |
| import cv2 | |
| import gradio as gr | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| import requests | |
| from PIL import Image | |
| # ── ZeroGPU (HF Spaces) — optional ──────────────────────────────────────────── | |
| try: | |
| import spaces | |
| HAS_SPACES = True | |
| except ImportError: | |
| HAS_SPACES = False | |
| # ── Paths ────────────────────────────────────────────────────────────────────── | |
# Directory layout is resolved relative to this file.
ROOT = Path(__file__).parent
DATA_DIR = ROOT / "data"
EXAMPLES_DIR = ROOT / "examples"

# ── Constants ──────────────────────────────────────────────────────────────────
# Checkpoint id passed to `from_pretrained` when the model is lazily loaded.
MODEL_ID = "debrajsingha/algaeguard-lfm2-5-vl-450m"

# System message sent with every VLM request.
SYSTEM_PROMPT = (
    "You are AlgaeGuard, an autonomous on-board satellite AI for Harmful Algal Bloom "
    "(HAB) early-warning using Sentinel-2 NDCI imagery. You analyze NDCI colormap images "
    "and produce structured bloom assessments for water utility operators. Your assessments "
    "guide 6–12 hour treatment protocol decisions — be precise, actionable, and structured."
)

# Monitored water bodies: internal key, display label and coordinates.
HAB_LOCATIONS = [
    {"name": "lake_erie", "display": "Lake Erie (USA)", "lat": 41.66, "lon": -83.55},
    {"name": "lake_taihu", "display": "Lake Taihu (China)", "lat": 31.20, "lon": 120.00},
    {"name": "chesapeake_bay", "display": "Chesapeake Bay (USA)", "lat": 38.50, "lon": -76.40},
    {"name": "okeechobee", "display": "Lake Okeechobee (USA)", "lat": 26.90, "lon": -80.80},
    {"name": "curonian", "display": "Curonian Lagoon (Lithuania)", "lat": 55.40, "lon": 21.10},
    {"name": "murray_darling", "display": "Murray-Darling (Australia)", "lat": -34.10, "lon": 141.90},
]

# Lookup tables keyed by the internal location name.
LOC_DISPLAY = {loc["name"]: loc["display"] for loc in HAB_LOCATIONS}
LOC_BY_KEY = {loc["name"]: loc for loc in HAB_LOCATIONS}

# Presentation settings (colours, emoji, recommended action) per severity level.
SEVERITY_CFG = {
    "CLEAR": {"color": "#4ade80", "bg": "#052e16", "emoji": "✅", "action": "No action required"},
    "LOW": {"color": "#a3e635", "bg": "#1a2e05", "emoji": "🟡", "action": "Monitor — rescan next pass"},
    "MEDIUM": {"color": "#facc15", "bg": "#2d2000", "emoji": "🟠", "action": "Issue caution advisory to water utility"},
    "HIGH": {"color": "#f97316", "bg": "#2d0c00", "emoji": "🔴", "action": "Alert water utility — activate response"},
    "CRITICAL": {"color": "#ef4444", "bg": "#1c0000", "emoji": "🚨", "action": "IMMEDIATE ACTION — emergency protocol"},
}

# Severity levels from least to most severe.
SEVERITY_ORDER = ["CLEAR", "LOW", "MEDIUM", "HIGH", "CRITICAL"]

# Documented crisis events shown on the timeline tab.
# NOTE(review): the first entry says the 2014 bloom was visible in Sentinel-2
# data; Sentinel-2A is understood to have launched in 2015 — confirm the wording.
REAL_WORLD_EVENTS = [
    {"location": "lake_erie", "date": "2014-08-02", "label": "Toledo Crisis 2014", "severity": "CRITICAL",
     "desc": "400,000 residents lost safe water for 3 days. Bloom visible 2 weeks earlier in Sentinel-2 data."},
    {"location": "lake_erie", "date": "2019-07-15", "label": "Erie Bloom 2019", "severity": "HIGH",
     "desc": "620 sq mile bloom — largest ever recorded on Lake Erie at the time."},
    {"location": "lake_taihu", "date": "2007-05-29", "label": "Taihu Crisis 2007", "severity": "CRITICAL",
     "desc": "Tap water cut for 2M residents in Wuxi, China. Cyanobacteria toxin levels 1000× safe limit."},
    {"location": "okeechobee", "date": "2018-08-01", "label": "Florida Emergency", "severity": "CRITICAL",
     "desc": "Governor declared state of emergency. Toxic algae spread to Atlantic and Gulf coasts."},
    {"location": "chesapeake_bay", "date": "2011-07-01", "label": "Chesapeake Dead Zone", "severity": "HIGH",
     "desc": "Record dead zone — 1.91 cubic miles of hypoxic water. Massive fish and shellfish kills."},
    {"location": "murray_darling", "date": "2010-01-10", "label": "Murray-Darling 2010", "severity": "CRITICAL",
     "desc": "Largest bloom ever recorded — cyanobacteria stretched 1,000 km along the river system."},
]
| # ── Load timeseries ──────────────────────────────────────────────────────────── | |
def _load_timeseries() -> dict:
    """Load the per-location bloom time series.

    Reads ``DATA_DIR / "timeseries.json"`` when it exists. Otherwise the
    series is rebuilt from ``DATA_DIR / "processed_index.csv"``: each row's
    ``stem`` must end in a ``YYYY-MM-DD`` date, and whatever precedes the date
    (minus trailing underscores) becomes the location key. Rows whose stem
    has no trailing date are skipped.

    Returns:
        A dict mapping location key to a list of entry dicts sorted by date,
        or an empty dict when neither data file is present.

    Raises:
        KeyError / ValueError: if a CSV row lacks an expected column or holds
            a non-numeric value in a numeric column.
    """
    ts_path = DATA_DIR / "timeseries.json"
    if ts_path.exists():
        return json.loads(ts_path.read_text(encoding="utf-8"))

    # Fallback: build from CSV.
    csv_path = DATA_DIR / "processed_index.csv"
    if not csv_path.exists():
        return {}

    from collections import defaultdict

    ts: dict = defaultdict(list)
    # The context manager closes the handle (the original leaked it);
    # newline="" is what the csv module expects for files it reads.
    with open(csv_path, newline="", encoding="utf-8") as fh:
        for row in csv.DictReader(fh):
            stem = row["stem"]
            m = re.search(r"(\d{4}-\d{2}-\d{2})$", stem)
            if not m:
                continue
            date = m.group(1)
            loc = stem[: m.start()].rstrip("_")
            ts[loc].append({
                "date": date,
                "severity": row["severity"],
                "ndci_mean": round(float(row["ndci_mean"]), 4),
                "ndci_max": round(float(row["ndci_max"]), 4),
                "bloom_pct": round(float(row["bloom_pct"]), 2),
                "severe_pct": round(float(row["severe_pct"]), 2),
                "image": f"{stem}_cmap.png",
            })

    # ISO dates sort chronologically as plain strings.
    for entries in ts.values():
        entries.sort(key=lambda x: x["date"])
    return dict(ts)
# Loaded once at import time; the Live Demo and Timeline handlers read from it.
TIMESERIES = _load_timeseries()
| # ── Model (lazy-loaded) ──────────────────────────────────────────────────────── | |
# Module-level cache filled by the first call to `_load_model`.
_processor = None
_model = None


def _load_model():
    """Return the cached ``(processor, model)`` pair, loading both on first use.

    torch and transformers are imported here rather than at module level, so
    they are only needed once inference is actually requested.
    """
    global _processor, _model
    if _model is None:
        import torch
        from transformers import AutoModelForImageTextToText, AutoProcessor

        print(f"Loading {MODEL_ID} ...")
        _processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
        # bfloat16 when CUDA is available, float32 otherwise.
        if torch.cuda.is_available():
            weights_dtype = torch.bfloat16
        else:
            weights_dtype = torch.float32
        _model = AutoModelForImageTextToText.from_pretrained(
            MODEL_ID, torch_dtype=weights_dtype, device_map="auto", trust_remote_code=True
        )
        _model.eval()
        print(f"✅ Model ready on {next(_model.parameters()).device}")
    return _processor, _model
| # ── Spectral helpers ─────────────────────────────────────────────────────────── | |
def _ndci_from_bands(bands: np.ndarray) -> np.ndarray:
    """Compute NDCI = (rededge - red) / (rededge + red) per pixel.

    ``bands`` is indexed as ``[:, :, 0]`` for red (B04) and ``[:, :, 1]`` for
    red-edge (B05). A small epsilon in the denominator avoids division by
    zero where both bands are 0.
    """
    red = bands[:, :, 0]
    rededge = bands[:, :, 1]
    difference = rededge - red
    total = rededge + red + 1e-8
    return difference / total
def _ndci_to_colormap(ndci_map: np.ndarray, size: int = 512) -> np.ndarray:
    """Render an NDCI map as a square JET colormap image.

    NaNs are replaced by 0 and values are clipped to [-1, 1], then scaled
    linearly to 0–255 before the OpenCV JET colormap is applied.

    Args:
        ndci_map: 2-D array of NDCI values.
        size: Side length, in pixels, of the square output.

    Returns:
        BGR uint8 array of shape ``(size, size, 3)``.
    """
    clipped = np.clip(np.nan_to_num(ndci_map, nan=0.0), -1.0, 1.0)
    norm = ((clipped + 1) / 2 * 255).astype(np.uint8)
    cmap = cv2.applyColorMap(norm, cv2.COLORMAP_JET)
    # Compare both dimensions: checking only the height returned a
    # non-square image for inputs shaped (size, W) with W != size.
    if cmap.shape[:2] != (size, size):
        cmap = cv2.resize(cmap, (size, size), interpolation=cv2.INTER_LINEAR)
    return cmap
def _classify_bloom(ndci_map: np.ndarray) -> dict:
    """Summarise an NDCI map into a severity label and coverage statistics.

    ``bloom_pct`` is the share of non-NaN pixels with NDCI > 0.10 and
    ``severe_pct`` the share with NDCI > 0.25, both in percent. Severity is
    CRITICAL when severe_pct > 15, otherwise HIGH / MEDIUM / LOW when
    bloom_pct exceeds 25 / 8 / 1, and CLEAR below that. A map with no valid
    pixels yields CLEAR with all statistics at 0.
    """
    nan_mask = np.isnan(ndci_map)
    valid = ndci_map[~nan_mask] if nan_mask.any() else ndci_map.ravel()
    if len(valid) == 0:
        return {"severity": "CLEAR", "ndci_mean": 0.0, "ndci_max": 0.0,
                "bloom_pct": 0.0, "severe_pct": 0.0}

    bloom_pct = float((valid > 0.10).mean() * 100)
    severe_pct = float((valid > 0.25).mean() * 100)

    # The first matching rule wins; the order encodes precedence.
    rules = (
        (severe_pct > 15, "CRITICAL"),
        (bloom_pct > 25, "HIGH"),
        (bloom_pct > 8, "MEDIUM"),
        (bloom_pct > 1, "LOW"),
    )
    severity = next((label for matched, label in rules if matched), "CLEAR")

    summary = {"severity": severity}
    summary["ndci_mean"] = float(np.nanmean(ndci_map))
    summary["ndci_max"] = float(np.nanmax(ndci_map))
    summary["bloom_pct"] = bloom_pct
    summary["severe_pct"] = severe_pct
    return summary
| # ── VLM inference ────────────────────────────────────────────────────────────── | |
def _do_inference(pil_img: Image.Image, location: str, date: str,
                  ndci_mean: float, bloom_pct: float) -> str:
    """Run the VLM on one colormap image and return the generated report text.

    The user turn carries the location, date, spectral statistics and the
    classification thresholds alongside the image. Decoding is greedy
    (``do_sample=False``) and capped at 350 new tokens; only the tokens
    generated after the prompt are decoded.
    """
    import torch

    processor, model = _load_model()

    user_text = "".join((
        f"Location: {location}\nDate: {date}\n",
        f"NDCI Mean: {ndci_mean:.3f} | Bloom Coverage: {bloom_pct:.1f}%\n\n",
        "Classification thresholds:\n",
        " CLEAR <1% | LOW 1–8% | MEDIUM 8–25% | HIGH >25% | CRITICAL if severe_pct >15%\n\n",
        "Analyze this Sentinel-2 NDCI colormap and issue an AlgaeGuard bloom assessment.",
    ))
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": user_text},
        ],
    }
    messages = [system_turn, user_turn]

    chat = processor.apply_chat_template(messages, add_generation_prompt=True)
    batch = processor(text=chat, images=[pil_img], return_tensors="pt")
    inputs = batch.to(next(model.parameters()).device)

    with torch.no_grad():
        generated = model.generate(**inputs, max_new_tokens=350, do_sample=False,
                                   temperature=None, top_p=None)

    # Drop the prompt tokens so only the model's answer is decoded.
    prompt_len = inputs["input_ids"].shape[1]
    return processor.decode(generated[0, prompt_len:], skip_special_tokens=True)
# Wrap inference with the ZeroGPU decorator when the `spaces` package imported.
run_vlm = spaces.GPU(_do_inference) if HAS_SPACES else _do_inference
| # ── SimSat fetch ─────────────────────────────────────────────────────────────── | |
def _fetch_simsat(lat: float, lon: float, date: str) -> Optional[tuple[np.ndarray, dict]]:
    """Fetch Sentinel bands from SimSat and derive an NDCI map plus bloom stats.

    Args:
        lat: Target latitude.
        lon: Target longitude.
        date: ``YYYY-MM-DD`` date; the request timestamp is noon of that day.

    Returns:
        ``(ndci_map, stats)`` on success, where ``stats`` is the dict produced
        by ``_classify_bloom``. ``None`` when ``SIMSAT_API_URL`` is unset, the
        request fails, the service reports no image, or the payload cannot be
        decoded.
    """
    url = os.environ.get("SIMSAT_API_URL", "").rstrip("/")
    if not url:
        return None

    params = {
        "lat": lat, "lon": lon,
        "timestamp": f"{date}T12:00:00",
        "spectral_bands": ["red", "rededge", "nir", "green", "blue"],
        "size_km": 10.0, "return_type": "array", "window_seconds": 864000,
    }
    try:
        r = requests.get(f"{url}/data/image/sentinel", params=params, timeout=60)
        r.raise_for_status()
        data = r.json()
    except Exception as e:  # best-effort boundary: any failure means "no image"
        print(f"SimSat error: {e}")
        return None

    if not isinstance(data, dict):
        print("SimSat error: unexpected response payload")
        return None
    meta = data.get("sentinel_metadata") or {}
    if not meta.get("image_available"):
        return None

    # A malformed payload must degrade to "no image" like every other failure
    # path; previously it raised straight into the Gradio handler.
    try:
        img_block = data["image"]
        raw = base64.b64decode(img_block["image"])
        shape = img_block["metadata"]["shape"]
        dtype = np.dtype(img_block["metadata"]["dtype"])
        bands = np.frombuffer(raw, dtype=dtype).reshape(shape).astype(np.float32)
    except (KeyError, TypeError, ValueError) as e:
        print(f"SimSat payload error: {e}")
        return None

    # NDCI needs a 3-D array with at least the red and red-edge bands.
    if bands.ndim != 3 or bands.shape[0] < 2:
        print(f"SimSat payload error: unexpected band array shape {bands.shape}")
        return None

    bands = np.moveaxis(bands, 0, -1)  # leading (band) axis moved last
    ndci_map = _ndci_from_bands(bands)
    return ndci_map, _classify_bloom(ndci_map)
| # ── HTML helpers ─────────────────────────────────────────────────────────────── | |
def _alert_card(severity: str, bloom_pct: float, ndci_mean: float, ndci_max: float,
                location: str, date: str, vlm_text: str = "", source: str = "") -> str:
    """Build the HTML assessment card for one observation.

    Args:
        severity: Key into ``SEVERITY_CFG`` (selects colours, emoji, action).
        bloom_pct: Bloom coverage in percent, shown with one decimal.
        ndci_mean: Mean NDCI, shown with three decimals.
        ndci_max: Maximum NDCI, shown with three decimals.
        location: Display name of the location.
        date: Date string shown under the location.
        vlm_text: Optional model report appended below the recommended action.
        source: Optional provenance label rendered as a small badge.

    Returns:
        An HTML fragment.

    Raises:
        KeyError: if ``severity`` is not a key of ``SEVERITY_CFG``.
    """
    from html import escape

    c = SEVERITY_CFG[severity]

    # location and date can be typed by the user and vlm_text is raw model
    # output; escape them so they cannot inject markup into the page.
    location = escape(str(location))
    date = escape(str(date))
    source = escape(str(source)) if source else ""
    vlm_text = escape(str(vlm_text)) if vlm_text else ""

    vlm_block = (
        f'<div style="margin-top:14px;background:#0d0d1a;border-radius:8px;padding:14px;'
        f'color:#ccc;white-space:pre-wrap;font-size:0.88em;line-height:1.6">'
        f'{vlm_text}</div>'
    ) if vlm_text else ""
    src_badge = (
        f'<span style="font-size:0.75em;color:#888;margin-left:8px">[{source}]</span>'
    ) if source else ""
    return f"""
<div style="background:#0a0a0f;border:1px solid {c['color']}55;border-radius:12px;
padding:20px;font-family:'Courier New',monospace;color:#eee">
<div style="display:flex;justify-content:space-between;align-items:flex-start;margin-bottom:16px">
<div>
<div style="color:#666;font-size:0.72em;text-transform:uppercase;letter-spacing:2px">
AlgaeGuard · Sentinel-2 Assessment</div>
<div style="color:#fff;font-size:1.05em;margin-top:2px">{location}{src_badge}</div>
<div style="color:#888;font-size:0.85em">{date}</div>
</div>
<div style="padding:10px 18px;border-radius:8px;background:{c['bg']};
border:2px solid {c['color']};font-size:1.4em;font-weight:bold;
color:{c['color']};white-space:nowrap">
{c['emoji']} {severity}
</div>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:10px;margin-bottom:14px">
<div style="background:#111;border-radius:8px;padding:12px;text-align:center">
<div style="color:#666;font-size:0.72em;text-transform:uppercase">Bloom Coverage</div>
<div style="color:{c['color']};font-size:1.9em;font-weight:bold;margin-top:4px">{bloom_pct:.1f}%</div>
</div>
<div style="background:#111;border-radius:8px;padding:12px;text-align:center">
<div style="color:#666;font-size:0.72em;text-transform:uppercase">NDCI Mean</div>
<div style="color:#60a5fa;font-size:1.9em;font-weight:bold;margin-top:4px">{ndci_mean:.3f}</div>
</div>
<div style="background:#111;border-radius:8px;padding:12px;text-align:center">
<div style="color:#666;font-size:0.72em;text-transform:uppercase">NDCI Max</div>
<div style="color:#60a5fa;font-size:1.9em;font-weight:bold;margin-top:4px">{ndci_max:.3f}</div>
</div>
</div>
<div style="background:{c['bg']};border-left:4px solid {c['color']};
padding:10px 14px;border-radius:0 6px 6px 0">
<span style="color:{c['color']};font-weight:bold">Recommended Action: </span>
<span style="color:#ddd">{c['action']}</span>
</div>
{vlm_block}
</div>"""
def _empty_alert() -> str:
    """Return the placeholder card shown when no assessment is available."""
    lines = (
        "",
        '<div style="background:#0a0a0f;border:1px solid #222;border-radius:12px;padding:40px;',
        'text-align:center;color:#555;font-family:\'Courier New\',monospace">',
        'Select a location and date, then click <b style="color:#888">Load</b> to see the',
        'spectral assessment or <b style="color:#888">Run VLM</b> for the full AI report.',
        "</div>",
    )
    return "\n".join(lines)
| # ── Tab 1: Live Demo ─────────────────────────────────────────────────────────── | |
def _get_dates(location_key: str) -> list[str]:
    """Return the observation dates stored for ``location_key`` (empty if unknown)."""
    entries = TIMESERIES.get(location_key, [])
    return [entry["date"] for entry in entries]
def tab1_load(location_key: str, date: str):
    """Load a preset observation without running the model.

    Returns ``(image, alert_html)``. The image is ``None`` when the example
    file is missing. When no entry matches the date, the result is ``None``
    plus the empty placeholder card.
    """
    entry = None
    for candidate in TIMESERIES.get(location_key, []):
        if candidate["date"] == date:
            entry = candidate
            break
    if entry is None:
        return None, _empty_alert()

    img_path = EXAMPLES_DIR / entry["image"]
    if img_path.exists():
        pil_img = Image.open(img_path).convert("RGB")
    else:
        pil_img = None

    display_name = LOC_DISPLAY.get(location_key, location_key)
    card = _alert_card(
        entry["severity"],
        entry["bloom_pct"],
        entry["ndci_mean"],
        entry["ndci_max"],
        display_name,
        date,
        source="Spectral only",
    )
    return pil_img, card
def tab1_run_vlm(location_key: str, date: str):
    """Load a preset observation and add the VLM report to its alert card.

    Returns ``(image, alert_html)``. If the entry or its example image is
    missing, the image is ``None`` and the HTML is a short error message.
    """
    entry = None
    for candidate in TIMESERIES.get(location_key, []):
        if candidate["date"] == date:
            entry = candidate
            break
    if entry is None:
        return None, "<p style='color:#f87171'>No data for this date.</p>"

    img_path = EXAMPLES_DIR / entry["image"]
    if not img_path.exists():
        return None, "<p style='color:#f87171'>Example image not found.</p>"

    pil_img = Image.open(img_path).convert("RGB")
    display_name = LOC_DISPLAY.get(location_key, location_key)
    report = run_vlm(pil_img, display_name, date, entry["ndci_mean"], entry["bloom_pct"])
    card = _alert_card(
        entry["severity"],
        entry["bloom_pct"],
        entry["ndci_mean"],
        entry["ndci_max"],
        display_name,
        date,
        report,
        source="AlgaeGuard VLM",
    )
    return pil_img, card
def tab1_update_dates(location_key: str):
    """Return a date dropdown for ``location_key`` with the last date preselected."""
    dates = _get_dates(location_key)
    latest = dates[-1] if dates else None
    return gr.Dropdown(choices=dates, value=latest)
| # ── Tab 2: Historical Timeline ───────────────────────────────────────────────── | |
# Marker colour per severity level on the timeline chart.
_SEV_COLORS = dict(
    CLEAR="#4ade80",
    LOW="#a3e635",
    MEDIUM="#facc15",
    HIGH="#f97316",
    CRITICAL="#ef4444",
)
def tab2_build_chart(location_key: str):
    """Build the bloom-coverage timeline figure for one location.

    The figure holds a filled line of bloom coverage over time, one marker
    trace per severity level present in the data, dotted threshold lines at
    1 / 8 / 25 % and a dashed vertical line for each documented crisis event
    at the location.

    Returns:
        A ``plotly.graph_objects.Figure``; an empty figure when the location
        has no time-series entries.
    """
    entries = TIMESERIES.get(location_key, [])
    if not entries:
        return go.Figure()

    dates = [e["date"] for e in entries]
    bloom_pcts = [e["bloom_pct"] for e in entries]
    severities = [e["severity"] for e in entries]
    ndci_means = [e["ndci_mean"] for e in entries]

    fig = go.Figure()

    # Background area fill
    fig.add_trace(go.Scatter(
        x=dates, y=bloom_pcts, fill="tozeroy",
        fillcolor="rgba(96,165,250,0.06)", line=dict(color="#60a5fa", width=1.5),
        mode="lines", name="Bloom %", showlegend=False,
    ))

    # Per-severity markers
    for sev in SEVERITY_ORDER:
        idx = [i for i, s in enumerate(severities) if s == sev]
        if not idx:
            continue
        fig.add_trace(go.Scatter(
            x=[dates[i] for i in idx],
            y=[bloom_pcts[i] for i in idx],
            mode="markers", name=sev,
            marker=dict(color=_SEV_COLORS[sev], size=9, symbol="circle",
                        line=dict(color="#000", width=0.5)),
            text=[f"<b>{sev}</b><br>{dates[i]}<br>Bloom: {bloom_pcts[i]:.1f}%<br>NDCI: {ndci_means[i]:.3f}"
                  for i in idx],
            hovertemplate="%{text}<extra></extra>",
        ))

    # Severity threshold lines
    thresholds = [
        (1, "#a3e635", "LOW"),
        (8, "#facc15", "MEDIUM"),
        (25, "#f97316", "HIGH"),
    ]
    for y, color, label in thresholds:
        fig.add_hline(y=y, line=dict(color=color, dash="dot", width=1),
                      annotation=dict(text=label, font=dict(color=color, size=10),
                                      bgcolor="#0a0a0f", x=1.0))

    # Real-world crisis markers.
    # add_vline(x=<date string>, annotation=...) is avoided: to place the
    # annotation Plotly averages the x endpoints, which raises TypeError for
    # date strings in affected versions. Drawing the line and label
    # separately is equivalent and accepts date strings.
    for ev in REAL_WORLD_EVENTS:
        if ev["location"] != location_key:
            continue
        fig.add_shape(
            type="line", xref="x", yref="paper",
            x0=ev["date"], x1=ev["date"], y0=0, y1=1,
            line=dict(color="#ef4444", dash="dash", width=1.5),
        )
        fig.add_annotation(
            x=ev["date"], y=1, xref="x", yref="paper",
            xanchor="left", yanchor="top", showarrow=False,
            text=f"⚠ {ev['label']}", textangle=-90,
            font=dict(color="#ef4444", size=10), bgcolor="#0a0a0f",
        )

    loc = LOC_DISPLAY.get(location_key, location_key)
    fig.update_layout(
        title=dict(text=f"HAB Timeline — {loc}", font=dict(color="#e2e8f0", size=15)),
        xaxis=dict(title="Date", gridcolor="#1e1e2e", tickformat="%b %Y",
                   tickfont=dict(color="#888")),
        yaxis=dict(title="Bloom Coverage (%)", gridcolor="#1e1e2e", tickfont=dict(color="#888")),
        paper_bgcolor="#0a0a0f", plot_bgcolor="#0d0d1a",
        font=dict(color="#ccc", family="Courier New"),
        legend=dict(bgcolor="#0d0d1a", bordercolor="#333", x=0, y=1),
        hovermode="closest", margin=dict(r=80),
    )
    return fig
def tab2_events_html(location_key: str) -> str:
    """Render the documented crisis events for a location as an HTML table.

    Returns a short notice paragraph when the location has no events.
    """
    matching = [ev for ev in REAL_WORLD_EVENTS if ev["location"] == location_key]
    if not matching:
        return "<p style='color:#555'>No documented crises for this location in our dataset.</p>"

    def render_row(ev: dict) -> str:
        # The event label is tinted with the colour of the event's severity.
        c = SEVERITY_CFG[ev["severity"]]
        return f"""
<tr>
<td style="color:#888;padding:8px 12px;border-bottom:1px solid #1e1e2e">{ev['date']}</td>
<td style="padding:8px 12px;border-bottom:1px solid #1e1e2e">
<span style="color:{c['color']};font-weight:bold">{ev['label']}</span>
</td>
<td style="color:#aaa;padding:8px 12px;border-bottom:1px solid #1e1e2e;font-size:0.9em">{ev['desc']}</td>
</tr>"""

    rows = "".join(render_row(ev) for ev in matching)
    return f"""
<table style="width:100%;border-collapse:collapse;font-family:'Courier New',monospace;
background:#0d0d1a;border-radius:8px;overflow:hidden">
<thead>
<tr style="background:#151520">
<th style="color:#666;text-align:left;padding:10px 12px;font-size:0.8em;text-transform:uppercase">Date</th>
<th style="color:#666;text-align:left;padding:10px 12px;font-size:0.8em;text-transform:uppercase">Event</th>
<th style="color:#666;text-align:left;padding:10px 12px;font-size:0.8em;text-transform:uppercase">Impact</th>
</tr>
</thead>
<tbody>{rows}</tbody>
</table>"""
def tab2_update(location_key: str):
    """Return ``(timeline_figure, events_html)`` for the selected location."""
    chart = tab2_build_chart(location_key)
    events_html = tab2_events_html(location_key)
    return chart, events_html
| # ── Tab 3: Custom Inference ──────────────────────────────────────────────────── | |
def _geocode(name: str) -> tuple[Optional[float], Optional[float], str]:
    """Resolve a place name to coordinates via geopy's Nominatim geocoder.

    Returns:
        ``(latitude, longitude, address)`` on success. On any failure
        (geopy not installed, network error, no match) returns
        ``(None, None, <hint message>)``; this function never raises.
    """
    failure = (None, None, "Could not geocode — try entering coordinates directly")
    try:
        from geopy.geocoders import Nominatim

        geo = Nominatim(user_agent="algaeguard-hab")
        result = geo.geocode(name, timeout=10)
        if result:
            return result.latitude, result.longitude, result.address
    except Exception as e:  # best-effort boundary: report the error, keep the UI alive
        # Previously swallowed silently; log it the same way SimSat errors are.
        print(f"Geocode error: {e}")
    return failure
def _folium_map(lat: float, lon: float, zoom: int = 4) -> str:
    """Return embeddable HTML for a dark map centred on the target point.

    The target gets a red marker and every location in ``HAB_LOCATIONS`` is
    drawn as a blue circle marker.
    """
    import folium

    target = [lat, lon]
    fmap = folium.Map(location=target, zoom_start=zoom, tiles="CartoDB dark_matter",
                      width="100%", height=300)

    target_marker = folium.Marker(
        target,
        popup=f"<b>Target</b><br>{lat:.4f}°, {lon:.4f}°",
        tooltip="Target location",
        icon=folium.Icon(color="red", icon="exclamation-sign"),
    )
    target_marker.add_to(fmap)

    for site in HAB_LOCATIONS:
        site_marker = folium.CircleMarker(
            [site["lat"], site["lon"]], radius=6,
            color="#60a5fa", fill=True, fill_opacity=0.35,
            tooltip=site["display"],
        )
        site_marker.add_to(fmap)

    return fmap._repr_html_()
def tab3_geocode(location_name: str):
    """Geocode the typed location name for the Custom Inference tab.

    Returns ``(lat_text, lon_text, message)``. Both coordinate strings are
    empty when the name is blank or cannot be resolved; otherwise they are
    rounded to four decimals and the message is the resolved address.
    """
    query = location_name.strip()
    if not query:
        return "", "", "Enter a location name to geocode"

    lat, lon, addr = _geocode(query)
    if lat is None:
        return "", "", addr

    lat_text = str(round(lat, 4))
    lon_text = str(round(lon, 4))
    return lat_text, lon_text, addr
def tab3_infer(image_input, location_name: str, lat_str: str, lon_str: str, date: str):
    """Run a custom assessment from an uploaded image or a live SimSat fetch.

    An uploaded image takes precedence. Without one, a SimSat fetch is
    attempted when valid coordinates are given and ``SIMSAT_API_URL`` is set.

    Args:
        image_input: Uploaded colormap (PIL image or numpy array) or ``None``.
        location_name: Free-text label for the location; may be blank.
        lat_str: Latitude as text; may be blank.
        lon_str: Longitude as text; may be blank.
        date: ``YYYY-MM-DD`` text; today's date is used when blank.

    Returns:
        ``(image, map_html, alert_html)``. The image is ``None`` and the
        alert is an error message when no image could be obtained.
    """
    pil_img = None
    stats = {}
    lat, lon = None, None

    # Resolve coordinates
    if lat_str and lon_str:
        try:
            lat, lon = float(lat_str), float(lon_str)
        except (TypeError, ValueError):
            pass
    # Discard out-of-range values; NaN fails both comparisons and is dropped too.
    if lat is not None and not (-90.0 <= lat <= 90.0 and -180.0 <= lon <= 180.0):
        lat, lon = None, None
    # Explicit None check: a truthiness test treated 0.0 (equator or prime
    # meridian) as "no coordinate".
    has_coords = lat is not None and lon is not None

    # Determine location display name
    loc_display = (location_name or "").strip() or (
        f"{lat:.4f}°N, {lon:.4f}°E" if has_coords else "Unknown"
    )
    date_str = date or datetime.now().strftime("%Y-%m-%d")

    # Path A: SimSat live fetch (requires SIMSAT_API_URL env var)
    simsat_available = bool(os.environ.get("SIMSAT_API_URL"))
    source = ""
    map_html = ""
    if has_coords and image_input is None and simsat_available:
        result = _fetch_simsat(lat, lon, date_str)
        if result:
            ndci_map, stats = result
            cmap_bgr = _ndci_to_colormap(ndci_map)
            pil_img = Image.fromarray(cv2.cvtColor(cmap_bgr, cv2.COLOR_BGR2RGB))
            source = "SimSat · Sentinel-2"
        else:
            source = "SimSat — no image available for this date/location"

    # Path B: uploaded image
    if image_input is not None:
        if isinstance(image_input, np.ndarray):
            pil_img = Image.fromarray(image_input).convert("RGB")
        else:
            pil_img = image_input.convert("RGB")
        source = "Uploaded NDCI colormap"

    # Build map (best effort: a map failure must not block the assessment)
    if has_coords:
        try:
            map_html = _folium_map(lat, lon)
        except Exception:
            map_html = ""

    if pil_img is None:
        msg = (
            "<p style='color:#f87171'>No image available.<br>"
            + ("Upload an NDCI colormap image." if not simsat_available
               else "Upload an NDCI colormap or provide coordinates + date.")
            + "</p>"
        )
        return None, map_html, msg

    # VLM inference. Without spectral stats (uploaded image) the numbers
    # default to 0 and the severity to MEDIUM until re-parsed below.
    ndci_mean = stats.get("ndci_mean", 0.0)
    bloom_pct = stats.get("bloom_pct", 0.0)
    severity = stats.get("severity", "MEDIUM")
    ndci_max = stats.get("ndci_max", 0.0)
    vlm_text = run_vlm(pil_img, loc_display, date_str, ndci_mean, bloom_pct)

    # Re-parse severity from VLM output if we don't have spectral stats;
    # levels are checked from most to least severe and the first hit wins.
    if not stats:
        upper_text = vlm_text.upper()
        for sev in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "CLEAR"]:
            if sev in upper_text:
                severity = sev
                break

    alert_html = _alert_card(severity, bloom_pct, ndci_mean, ndci_max,
                             loc_display, date_str, vlm_text, source)
    return pil_img, map_html, alert_html
| # ── CSS ──────────────────────────────────────────────────────────────────────── | |
# Custom stylesheet passed to gr.Blocks.
CSS = """
body, .gradio-container { background: #080810 !important; }
.tab-nav button { font-family: 'Courier New', monospace !important; font-size: 0.95em !important; }
.tab-nav button.selected { color: #60a5fa !important; border-color: #60a5fa !important; }
#header { text-align: center; padding: 24px 0 8px; }
#header h1 { font-family: 'Courier New', monospace; font-size: 2.2em;
background: linear-gradient(135deg, #60a5fa, #34d399);
-webkit-background-clip: text; -webkit-text-fill-color: transparent; margin: 0; }
#header p { color: #666; font-family: 'Courier New', monospace; font-size: 0.85em; margin: 6px 0 0; }
.gr-button-primary { background: #1d4ed8 !important; border: none !important; }
.gr-button-secondary { background: #1e1e2e !important; border: 1px solid #333 !important; }
"""

# ── Build UI ───────────────────────────────────────────────────────────────────
SIMSAT_LIVE = bool(os.environ.get("SIMSAT_API_URL"))
LOC_CHOICES = [(loc["display"], loc["name"]) for loc in HAB_LOCATIONS]

# Default selection for the Live Demo tab. The time series may be empty (the
# loader returns {} when no data files exist), so never index [-1] blindly:
# that raised IndexError at import time and kept the whole app from starting.
_DEFAULT_DATES = _get_dates("lake_erie")
_DEFAULT_DATE = _DEFAULT_DATES[-1] if _DEFAULT_DATES else None

# NOTE(review): the nesting of components inside rows/columns below was
# reconstructed from a flattened copy of this file — confirm the layout.
with gr.Blocks(theme=gr.themes.Base(
    primary_hue="blue", neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "monospace"],
), css=CSS, title="AlgaeGuard — HAB Detection from Orbit") as demo:
    gr.HTML("""
<div id="header">
<h1>🛰️ AlgaeGuard</h1>
<p>Autonomous Harmful Algal Bloom Detection from Orbit · LFM2.5-VL-450M fine-tuned on Sentinel-2 NDCI imagery</p>
</div>""")

    with gr.Tabs():
        # ── Tab 1: Live Demo ───────────────────────────────────────────────────
        with gr.Tab("🛰️ Live Demo"):
            gr.Markdown(
                "Select a monitored water body and date. **Load** shows spectral stats instantly. "
                "**Run VLM** loads the fine-tuned model and generates the full operator report (~30–45s on CPU, ~4s on GPU)."
            )
            with gr.Row():
                with gr.Column(scale=1):
                    t1_loc = gr.Dropdown(choices=LOC_CHOICES, value="lake_erie",
                                         label="Water Body", interactive=True)
                    t1_date = gr.Dropdown(choices=_DEFAULT_DATES,
                                          value=_DEFAULT_DATE,
                                          label="Date", interactive=True)
                    with gr.Row():
                        t1_load_btn = gr.Button("Load", variant="secondary")
                        t1_vlm_btn = gr.Button("⚡ Run VLM", variant="primary")
                    t1_img = gr.Image(label="NDCI Colormap (JET)", height=360)
                with gr.Column(scale=2):
                    t1_alert = gr.HTML(value=_empty_alert())

            t1_loc.change(tab1_update_dates, t1_loc, t1_date)
            t1_load_btn.click(tab1_load, [t1_loc, t1_date], [t1_img, t1_alert])
            t1_vlm_btn.click(tab1_run_vlm, [t1_loc, t1_date], [t1_img, t1_alert])
            # Pre-load first example on startup
            demo.load(tab1_load,
                      inputs=[gr.State("lake_erie"), gr.State(_DEFAULT_DATE)],
                      outputs=[t1_img, t1_alert])

        # ── Tab 2: Historical HAB Timeline ─────────────────────────────────────
        with gr.Tab("📈 HAB Timeline"):
            gr.Markdown(
                "218 real Sentinel-2 observations across 6 water bodies (2022–2024). "
                "Red dashed lines mark documented crisis events."
            )
            t2_loc = gr.Dropdown(choices=LOC_CHOICES, value="lake_erie",
                                 label="Water Body", interactive=True)
            t2_chart = gr.Plot(label="Bloom Coverage Over Time")
            t2_events = gr.HTML()

            t2_loc.change(tab2_update, t2_loc, [t2_chart, t2_events])
            demo.load(tab2_update,
                      inputs=gr.State("lake_erie"),
                      outputs=[t2_chart, t2_events])

        # ── Tab 3: Custom Inference ─────────────────────────────────────────────
        with gr.Tab("🔍 Custom Inference"):
            gr.Markdown(
                "**Path A — Upload** an NDCI colormap PNG and run inference directly.\n\n"
                + ("**Path B — Live satellite fetch:** Enter a location + date. "
                   "AlgaeGuard will pull Sentinel-2 bands from SimSat, compute NDCI, "
                   "and run the VLM automatically."
                   if SIMSAT_LIVE else
                   "**Path B (SimSat):** Set the `SIMSAT_API_URL` Space secret to enable "
                   "live satellite fetch for any coordinates on Earth.")
            )
            with gr.Row():
                with gr.Column(scale=1):
                    t3_img_upload = gr.Image(label="Upload NDCI Colormap (optional)", type="pil",
                                             height=260)
                    gr.Markdown("**— or enter location —**")
                    t3_loc_name = gr.Textbox(label="Location name", placeholder="e.g. Lake Balaton, Hungary")
                    with gr.Row():
                        t3_lat = gr.Textbox(label="Latitude", placeholder="41.66")
                        t3_lon = gr.Textbox(label="Longitude", placeholder="-83.55")
                    t3_geo_btn = gr.Button("Geocode →", variant="secondary", size="sm")
                    t3_geo_msg = gr.Textbox(label="Resolved address", interactive=False, lines=1)
                    t3_date = gr.Textbox(label="Date (YYYY-MM-DD)",
                                         value=datetime.now().strftime("%Y-%m-%d"))
                    t3_run_btn = gr.Button("⚡ Run AlgaeGuard", variant="primary")
                with gr.Column(scale=2):
                    t3_map = gr.HTML(label="Location map")
                    t3_result_img = gr.Image(label="NDCI Colormap", height=220, visible=True)
                    t3_alert = gr.HTML()

            t3_geo_btn.click(tab3_geocode, [t3_loc_name], [t3_lat, t3_lon, t3_geo_msg])
            t3_run_btn.click(tab3_infer,
                             [t3_img_upload, t3_loc_name, t3_lat, t3_lon, t3_date],
                             [t3_result_img, t3_map, t3_alert])

        # ── Tab 4: About ────────────────────────────────────────────────────────
        # Blank lines between Markdown blocks are restored here so paragraphs
        # and tables render as separate elements.
        # NOTE(review): the text says Sentinel-2 data shows the 2014 bloom;
        # Sentinel-2A is understood to have launched in 2015 — confirm the claim.
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
## AlgaeGuard — Autonomous HAB Detection from Orbit

**AI in Space Hackathon** · Liquid AI Challenge · DPhi Space Track

### The Problem

The 2014 Toledo water crisis left 400,000 people without safe drinking water.
Sentinel-2 NDCI data shows the bloom forming and intensifying on Lake Erie for
**two full weeks before** that crisis. The data existed. The detection didn't happen
in time because every existing pipeline routes raw imagery to the ground, queues it
for analyst review, and produces a report 24–72 hours later.

AlgaeGuard solves this by running inference **on-board**: satellite overpass → NDCI
computation → VLM classification → 200-byte JSON alert downlinked to water utility ops.
One orbit pass, one alert, latency under 90 minutes.

### Architecture

```
Satellite Overpass → Band Extraction  → NDCI Colormap → LFM2.5-VL-450M → Alert
SimSat polling       B04/B05/B08/B03    JET 512×512     LoRA SFT         JSON
(T+0 min)            spectral indices   (T+10 min)      on-board         (T+90 min)
```

### Model Performance (v2)

| Metric | v1 | v2 |
|--------|----|----|
| Holdout Accuracy | 66.7% | **76.9%** (20/26) |
| Eval Loss | 0.467 | **0.066** |
| MEDIUM F1 | 0.00 | **0.87** |
| CLEAR↔CRITICAL errors | present | **0** |

All 6 errors are adjacent-class (e.g. HIGH predicted as MEDIUM).
Zero CLEAR↔CRITICAL confusions across the holdout set.

### Published Artifacts

| Artifact | Link |
|----------|------|
| Fine-tuned model | [debrajsingha/algaeguard-lfm2-5-vl-450m](https://huggingface.co/debrajsingha/algaeguard-lfm2-5-vl-450m) · 856MB · CC BY 4.0 |
| Training dataset | [debrajsingha/algaeguard-hab-ndci](https://huggingface.co/datasets/debrajsingha/algaeguard-hab-ndci) · 398 samples |
| Source code | [github.com/debpks/algaeguard-llm](https://github.com/debpks/algaeguard-llm) |
| SimSat | [github.com/debpks/SimSat](https://github.com/debpks/SimSat) |

### Stack

| Component | Technology |
|-----------|-----------|
| Satellite imagery | DPhi Space SimSat (Sentinel-2 simulation) |
| Spectral indices | NumPy — NDCI, FAI, NDWI |
| Base VLM | Liquid AI LFM2.5-VL-450M |
| Fine-tuning | Liquid AI LEAP SDK (LoRA r=16, 15 epochs) |
| Training compute | Modal A10G GPU |
| Monitoring | WandB — `algaeguard_hab_detection` |
""")

demo.launch()