Eli Safra committed on
Commit
938949f
·
1 Parent(s): a04833b

Deploy SolarWine API (FastAPI + Docker, port 7860)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +27 -0
  2. README.md +22 -7
  3. backend/__init__.py +0 -0
  4. backend/api/__init__.py +0 -0
  5. backend/api/auth.py +120 -0
  6. backend/api/deps.py +21 -0
  7. backend/api/main.py +120 -0
  8. backend/api/routes/__init__.py +0 -0
  9. backend/api/routes/biology.py +38 -0
  10. backend/api/routes/chatbot.py +79 -0
  11. backend/api/routes/control.py +77 -0
  12. backend/api/routes/energy.py +45 -0
  13. backend/api/routes/health.py +49 -0
  14. backend/api/routes/login.py +58 -0
  15. backend/api/routes/photosynthesis.py +30 -0
  16. backend/api/routes/sensors.py +47 -0
  17. backend/api/routes/weather.py +48 -0
  18. backend/requirements.txt +7 -0
  19. backend/workers/__init__.py +0 -0
  20. backend/workers/control_tick.py +124 -0
  21. backend/workers/daily_planner.py +72 -0
  22. config/settings.py +204 -0
  23. requirements.txt +18 -0
  24. src/__init__.py +52 -0
  25. src/advisor/__init__.py +1 -0
  26. src/advisor/day_ahead_advisor.py +632 -0
  27. src/advisor/safety_rails.py +179 -0
  28. src/baseline_predictor.py +248 -0
  29. src/canopy_photosynthesis.py +2 -0
  30. src/chatbot/__init__.py +1 -0
  31. src/chatbot/feedback.py +104 -0
  32. src/chatbot/guardrails.py +363 -0
  33. src/chatbot/llm_data_engineer.py +559 -0
  34. src/chatbot/routing_agent.py +233 -0
  35. src/chatbot/vineyard_chatbot.py +939 -0
  36. src/chronos_forecaster.py +2 -0
  37. src/command_arbiter.py +327 -0
  38. src/control_loop.py +779 -0
  39. src/data/__init__.py +1 -0
  40. src/data/data_providers.py +1180 -0
  41. src/data/data_schema.py +519 -0
  42. src/data/ims_client.py +215 -0
  43. src/data/redis_cache.py +152 -0
  44. src/data/sensor_data_loader.py +87 -0
  45. src/data/thingsboard_client.py +1058 -0
  46. src/data_providers.py +2 -0
  47. src/data_schema.py +2 -0
  48. src/day_ahead_advisor.py +2 -0
  49. src/day_ahead_planner.py +580 -0
  50. src/energy_budget.py +309 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

WORKDIR /app

# Unbuffered stdout/stderr so logs reach the container runtime immediately;
# no .pyc files keeps the image (and the non-root layer) slim.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH=/app

# Install dependencies first (layer caching)
COPY requirements.txt .
COPY backend/requirements.txt backend/
RUN pip install --no-cache-dir -r requirements.txt -r backend/requirements.txt

# Non-root user for security
RUN groupadd -r solarwine && useradd -r -g solarwine solarwine

# Copy application code (no Data/ — mount or fetch at runtime)
COPY src/ src/
COPY config/ config/
COPY backend/ backend/

# Switch to non-root
USER solarwine

# HuggingFace Spaces requires port 7860
EXPOSE 7860
# --start-period avoids flapping while uvicorn is still booting; the probe
# carries its own urlopen timeout so it can never hang past the check window.
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/api/health', timeout=4)" || exit 1
CMD ["uvicorn", "backend.api.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,26 @@
1
  ---
2
- title: Api
3
- emoji: 👁
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: docker
7
- pinned: false
8
- license: mit
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: SolarWine API
3
+ emoji: 🌿
4
+ colorFrom: green
5
+ colorTo: yellow
6
  sdk: docker
7
+ app_port: 7860
8
+ private: true
9
  ---
10
 
11
+ # SolarWine API
12
+
13
+ FastAPI backend for the SolarWine agrivoltaic vineyard control system.
14
+
15
+ ## Endpoints
16
+
17
+ - `GET /api/health` — health check
18
+ - `GET /api/weather/current` — current weather (IMS station 43)
19
+ - `GET /api/sensors/snapshot` — vine sensor readings (ThingsBoard)
20
+ - `GET /api/energy/current` — current power output
21
+ - `GET /api/photosynthesis/current` — photosynthesis rate (FvCB/ML)
22
+ - `GET /api/control/status` — last control loop tick
23
+ - `POST /api/chatbot/message` — AI vineyard advisor
24
+ - `GET /api/biology/rules` — biology rules
25
+
26
+ Interactive docs at `/docs`.
backend/__init__.py ADDED
File without changes
backend/api/__init__.py ADDED
File without changes
backend/api/auth.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ JWT authentication for the SolarWine API.
3
+
4
+ Initially optional — endpoints work without auth.
5
+ Enable by setting JWT_SECRET in environment.
6
+
7
+ Usage in routes::
8
+
9
+ from backend.api.auth import require_auth
10
+
11
+ @router.get("/protected")
12
+ async def protected(user: dict = Depends(require_auth)):
13
+ return {"user": user}
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import time
20
+ import logging
21
+ from typing import Optional
22
+
23
+ from fastapi import Depends, HTTPException, status
24
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
25
+
26
+ log = logging.getLogger(__name__)
27
+
28
+ _security = HTTPBearer(auto_error=False)
29
+
30
+ JWT_SECRET = os.environ.get("JWT_SECRET", "")
31
+ JWT_ALGORITHM = "HS256"
32
+ JWT_EXPIRY_HOURS = 24
33
+
34
+ if not JWT_SECRET:
35
+ log.warning("JWT_SECRET not set — authentication is DISABLED (all requests get guest/admin access)")
36
+
37
+
38
def _get_jwt():
    """Import PyJWT lazily; return the module, or None when not installed."""
    try:
        import jwt as _pyjwt
    except ImportError:
        log.warning("PyJWT not installed — auth disabled")
        return None
    return _pyjwt
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Token creation
50
+ # ---------------------------------------------------------------------------
51
+
52
def create_token(username: str, role: str = "user") -> Optional[str]:
    """Build a signed HS256 JWT for *username*, or None when auth is off.

    Returns None if PyJWT is unavailable or JWT_SECRET is unset, matching
    the "auth disabled" behaviour of the rest of this module.
    """
    lib = _get_jwt()
    if not lib or not JWT_SECRET:
        return None
    claims = {
        "sub": username,
        "role": role,
        "iat": int(time.time()),
        "exp": int(time.time()) + JWT_EXPIRY_HOURS * 3600,
    }
    return lib.encode(claims, JWT_SECRET, algorithm=JWT_ALGORITHM)
64
+
65
+
66
+ # ---------------------------------------------------------------------------
67
+ # Token validation (FastAPI dependency)
68
+ # ---------------------------------------------------------------------------
69
+
70
async def require_auth(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> dict:
    """Validate the Bearer JWT and return its decoded payload.

    Behaviour matrix:
    - JWT_SECRET unset: auth disabled, every caller gets guest/admin.
    - No Authorization header: 401 with a WWW-Authenticate challenge.
    - PyJWT not installed: degrade to guest/admin (mirrors the disabled case).
    - Expired or invalid token: 401.
    """
    # Auth disabled — allow all
    if not JWT_SECRET:
        return {"sub": "guest", "role": "admin"}

    if not credentials:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing authorization header",
            headers={"WWW-Authenticate": "Bearer"},
        )

    jwt = _get_jwt()
    if not jwt:
        return {"sub": "guest", "role": "admin"}

    try:
        payload = jwt.decode(
            credentials.credentials,
            JWT_SECRET,
            algorithms=[JWT_ALGORITHM],
        )
    except jwt.ExpiredSignatureError:
        # `from None` (flake8 B904) suppresses the library traceback so JWT
        # internals never leak into the HTTP error context.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token expired",
        ) from None
    except jwt.InvalidTokenError:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token",
        ) from None
    return payload
109
+
110
+
111
async def optional_auth(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> dict:
    """Best-effort auth: decode the token when possible, else a guest user."""
    guest = {"sub": "guest", "role": "guest"}
    if not JWT_SECRET or not credentials:
        return guest
    try:
        return await require_auth(credentials)
    except HTTPException:
        return guest
backend/api/deps.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Shared FastAPI dependencies — DataHub singleton + Redis.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from functools import lru_cache
8
+
9
+ from src.data.data_providers import DataHub
10
+ from src.data.redis_cache import get_redis
11
+
12
+
13
# lru_cache(maxsize=1) on a zero-argument function acts as a lazy,
# process-wide singleton: construction is deferred to the first request.
@lru_cache(maxsize=1)
def get_datahub() -> DataHub:
    """Return a singleton DataHub (all services with Redis-backed caches)."""
    return DataHub.default()
17
+
18
+
19
def get_redis_client():
    """Return the Redis client (or None).

    Thin pass-through to src.data.redis_cache.get_redis so routes depend on
    this module instead of importing the cache layer directly.
    """
    return get_redis()
backend/api/main.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI application — SolarWine API Gateway.
3
+
4
+ Deployed on HuggingFace Spaces (Docker SDK, port 7860).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import os
11
+ import time
12
+ from contextlib import asynccontextmanager
13
+
14
+ from fastapi import FastAPI, Request, Response
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from slowapi import Limiter, _rate_limit_exceeded_handler
17
+ from slowapi.errors import RateLimitExceeded
18
+ from slowapi.util import get_remote_address
19
+
20
+ from backend.api.routes import health, weather, sensors, energy, photosynthesis, control, chatbot, biology, login
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Structured logging
24
+ # ---------------------------------------------------------------------------
25
+
26
+ logging.basicConfig(
27
+ level=logging.INFO,
28
+ format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
29
+ )
30
+ log = logging.getLogger("solarwine.api")
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Sentry (optional — set SENTRY_DSN env var to enable)
34
+ # ---------------------------------------------------------------------------
35
+
36
+ _sentry_dsn = os.environ.get("SENTRY_DSN", "")
37
+ if _sentry_dsn:
38
+ try:
39
+ import sentry_sdk
40
+ from sentry_sdk.integrations.fastapi import FastApiIntegration
41
+ from sentry_sdk.integrations.starlette import StarletteIntegration
42
+ sentry_sdk.init(
43
+ dsn=_sentry_dsn,
44
+ integrations=[StarletteIntegration(), FastApiIntegration()],
45
+ traces_sample_rate=0.1,
46
+ environment=os.environ.get("SENTRY_ENV", "production"),
47
+ )
48
+ log.info("Sentry enabled (env=%s)", os.environ.get("SENTRY_ENV", "production"))
49
+ except ImportError:
50
+ log.warning("SENTRY_DSN set but sentry-sdk not installed — skipping")
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Lifespan — one-time startup / shutdown
54
+ # ---------------------------------------------------------------------------
55
+
56
+ _start_time: float = 0.0
57
+
58
+
59
+ @asynccontextmanager
60
+ async def lifespan(app: FastAPI):
61
+ global _start_time
62
+ _start_time = time.time()
63
+ log.info("SolarWine API starting (port 7860)")
64
+ yield
65
+ log.info("SolarWine API shutting down (uptime=%.0fs)", get_uptime())
66
+
67
+
68
+ def get_uptime() -> float:
69
+ return time.time() - _start_time
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # App
74
+ # ---------------------------------------------------------------------------
75
+
76
+ limiter = Limiter(key_func=get_remote_address, default_limits=["60/minute"])
77
+
78
+ app = FastAPI(
79
+ title="SolarWine API",
80
+ version="0.1.0",
81
+ description="Agrivoltaic vineyard control system API",
82
+ lifespan=lifespan,
83
+ )
84
+ app.state.limiter = limiter
85
+ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
86
+
87
+
88
+ # -- Request logging --------------------------------------------------------
89
+
90
+ @app.middleware("http")
91
+ async def log_requests(request: Request, call_next):
92
+ start = time.time()
93
+ response = await call_next(request)
94
+ duration = (time.time() - start) * 1000
95
+ if request.url.path != "/api/health": # skip noisy health checks
96
+ log.info("%s %s %d %.0fms", request.method, request.url.path, response.status_code, duration)
97
+ return response
98
+
99
+ # -- CORS -------------------------------------------------------------------
100
+
101
+ allowed_origins = os.environ.get("ALLOWED_ORIGINS", "http://localhost:3000,http://localhost:5173").split(",")
102
+ app.add_middleware(
103
+ CORSMiddleware,
104
+ allow_origins=[o.strip() for o in allowed_origins],
105
+ allow_credentials=True,
106
+ allow_methods=["GET", "POST", "OPTIONS"],
107
+ allow_headers=["Content-Type", "Authorization"],
108
+ )
109
+
110
+ # -- Routes -----------------------------------------------------------------
111
+
112
+ app.include_router(health.router, prefix="/api", tags=["health"])
113
+ app.include_router(weather.router, prefix="/api/weather", tags=["weather"])
114
+ app.include_router(sensors.router, prefix="/api/sensors", tags=["sensors"])
115
+ app.include_router(energy.router, prefix="/api/energy", tags=["energy"])
116
+ app.include_router(photosynthesis.router, prefix="/api/photosynthesis", tags=["photosynthesis"])
117
+ app.include_router(control.router, prefix="/api/control", tags=["control"])
118
+ app.include_router(chatbot.router, prefix="/api/chatbot", tags=["chatbot"])
119
+ app.include_router(biology.router, prefix="/api/biology", tags=["biology"])
120
+ app.include_router(login.router, prefix="/api/auth", tags=["auth"])
backend/api/routes/__init__.py ADDED
File without changes
backend/api/routes/biology.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Biology endpoints — wraps BiologyService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from fastapi import APIRouter, Depends
8
+ from fastapi.responses import JSONResponse
9
+
10
+ from backend.api.deps import get_datahub
11
+ from src.data.data_providers import DataHub
12
+
13
+ log = logging.getLogger(__name__)
14
+ router = APIRouter()
15
+
16
+
17
+ @router.get("/phenology")
18
+ async def phenology(hub: DataHub = Depends(get_datahub)):
19
+ """Current phenological stage (GDD-based)."""
20
+ try:
21
+ from src.models.phenology import estimate_stage_combined
22
+ stage = estimate_stage_combined()
23
+ return {"stage": stage.name if hasattr(stage, "name") else str(stage)}
24
+ except Exception as exc:
25
+ log.error("Phenology estimation failed: %s", exc)
26
+ return JSONResponse(status_code=500, content={"error": "Phenology estimation failed"})
27
+
28
+
29
+ @router.get("/rules")
30
+ async def biology_rules(hub: DataHub = Depends(get_datahub)):
31
+ """List all biology rules."""
32
+ return hub.biology.list_rules()
33
+
34
+
35
+ @router.get("/rules/{rule_name}")
36
+ async def biology_rule_detail(rule_name: str, hub: DataHub = Depends(get_datahub)):
37
+ """Explain a specific biology rule."""
38
+ return hub.biology.explain_rule(rule_name)
backend/api/routes/chatbot.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Chatbot endpoints — wraps VineyardChatbot."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import threading
7
+
8
+ from pydantic import BaseModel, Field
9
+ from fastapi import APIRouter, Depends, Request
10
+ from fastapi.responses import JSONResponse
11
+ from slowapi import Limiter
12
+ from slowapi.util import get_remote_address
13
+
14
+ from backend.api.deps import get_datahub
15
+ from src.data.data_providers import DataHub
16
+
17
+ log = logging.getLogger(__name__)
18
+ limiter = Limiter(key_func=get_remote_address)
19
+ router = APIRouter()
20
+
21
+
22
class ChatRequest(BaseModel):
    """Inbound payload for POST /message."""
    # max_length caps the text forwarded to the chatbot backend
    message: str = Field(..., min_length=1, max_length=4000)
    # opaque conversation key; callers that omit it share the "default" session
    session_id: str = "default"
25
+
26
+
27
class FeedbackRequest(BaseModel):
    """User rating of a chatbot reply, for POST /feedback."""
    session_id: str
    message_id: str
    # only the three values the feedback store understands
    rating: str = Field(..., pattern=r"^(up|down|flag)$")
    comment: str = Field("", max_length=2000)
32
+
33
+
34
+ # Thread-safe lazy chatbot init
35
+ _chatbot = None
36
+ _chatbot_lock = threading.Lock()
37
+
38
+
39
def _get_chatbot(hub: DataHub):
    """Return the process-wide VineyardChatbot, constructing it on first use.

    Double-checked locking: once the bot exists the fast path skips the
    lock; the lock only guards the one-time construction.
    """
    global _chatbot
    if _chatbot is None:
        with _chatbot_lock:
            if _chatbot is None:
                from src.chatbot.vineyard_chatbot import VineyardChatbot

                _chatbot = VineyardChatbot(hub=hub)
    return _chatbot
48
+
49
+
50
+ @router.post("/message")
51
+ @limiter.limit("10/minute")
52
+ async def chat_message(request: Request, req: ChatRequest, hub: DataHub = Depends(get_datahub)):
53
+ bot = _get_chatbot(hub)
54
+ response = bot.chat(req.message)
55
+ return {
56
+ "message": response.message,
57
+ "confidence": getattr(response, "confidence", None),
58
+ "sources": getattr(response, "sources", []),
59
+ "caveats": getattr(response, "caveats", []),
60
+ "rule_violations": getattr(response, "rule_violations", []),
61
+ "response_mode": getattr(response, "response_mode", "info"),
62
+ }
63
+
64
+
65
+ @router.post("/feedback")
66
+ @limiter.limit("60/minute")
67
+ async def chat_feedback(request: Request, req: FeedbackRequest):
68
+ try:
69
+ from src.chatbot.feedback import log_feedback
70
+ log_feedback(
71
+ session_id=req.session_id,
72
+ message_id=req.message_id,
73
+ rating=req.rating,
74
+ comment=req.comment,
75
+ )
76
+ return {"status": "ok"}
77
+ except Exception as exc:
78
+ log.error("Feedback logging failed: %s", exc)
79
+ return JSONResponse(status_code=500, content={"error": "Feedback logging failed"})
backend/api/routes/control.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Control system endpoints — reads state from Redis."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from fastapi import APIRouter
8
+ from fastapi.responses import JSONResponse
9
+
10
+ from backend.api.deps import get_datahub, get_redis_client
11
+
12
+ log = logging.getLogger(__name__)
13
+ router = APIRouter()
14
+
15
+
16
+ @router.get("/status")
17
+ async def control_status():
18
+ """Last ControlLoop tick result (stored in Redis by the worker)."""
19
+ redis = get_redis_client()
20
+ if redis:
21
+ data = redis.get_json("control:last_tick")
22
+ if data:
23
+ return data
24
+ return JSONResponse(
25
+ status_code=503,
26
+ content={"error": "No tick result available (worker may not have run yet)"},
27
+ )
28
+
29
+
30
+ @router.get("/plan")
31
+ async def control_plan():
32
+ """Current day-ahead plan."""
33
+ redis = get_redis_client()
34
+ if redis:
35
+ data = redis.get_json("control:plan")
36
+ if data:
37
+ return data
38
+ # Fallback: try loading from file
39
+ try:
40
+ import json
41
+ from config.settings import DAILY_PLAN_PATH
42
+ with open(DAILY_PLAN_PATH) as f:
43
+ return json.load(f)
44
+ except FileNotFoundError:
45
+ return JSONResponse(status_code=404, content={"error": "No plan available"})
46
+ except Exception as exc:
47
+ log.error("Failed to load plan from file: %s", exc)
48
+ return JSONResponse(status_code=500, content={"error": "Plan loading failed"})
49
+
50
+
51
+ @router.get("/budget")
52
+ async def control_budget():
53
+ """Current energy budget state."""
54
+ redis = get_redis_client()
55
+ if redis:
56
+ data = redis.get_json("control:budget")
57
+ if data:
58
+ return data
59
+ return JSONResponse(
60
+ status_code=503,
61
+ content={"error": "No budget data available"},
62
+ )
63
+
64
+
65
+ @router.get("/trackers")
66
+ async def control_trackers():
67
+ """Live tracker angles from ThingsBoard."""
68
+ hub = get_datahub()
69
+ try:
70
+ snapshot = hub.vine_sensors.get_snapshot(light=True)
71
+ return {"trackers": snapshot.get("trackers", {}), "source": "ThingsBoard"}
72
+ except Exception as exc:
73
+ log.error("Tracker fetch failed: %s", exc)
74
+ return JSONResponse(
75
+ status_code=502,
76
+ content={"error": "Tracker fetch failed"},
77
+ )
backend/api/routes/energy.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Energy endpoints — wraps EnergyService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from fastapi import APIRouter, Depends, HTTPException, Query
8
+
9
+ from backend.api.deps import get_datahub
10
+ from src.data.data_providers import DataHub
11
+
12
+ router = APIRouter()
13
+
14
+ _DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
15
+
16
+
17
def _validate_date(value: str) -> str:
    """Validate a YYYY-MM-DD date string and return it unchanged.

    Rejects both malformed strings and well-formed but impossible dates
    (e.g. "2024-13-40") with HTTP 400, so bad input never reaches the
    data layer.
    """
    from datetime import date

    if not _DATE_RE.match(value):
        raise HTTPException(status_code=400, detail=f"Invalid date format: {value!r}. Expected YYYY-MM-DD")
    try:
        date.fromisoformat(value)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid calendar date: {value!r}") from None
    return value
21
+
22
+
23
+ @router.get("/current")
24
+ async def energy_current(hub: DataHub = Depends(get_datahub)):
25
+ return hub.energy.get_current()
26
+
27
+
28
+ @router.get("/daily/{target_date}")
29
+ async def energy_daily(target_date: str, hub: DataHub = Depends(get_datahub)):
30
+ _validate_date(target_date)
31
+ return hub.energy.get_daily_production(target_date=target_date)
32
+
33
+
34
+ @router.get("/history")
35
+ async def energy_history(
36
+ hours: int = Query(24, ge=1, le=8760, description="Hours of history (1–8760)"),
37
+ hub: DataHub = Depends(get_datahub),
38
+ ):
39
+ return hub.energy.get_history(hours_back=hours)
40
+
41
+
42
+ @router.get("/predict/{target_date}")
43
+ async def energy_predict(target_date: str, hub: DataHub = Depends(get_datahub)):
44
+ _validate_date(target_date)
45
+ return hub.energy.predict(target_date=target_date)
backend/api/routes/health.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Health check endpoint."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import os
7
+
8
+ from fastapi import APIRouter
9
+
10
+ from backend.api.deps import get_redis_client
11
+
12
+ router = APIRouter()
13
+
14
+
15
async def _check_thingsboard() -> bool:
    """Check ThingsBoard connectivity without blocking the event loop.

    Returns False when THINGSBOARD_HOST is unset, unreachable, or the
    health endpoint responds with a non-200 status.
    """
    tb_host = os.environ.get("THINGSBOARD_HOST", "")
    if not tb_host:
        return False

    def _probe() -> bool:
        # Sync urllib runs in a worker thread so the loop never blocks;
        # the context manager closes the response socket deterministically.
        import urllib.request

        url = f"{tb_host.rstrip('/')}/api/noauth/health"
        with urllib.request.urlopen(url, timeout=3) as resp:
            return resp.status == 200

    try:
        # asyncio.to_thread replaces the deprecated
        # get_event_loop()/run_in_executor pattern on Python 3.9+.
        return await asyncio.to_thread(_probe)
    except Exception:
        return False
31
+
32
+
33
+ @router.get("/health")
34
+ async def health():
35
+ redis = get_redis_client()
36
+ redis_ok = redis.ping() if redis else False
37
+
38
+ from backend.api.main import get_uptime
39
+
40
+ tb_ok = await _check_thingsboard()
41
+
42
+ return {
43
+ "status": "ok",
44
+ "uptime_seconds": round(get_uptime(), 1),
45
+ "redis_connected": redis_ok,
46
+ "thingsboard_reachable": tb_ok,
47
+ "ims_configured": bool(os.environ.get("IMS_API_TOKEN")),
48
+ "gemini_configured": bool(os.environ.get("GOOGLE_API_KEY")),
49
+ }
backend/api/routes/login.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Login endpoint — issues JWT tokens."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import secrets
8
+
9
+ from pydantic import BaseModel, Field
10
+ from fastapi import APIRouter, HTTPException, Request
11
+ from slowapi import Limiter
12
+ from slowapi.util import get_remote_address
13
+
14
+ from backend.api.auth import create_token
15
+
16
+ log = logging.getLogger(__name__)
17
+
18
+ limiter = Limiter(key_func=get_remote_address)
19
+ router = APIRouter()
20
+
21
+ # Simple user store — replace with a real DB later.
22
+ # For now, a single admin user configured via environment.
23
+ _ADMIN_USER = os.environ.get("ADMIN_USERNAME", "admin")
24
+ _ADMIN_PASS = os.environ.get("ADMIN_PASSWORD", "")
25
+
26
+ if not _ADMIN_PASS:
27
+ log.warning("ADMIN_PASSWORD not set — login endpoint will return 503")
28
+
29
+
30
class LoginRequest(BaseModel):
    """Credentials payload for POST /login."""
    # length caps bound the work done by the constant-time comparison
    username: str = Field(..., min_length=1, max_length=100)
    password: str = Field(..., min_length=1, max_length=200)
33
+
34
+
35
class LoginResponse(BaseModel):
    """Token payload: send access_token as `Authorization: Bearer <token>`."""
    access_token: str
    token_type: str = "bearer"  # fixed OAuth2-style token type
38
+
39
+
40
+ @router.post("/login", response_model=LoginResponse)
41
+ @limiter.limit("5/minute")
42
+ async def login(request: Request, req: LoginRequest):
43
+ """Authenticate and return a JWT token."""
44
+ if not _ADMIN_PASS:
45
+ raise HTTPException(status_code=503, detail="Auth not configured")
46
+
47
+ # Constant-time comparison to prevent timing attacks
48
+ user_ok = secrets.compare_digest(req.username, _ADMIN_USER)
49
+ pass_ok = secrets.compare_digest(req.password, _ADMIN_PASS)
50
+
51
+ if not user_ok or not pass_ok:
52
+ raise HTTPException(status_code=401, detail="Invalid credentials")
53
+
54
+ token = create_token(username=req.username, role="admin")
55
+ if not token:
56
+ raise HTTPException(status_code=503, detail="JWT not configured")
57
+
58
+ return LoginResponse(access_token=token)
backend/api/routes/photosynthesis.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Photosynthesis endpoints — wraps PhotosynthesisService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import Enum
6
+
7
+ from fastapi import APIRouter, Depends, Query
8
+
9
+ from backend.api.deps import get_datahub
10
+ from src.data.data_providers import DataHub
11
+
12
+ router = APIRouter()
13
+
14
+
15
class PSModel(str, Enum):
    """Photosynthesis model selector accepted by the `model` query param."""
    fvcb = "fvcb"  # biochemical FvCB model
    ml = "ml"      # ML-based estimator
18
+
19
+
20
+ @router.get("/current")
21
+ async def ps_current(
22
+ model: PSModel = Query(PSModel.fvcb, description="Model: fvcb or ml"),
23
+ hub: DataHub = Depends(get_datahub),
24
+ ):
25
+ return hub.photosynthesis.get_current(model=model.value)
26
+
27
+
28
+ @router.get("/forecast")
29
+ async def ps_forecast(hub: DataHub = Depends(get_datahub)):
30
+ return hub.photosynthesis.forecast_day_ahead()
backend/api/routes/sensors.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Vine sensor endpoints — wraps VineSensorService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import Enum
6
+ from typing import Optional
7
+
8
+ from fastapi import APIRouter, Depends, Query
9
+
10
+ from backend.api.deps import get_datahub
11
+ from src.data.data_providers import DataHub
12
+
13
+ router = APIRouter()
14
+
15
+
16
class DeviceType(str, Enum):
    """Sensor device categories accepted by the history endpoint."""
    crop = "crop"
    air = "air"
    soil = "soil"
20
+
21
+
22
class AreaType(str, Enum):
    """Area filter values; semantics defined by VineSensorService."""
    treatment = "treatment"
    reference = "reference"
    ambient = "ambient"
26
+
27
+
28
+ @router.get("/snapshot")
29
+ async def sensors_snapshot(
30
+ light: bool = False,
31
+ hub: DataHub = Depends(get_datahub),
32
+ ):
33
+ return hub.vine_sensors.get_snapshot(light=light)
34
+
35
+
36
+ @router.get("/history")
37
+ async def sensors_history(
38
+ type: DeviceType = Query(DeviceType.crop, description="Device type"),
39
+ area: Optional[AreaType] = Query(None, description="Area filter"),
40
+ hours: int = Query(24, ge=1, le=8760, description="Hours of history (1–8760)"),
41
+ hub: DataHub = Depends(get_datahub),
42
+ ):
43
+ return hub.vine_sensors.get_history(
44
+ device_type=type.value,
45
+ area=area.value if area else None,
46
+ hours_back=hours,
47
+ )
backend/api/routes/weather.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Weather endpoints — wraps WeatherService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import date, timedelta
7
+
8
+ from fastapi import APIRouter, Depends, HTTPException, Query
9
+ from fastapi.responses import JSONResponse
10
+
11
+ from backend.api.deps import get_datahub
12
+ from src.data.data_providers import DataHub
13
+
14
+ router = APIRouter()
15
+
16
+ _DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
17
+
18
+
19
def _validate_date(value: str) -> str:
    """Validate a YYYY-MM-DD date string and return it unchanged.

    Rejects both malformed strings and well-formed but impossible dates
    (e.g. "2024-13-40") with HTTP 400, so bad input never reaches the
    weather service.
    """
    if not _DATE_RE.match(value):
        raise HTTPException(status_code=400, detail=f"Invalid date format: {value!r}. Expected YYYY-MM-DD")
    try:
        date.fromisoformat(value)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid calendar date: {value!r}") from None
    return value
23
+
24
+
25
+ @router.get("/current")
26
+ async def weather_current(hub: DataHub = Depends(get_datahub)):
27
+ return hub.weather.get_current()
28
+
29
+
30
+ @router.get("/history")
31
+ async def weather_history(
32
+ start_date: str = Query(None, description="Start date (YYYY-MM-DD). Default: 7 days ago"),
33
+ end_date: str = Query(None, description="End date (YYYY-MM-DD). Default: today"),
34
+ hub: DataHub = Depends(get_datahub),
35
+ ):
36
+ end = end_date or str(date.today())
37
+ start = start_date or str(date.today() - timedelta(days=7))
38
+ _validate_date(start)
39
+ _validate_date(end)
40
+ return hub.weather.get_history(start_date=start, end_date=end)
41
+
42
+
43
+ @router.get("/forecast")
44
+ async def weather_forecast(hub: DataHub = Depends(get_datahub)):
45
+ try:
46
+ return hub.weather.get_forecast()
47
+ except AttributeError:
48
+ return JSONResponse(status_code=501, content={"error": "Forecast not implemented yet"})
backend/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Backend-specific dependencies (on top of root requirements.txt)
2
+ fastapi>=0.115.0
3
+ uvicorn[standard]>=0.34.0
4
+ pydantic>=2.0
5
+ slowapi>=0.2.0
6
+ PyJWT>=2.8.0
7
+ sentry-sdk[fastapi]>=2.0 # optional: set SENTRY_DSN to enable
backend/workers/__init__.py ADDED
File without changes
backend/workers/control_tick.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ControlLoop single-tick worker.
3
+
4
+ Entry point for GitHub Actions cron (every 15 min).
5
+ Usage:
6
+ python -m backend.workers.control_tick
7
+ python -m backend.workers.control_tick --dry-run
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import json
14
+ import logging
15
+ import sys
16
+ from datetime import datetime, timezone
17
+ from pathlib import Path
18
+
19
+ # Ensure project root is on sys.path
20
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
21
+ if str(PROJECT_ROOT) not in sys.path:
22
+ sys.path.insert(0, str(PROJECT_ROOT))
23
+
24
+ # Load .env if present (local dev)
25
+ try:
26
+ from dotenv import load_dotenv
27
+ load_dotenv(PROJECT_ROOT / ".env")
28
+ except ImportError:
29
+ pass
30
+
31
+ logging.basicConfig(
32
+ level=logging.INFO,
33
+ format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
34
+ )
35
+ log = logging.getLogger("control_tick")
36
+
37
+
38
def main():
    """Run one ControlLoop tick and persist the result to Redis.

    Intended to be invoked by a GitHub Actions cron every 15 minutes.
    With --dry-run, decisions are computed but not dispatched.
    """
    parser = argparse.ArgumentParser(description="Run one ControlLoop tick")
    parser.add_argument("--dry-run", action="store_true", help="Compute decisions without dispatching")
    args = parser.parse_args()

    # Imported here so sys.path/.env setup above runs first.
    from src.control_loop import ControlLoop
    from src.data.redis_cache import get_redis

    log.info("Starting control tick (dry_run=%s)", args.dry_run)

    loop = ControlLoop(dry_run=args.dry_run)
    result = loop.tick()

    # Serialise result
    # NOTE(review): result_dict aliases result.__dict__, so the metadata
    # keys below mutate the tick object itself — harmless in this
    # run-once process, but worth confirming if tick() results are reused.
    result_dict = result.__dict__ if hasattr(result, "__dict__") else {"raw": str(result)}
    result_dict["_timestamp"] = datetime.now(timezone.utc).isoformat()
    result_dict["_dry_run"] = args.dry_run

    # Store in Redis for the API to read
    redis = get_redis()
    if redis:
        # Convert to JSON-safe dict (default=str stringifies anything exotic)
        safe = json.loads(json.dumps(result_dict, default=str))
        redis.set_json("control:last_tick", safe, ttl=1200)  # 20 min TTL
        log.info("Tick result saved to Redis")
    else:
        log.warning("Redis not available — tick result not persisted")

    # Truncated to 500 chars to keep CI logs readable
    log.info("Tick complete: %s", json.dumps(result_dict, default=str, indent=2)[:500])

    # Budget alert: warn if >80% spent before 14:00 IST
    _check_budget_alert(result_dict)
70
+
71
+
72
def _check_budget_alert(tick: dict) -> None:
    """Log a warning (visible in GitHub Actions) if budget is nearly exhausted.

    Best-effort: every failure path is swallowed and logged at DEBUG so a
    broken alert never fails the tick job. Optionally POSTs to
    BUDGET_ALERT_WEBHOOK when set.
    """
    import os
    try:
        from datetime import datetime, timezone, timedelta
        now_utc = datetime.now(timezone.utc)
        # NOTE(review): fixed +2h offset ignores Israel DST (UTC+3 in
        # summer) — confirm the 14:00 cutoff is acceptable year-round.
        now_israel = now_utc + timedelta(hours=2)  # approximate IST

        remaining = tick.get("budget_remaining_kwh", None)
        if remaining is None or remaining == 0:
            return  # no budget data or dormant season

        # Only alert before 14:00 IST (still daylight hours left)
        if now_israel.hour >= 14:
            return

        # Get today's total budget from Redis
        from src.data.redis_cache import get_redis
        redis = get_redis()
        if not redis:
            return
        budget_data = redis.get_json("control:budget")
        if not budget_data or "plan" not in budget_data:
            return

        plan = budget_data["plan"]
        # NOTE(review): spent/(total+spent) below treats `total` as the
        # *remaining* budget (slot budgets + margin), not the day's full
        # budget — verify against the planner's "control:budget" schema.
        total = sum(plan.get("slot_budgets", {}).values()) + plan.get("daily_margin_remaining_kWh", 0)
        spent = plan.get("cumulative_spent", 0)

        if total > 0 and spent / (total + spent) > 0.8:
            log.warning(
                "BUDGET ALERT: %.1f%% of daily budget spent before %02d:00 IST "
                "(spent=%.3f kWh, remaining=%.3f kWh)",
                spent / (total + spent) * 100,
                now_israel.hour,
                spent,
                remaining,
            )
            # Future: send webhook/email here
            webhook_url = os.environ.get("BUDGET_ALERT_WEBHOOK")
            if webhook_url:
                import requests
                requests.post(webhook_url, json={
                    "text": f"SolarWine Budget Alert: {spent/(total+spent)*100:.0f}% spent before {now_israel.hour}:00 IST",
                    "spent_kwh": round(spent, 3),
                    "remaining_kwh": round(remaining, 3),
                }, timeout=5)
    except Exception as exc:
        log.debug("Budget alert check failed: %s", exc)
121
+
122
+
123
+ if __name__ == "__main__":
124
+ main()
backend/workers/daily_planner.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Day-ahead planner worker.

Entry point for GitHub Actions cron (daily 05:00 IST = 02:00 UTC).
Usage:
    python -m backend.workers.daily_planner
"""

from __future__ import annotations

import json
import logging
import sys
from datetime import date, datetime, timezone
from pathlib import Path

# Ensure project root is on sys.path so `src.*` and `config.*` imports
# resolve when this module runs straight from a repository checkout.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Load .env if present (local dev); in CI the variables come from the runner.
try:
    from dotenv import load_dotenv
    load_dotenv(PROJECT_ROOT / ".env")
except ImportError:
    pass  # python-dotenv not installed — rely on process environment only

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
log = logging.getLogger("daily_planner")
34
+
35
+
36
def main():
    """Compute today's day-ahead plan and publish it to disk and Redis."""
    from src.day_ahead_planner import DayAheadPlanner
    from src.data.redis_cache import get_redis
    from config.settings import DAILY_PLAN_PATH

    plan_date = date.today()
    log.info("Computing day-ahead plan for %s", plan_date)

    plan = DayAheadPlanner().plan(target_date=plan_date)

    if hasattr(plan, "to_dict"):
        plan_dict = plan.to_dict()
    else:
        plan_dict = {"raw": str(plan)}
    plan_dict["_computed_at"] = datetime.now(timezone.utc).isoformat()

    # File backup first — survives even when Redis is unreachable.
    try:
        Path(DAILY_PLAN_PATH).parent.mkdir(parents=True, exist_ok=True)
        with open(DAILY_PLAN_PATH, "w") as f:
            json.dump(plan_dict, f, default=str, indent=2)
        log.info("Plan saved to %s", DAILY_PLAN_PATH)
    except Exception as exc:
        log.error("Failed to save plan file: %s", exc)

    # Share via Redis so the control-tick worker can pick the plan up.
    redis = get_redis()
    if redis:
        # Round-trip through json to guarantee a JSON-serialisable payload.
        safe = json.loads(json.dumps(plan_dict, default=str))
        redis.set_json("control:plan", safe, ttl=86400)  # 24h TTL
        log.info("Plan saved to Redis")
    else:
        log.warning("Redis not available — plan not shared")

    log.info("Plan complete: %d slots", len(plan_dict.get("slots", [])))


if __name__ == "__main__":
    main()
config/settings.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Configuration: paths, IMS station/channel config, model params

from pathlib import Path

# Repository root (this file lives in <root>/config/).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = PROJECT_ROOT / "Data"
IMS_CACHE_DIR = DATA_DIR / "ims"
PROCESSED_DIR = DATA_DIR / "processed"
OUTPUTS_DIR = PROJECT_ROOT / "outputs"

# On-site sensor data (Stage 1)
SEYMOUR_DIR = DATA_DIR / "Seymour"
SENSORS_WIDE_PATH = SEYMOUR_DIR / "sensors_wide.csv"
SENSORS_WIDE_SAMPLE_PATH = SEYMOUR_DIR / "sensors_wide_sample.csv"
SENSORS_WIDE_METADATA_PATH = SEYMOUR_DIR / "sensors_wide_metadata.csv"

# IMS API (station 43 - Sde Boker)
IMS_STATION_ID = 43
IMS_BASE_URL = "https://api.ims.gov.il/v1/envista/stations"

# Station 43 channel IDs -> output column names (from --list-channels)
IMS_CHANNEL_MAP = {
    6: "air_temperature_c",  # TD
    8: "tdmax_c",  # TDmax
    9: "tdmin_c",  # TDmin
    10: "ghi_w_m2",  # Grad (GHI)
    7: "rh_percent",  # RH
    20: "rain_mm",  # Rain
    3: "wind_speed_ms",  # WS
    # Station 43 has no BP; WD optional: 4
}

# Preprocessor
TRAIN_RATIO = 0.75  # chronological train/test split fraction

# Growing season: vine is dormant Oct–April (no photosynthesis). Keep May–September only.
GROWING_SEASON_MONTHS = (5, 6, 7, 8, 9)  # May through September

# Site location (Sde Boker, Israel)
SITE_LATITUDE = 30.87
SITE_LONGITUDE = 34.79
SITE_ALTITUDE = 475.0  # meters

# Agrivoltaic panel geometry
PANEL_WIDTH = 1.13  # m (E-W dimension)
PANEL_HEIGHT = 2.05  # m above ground
ROW_SPACING = 3.0  # m between vine row centers
CANOPY_HEIGHT = 1.2  # m (VSP trellis)
CANOPY_WIDTH = 0.6  # m
ROW_AZIMUTH = 315.0  # degrees CW from north (NW–SE row orientation)

# === TRACKER CONSTRAINTS ===
TRACKER_MAX_ANGLE = 60.0  # degrees — mechanical limit of single-axis tracker
TRACKER_GCR = 0.377  # ground coverage ratio (panel_width / row_spacing = 1.13 / 3.0)

# === TRACKER ID MAPPING ===
# Canonical mapping between integer IDs (DB/fleet) and string names (ThingsBoard)
TRACKER_ID_MAP = {
    501: "Tracker501",
    502: "Tracker502",
    503: "Tracker503",
    509: "Tracker509",
}

# ---------------------------------------------------------------------------
# SolarWine 2.0 — Control System Parameters
# ---------------------------------------------------------------------------

# === PV SYSTEM ===
SYSTEM_CAPACITY_KW = 48.0  # DC nameplate capacity (from ThingsBoard Digital Twin)
STC_IRRADIANCE_W_M2 = 1000.0  # Standard Test Conditions irradiance for normalisation

# === ENERGY BUDGET ===
# Hard ceiling: fraction of annual PV generation the vines can "spend" on shading.
MAX_ENERGY_REDUCTION_PCT = 5.0  # % of annual generation (user's hard ceiling)
ANNUAL_RESERVE_PCT = 15.0  # emergency reserve — not allocated to any month
WEEKLY_RESERVE_PCT = 20.0  # within-week flexibility buffer
DAILY_MARGIN_PCT = 20.0  # real-time response pool within the day

# Monthly budget weights — must sum to 1.0 across growing season.
# (0.02 + 0.05 + 0.45 + 0.40 + 0.08 = 1.00.)
# May budget is very low (extreme heat emergency only); the 3D model will
# naturally produce no effective dose in most May slots because fruit-set
# geometry and low stress do not warrant intervention.
MONTHLY_BUDGET_WEIGHTS = {
    5: 0.02,  # May — near-zero; extreme emergency only (fruit-set geometry protects naturally)
    6: 0.05,  # June — rare; only extreme heat spikes
    7: 0.45,  # July — peak heat; primary shading window
    8: 0.40,  # August — sustained heat; fruit ripening / sunburn risk
    9: 0.08,  # Sept — occasional late heat waves
}

# === NO-SHADE WINDOWS (hard constraints — shading PROHIBITED) ===
# These are enforced by the InterventionGate AND the chatbot guardrails.
NO_SHADE_BEFORE_HOUR = 10  # local solar time — morning light is sacred for carbon fixation
NO_SHADE_MONTHS = [5]  # May — full spring exposure for flowering / fruit set
NO_SHADE_GHI_BELOW = 300  # W/m² — overcast, already diffuse; no stress to relieve
NO_SHADE_TLEAF_BELOW = 28.0  # °C — below RuBP→Rubisco transition zone; vine wants light

# === SHADE-ELIGIBLE CONDITIONS (ALL must be true to allow intervention) ===
SHADE_ELIGIBLE_TLEAF_ABOVE = 30.0  # °C — Semillon Rubisco transition (heat bottleneck)
SHADE_ELIGIBLE_CWSI_ABOVE = 0.4  # moderate water stress confirmed by sensors
SHADE_ELIGIBLE_GHI_ABOVE = 400  # W/m² — significant direct radiation load (night/deep-overcast guard)
SHADE_ELIGIBLE_HOURS = (10, 16)  # local solar time window (10:00–16:00)

# Minimum GHI below which the sun is too weak to cause stress (night, dense cloud).
# No offset can help; skip shadow computation entirely.
MIN_MEANINGFUL_GHI = 100  # W/m²

# === FRUITING ZONE ===
FRUITING_ZONE_INDEX = 1  # mid-canopy zone in the 3-zone ShadowModel (0=basal, 1=fruiting, 2=apical)
FRUITING_ZONE_HEIGHT_M = 0.6  # center height of grape cluster zone (m)
BERRY_SUNBURN_TEMP_C = 35.0  # berry surface temperature damage threshold (°C)
FRUITING_ZONE_TARGET_PAR = 400  # µmol/m²/s — quality threshold; above this → sunburn risk

# === TRADEOFF ENGINE ===
# Candidate shading offsets tested in order (minimum-dose search: stop at first effective offset).
CANDIDATE_OFFSETS = [0, 3, 5, 8, 10, 15, 20]  # degrees off astronomical position
SIMULATION_TIMEOUT_SEC = 5  # max seconds for one offset simulation

# === SAFETY RAILS ===
DIVERGENCE_THRESHOLD = 0.12  # 12% — if |FvCB_A - ML_A| / max > threshold → fallback to FvCB

# === SEMILLON FvCB — Rubisco transition ===
SEMILLON_TRANSITION_TEMP_C = 30.0  # °C — below: RuBP-limited (light bottleneck); above: Rubisco-limited (heat bottleneck)

# === WEATHER PROTECTION / OPERATIONAL MODES ===
WIND_STOW_SPEED_MS = 15.0  # m/s — panels stow flat (0°) above this wind speed
HEAT_SHIELD_TEMP_C = 38.0  # °C — emergency heat shield: maximum shade regardless of budget
HEAT_SHIELD_CWSI = 0.6  # CWSI threshold that activates heat shield

# === MECHANICAL HARVESTING ===
HARVEST_PARK_CLEARANCE_CM = 250  # cm — minimum clearance for harvesting machine
HARVEST_LATERAL_WIDTH_CM = 18  # cm — lateral harvester arm width
HARVESTER_RPM_RANGE = (430, 460)  # harvester operating RPM range

# === HYSTERESIS (command arbiter) ===
HYSTERESIS_WINDOW_MIN = 15  # minutes — minimum time between consecutive tilt changes
ANGLE_TOLERANCE_DEG = 2.0  # degrees — changes smaller than this are suppressed

# === PLAN DIVERGENCE RE-PLANNING ===
PLAN_DIVERGENCE_THRESHOLD_KWH = 0.5  # cumulative |planned − actual| energy that triggers re-plan
PLAN_DIVERGENCE_THRESHOLD_SLOTS = 4  # consecutive divergent slots that triggers re-plan
PLAN_REPLAN_COOLDOWN_SLOTS = 8  # minimum slots between re-plans (~2 hours)

# === ROI / LAND EQUIVALENT RATIO ===
TARGET_LER = 1.5  # Land Equivalent Ratio target (energy + crop combined)

# ---------------------------------------------------------------------------
# Agronomic Value Weighting
# ---------------------------------------------------------------------------

# Spatial zone weights for crop value calculation.
# The 3-zone ShadowModel: zone 0 = basal/trunk (~0.2m), zone 1 = fruiting (~0.6m), zone 2 = apical (~1.0m).
# During veraison, zone 2 (upper canopy) has the highest marginal value for sugar loading.
ZONE_CROP_WEIGHTS = {
    "pre_veraison": [0.25, 0.35, 0.40],  # [zone0, zone1, zone2]
    "veraison": [0.10, 0.30, 0.60],  # apical leaves dominate sugar loading
    "post_harvest": [0.15, 0.15, 0.70],  # reserve building; top canopy matters most
}

# Temporal (phenological stage) crop value multipliers.
# Applied on top of zone weights; reflects how much each unit of photosynthesis
# contributes to final economic yield at different growth stages.
STAGE_CROP_MULTIPLIER = {
    "pre_flowering": 1.2,  # setting yield capacity (bunch number, berry set)
    "fruit_set": 1.0,  # baseline — rapid cell division
    "veraison": 1.5,  # sugar loading; highest crop value per unit carbon
    "post_harvest": 0.5,  # reserve building only; energy production prioritized
}

# Growing Degree Day thresholds for Semillon at Sde Boker (base temperature 10°C).
PHENOLOGY_GDD_THRESHOLDS = {
    "budburst": 0,  # GDD accumulation starts ~March
    "flowering": 350,  # ~May
    "fruit_set": 500,  # ~early June
    "veraison": 1200,  # ~mid July
    "harvest": 1800,  # ~late August / early September
}

# ---------------------------------------------------------------------------
# Day-Ahead DP Planner
# ---------------------------------------------------------------------------

DP_SLOTS_PER_DAY = 96  # 15-min intervals × 24 h
DP_SLOT_DURATION_MIN = 15  # minutes per slot
DP_MOVEMENT_COST = 0.5  # penalty per degree of tilt change (kWh-equivalent)
#                         biases optimizer toward smooth trajectories

# Flat energy price (ILS/kWh) used when real-time tariff is unavailable.
# Replace with time-of-use tariff schedule for production.
DP_FLAT_ENERGY_PRICE_ILS_KWH = 0.35

# Base crop value (ILS / µmol CO₂ m⁻² s⁻¹ per 15-min slot) used in the
# DP utility function U_t(θ) = Price_energy · E_t(θ) + Price_crop · A_t(θ).
# Calibrate from vineyard revenue per kg grape × expected yield per A unit.
DP_BASE_CROP_VALUE = 0.10

# ---------------------------------------------------------------------------
# Simulation Log Storage
# ---------------------------------------------------------------------------

SIMULATION_LOG_DIR = DATA_DIR / "simulation_logs"
SIMULATION_LOG_PATH = SIMULATION_LOG_DIR / "control_loop.parquet"
DAILY_PLAN_PATH = DATA_DIR / "daily_plan.json"
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Photosynthesis Prediction Model - dependencies
2
+ # Install: pip install -r requirements.txt
3
+
4
+ pandas==2.3.3
5
+ numpy==2.4.2
6
+ scikit-learn==1.8.0
7
+ matplotlib==3.10.8
8
+ seaborn==0.13.2
9
+ requests==2.32.5
10
+ python-dotenv==1.2.1
11
+ streamlit==1.54.0
12
+ plotly==6.5.2
13
+ xgboost>=2.0
14
+ pvlib>=0.10.0
15
+ astral>=3.2
16
+ chronos-forecasting>=2.0
17
+ torch>=2.0
18
+ google-genai>=1.0
src/__init__.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SolarWine src package — re-exports for backward compatibility
2
+ # Modules live in subpackages: data, models, forecasting, shading, advisor, chatbot, genai
3
+
4
+ import sys
5
+
6
+ # Map old flat names to new subpackage locations
7
+ _REDIRECTS = {
8
+ # data
9
+ "ims_client": "src.data.ims_client",
10
+ "sensor_data_loader": "src.data.sensor_data_loader",
11
+ "data_schema": "src.data.data_schema",
12
+ "thingsboard_client": "src.data.thingsboard_client",
13
+ "data_providers": "src.data.data_providers",
14
+ # models
15
+ "farquhar_model": "src.models.farquhar_model",
16
+ "canopy_photosynthesis": "src.models.canopy_photosynthesis",
17
+ "phenology": "src.models.phenology",
18
+ # forecasting
19
+ "predictor": "src.forecasting.predictor",
20
+ "ts_predictor": "src.forecasting.ts_predictor",
21
+ "chronos_forecaster": "src.forecasting.chronos_forecaster",
22
+ "preprocessor": "src.forecasting.preprocessor",
23
+ "time_features": "src.forecasting.time_features",
24
+ # shading
25
+ "solar_geometry": "src.shading.solar_geometry",
26
+ "tracker_optimizer": "src.shading.tracker_optimizer",
27
+ "vine_3d_scene": "src.shading.vine_3d_scene",
28
+ "tradeoff_engine": "src.shading.tradeoff_engine",
29
+ # advisor
30
+ "day_ahead_advisor": "src.advisor.day_ahead_advisor",
31
+ "safety_rails": "src.advisor.safety_rails",
32
+ # chatbot
33
+ "vineyard_chatbot": "src.chatbot.vineyard_chatbot",
34
+ "routing_agent": "src.chatbot.routing_agent",
35
+ "llm_data_engineer": "src.chatbot.llm_data_engineer",
36
+ # genai
37
+ "genai_utils": "src.genai.utils",
38
+ }
39
+
40
+
41
+ def __getattr__(name: str):
42
+ if name in _REDIRECTS:
43
+ import importlib
44
+
45
+ mod = importlib.import_module(_REDIRECTS[name])
46
+ sys.modules[f"{__name__}.{name}"] = mod
47
+ return mod
48
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
49
+
50
+
51
+ def __dir__():
52
+ return sorted(_REDIRECTS.keys())
src/advisor/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Advisor: day-ahead, safety rails."""
src/advisor/day_ahead_advisor.py ADDED
@@ -0,0 +1,632 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DayAheadAdvisor: Gemini-powered qualitative day-ahead stress advisory for
3
+ Semillon grapevine in the SolarWine agrivoltaic system.
4
+
5
+ Analyzes IMS weather forecast through vine biology rules to produce:
6
+ - Hourly stress profile (RuBP vs Rubisco limitation)
7
+ - Energy budget recommendations (time-block distribution)
8
+ - Model routing preferences (FvCB vs ML by time of day)
9
+ - Chronos forecast sanity check (optional)
10
+
11
+ Sits between raw forecast data and the future Phase 3.5 day-ahead planner.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from dataclasses import dataclass, field, asdict
18
+ from typing import Optional
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+
23
+ from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key
24
+
25
+
26
# ---------------------------------------------------------------------------
# Data structures
# ---------------------------------------------------------------------------

@dataclass
class HourlyStressEntry:
    """One daytime hour of the LLM stress forecast."""
    hour: int
    limiting_state: str  # "rubp" | "rubisco" | "transition"
    stress_severity: str  # "none" | "low" | "moderate" | "high" | "extreme"
    shading_recommended: bool


@dataclass
class StressProfile:
    """Day-level stress summary plus the per-hour detail it was derived from."""
    rubisco_limited_hours: int  # count of hours in the heat-limited regime
    peak_stress_hour: int  # hour of day (0-23) with the worst stress
    peak_stress_severity: str  # severity label at the peak hour
    hourly_detail: list[HourlyStressEntry]
    summary: str  # short natural-language summary from the LLM


@dataclass
class BudgetRecommendation:
    """How much of the shading budget to spend today, and when."""
    daily_budget_fraction: float  # 0–1 of remaining weekly budget
    time_block_pct: dict[str, float]  # e.g. {"10-11": 5, "11-14": 60, ...}
    rationale: str


@dataclass
class ModelRoutingPreference:
    """Which photosynthesis model (FvCB vs ML) to prefer per time of day."""
    morning: str  # "fvcb" or "ml"
    midday: str
    afternoon: str
    rationale: str


@dataclass
class ChronosSanityCheck:
    """LLM plausibility verdict on an optional Chronos forecast."""
    plausible: bool
    flags: list[str]  # specific anomalies the LLM flagged
    overall_assessment: str


@dataclass
class AdvisorReport:
    """Complete advisory bundle produced for one date."""
    date: str
    phenological_stage: str
    stress_profile: StressProfile
    budget_recommendation: BudgetRecommendation
    model_routing: ModelRoutingPreference
    chronos_sanity: Optional[ChronosSanityCheck]  # None when no Chronos input
    confidence_notes: str
    raw_llm_response: str = ""  # kept for debugging / audit of the LLM output
79
+
80
+
81
# ---------------------------------------------------------------------------
# System prompt — encodes vine biology rules
# ---------------------------------------------------------------------------
# NOTE: this string is sent verbatim as the Gemini system instruction; the
# JSON schema at the bottom must stay in sync with _parse_report().

SYSTEM_PROMPT = """\
You are an agrivoltaic advisor for a Semillon grapevine vineyard in the Negev \
desert (Sde Boker, Israel). You analyze day-ahead weather forecasts and produce \
structured stress assessments for the tracker control system.

CONTROL OBJECTIVE:
- Primary goal: maximise annual PV energy production.
- Secondary goal: protect vines from heat, water stress, and sunburn using a \
limited shading budget (see energy budget rule).
- When in doubt and there is no clear sign of dangerous stress, prefer keeping \
panels in their energy-maximising position.

BIOLOGICAL GUIDELINES (strong constraints; balance them with the energy objective):

1. TEMPERATURE TRANSITION: Below 30°C, Semillon photosynthesis is RuBP-limited \
(light is the bottleneck — shading HURTS). Above 30°C, it becomes Rubisco-limited \
(heat is the bottleneck — shading MAY help). The transition is gradual (28–32°C).

2. NO SHADE BEFORE 10:00: Morning light is critical for carbon fixation. Avoid \
recommending shading before 10:00 unless there is an extreme heat or safety event.

3. MAY SENSITIVITY: May is the flowering/fruit-set period. Yield protection has \
priority: avoid shading in May under normal conditions because even small losses \
can reduce cluster number and berry set. Only recommend shade in May as a last \
resort in extreme heat to prevent serious damage (e.g. severe sunburn or lethal stress).

4. CWSI THRESHOLD: Crop Water Stress Index > 0.4 indicates real water stress. \
Below 0.4, the vine is coping adequately.

5. BERRY SUNBURN: Direct exposure at air temperature > 35°C risks berry sunburn, \
especially on the southwest-facing side of clusters in the afternoon.

6. ENERGY BUDGET: Annual energy sacrifice ceiling is 5%. Suggested monthly caps: \
May=0%, Jun=15%, Jul=30%, Aug=30%, Sep=20%, Oct=5%. Treat these as soft caps: \
stay below them unless there is an exceptional agronomic reason.

7. MODEL ROUTING: Use FvCB (Farquhar model) for standard conditions (T < 30°C, \
VPD < 2.5 kPa, adequate water). Use ML ensemble for stress conditions (T > 30°C, \
high VPD, water stress, or any non-linear regime).

8. PHENOLOGICAL MULTIPLIER: Stress during veraison (berry ripening) is 1.5× more \
damaging than during vegetative growth. Protect veraison at higher cost.

SEVERITY SCALE (anchored to air temperature):
- none: T < 28°C
- low: 28-30°C
- moderate: 30-33°C
- high: 33-37°C
- extreme: T > 37°C

OUTPUT FORMAT — Return ONLY a JSON object (no markdown fences, no explanation) \
with this exact schema:

{
  "stress_profile": {
    "rubisco_limited_hours": <int>,
    "peak_stress_hour": <int 0-23>,
    "peak_stress_severity": "<none|low|moderate|high|extreme>",
    "hourly_detail": [
      {"hour": <int>, "limiting_state": "<rubp|rubisco|transition>", \
"stress_severity": "<severity>", "shading_recommended": <bool>}
    ],
    "summary": "<2-3 sentence natural language summary>"
  },
  "budget_recommendation": {
    "daily_budget_fraction": <float 0-1>,
    "time_block_pct": {"10-11": <float>, "11-14": <float>, "14-16": <float>, \
"16+": <float>},
    "rationale": "<1-2 sentences>"
  },
  "model_routing": {
    "morning": "<fvcb|ml>",
    "midday": "<fvcb|ml>",
    "afternoon": "<fvcb|ml>",
    "rationale": "<1 sentence>"
  },
  "chronos_sanity": {
    "plausible": <bool>,
    "flags": ["<flag1>", ...],
    "overall_assessment": "<1 sentence>"
  },
  "confidence_notes": "<any caveats about forecast quality or unusual conditions>"
}

Include hourly_detail entries only for hours 6-20 (daytime). \
If no Chronos forecast is provided, set chronos_sanity to null.
"""
172
+
173
+
174
# ---------------------------------------------------------------------------
# Helper: robust JSON extraction from LLM response
# ---------------------------------------------------------------------------

def _extract_json(text: str) -> dict:
    """Thin wrapper around the shared genai_utils implementation.

    Kept as a module-level name, presumably so callers/tests can patch it
    here — TODO confirm before removing the indirection.
    """
    return extract_json_object(text)
181
+
182
+
183
+ # ---------------------------------------------------------------------------
184
+ # Main class
185
+ # ---------------------------------------------------------------------------
186
+
187
+ class DayAheadAdvisor:
188
+ """
189
+ Gemini-powered day-ahead stress advisory for agrivoltaic tracker control.
190
+
191
+ Usage
192
+ -----
193
+ advisor = DayAheadAdvisor()
194
+ report = advisor.advise(
195
+ date="2025-07-15",
196
+ weather_forecast=df_ims,
197
+ phenological_stage="veraison",
198
+ remaining_weekly_budget_kWh=12.5,
199
+ remaining_monthly_budget_kWh=45.0,
200
+ )
201
+ """
202
+
203
+ def __init__(
204
+ self,
205
+ model_name: str = "gemini-2.5-flash",
206
+ api_key: Optional[str] = None,
207
+ verbose: bool = True,
208
+ ):
209
+ self.model_name = model_name
210
+ self._api_key = api_key
211
+ self._client = None
212
+ self.verbose = verbose
213
+ # Cache advisory per date+stage (same day = same forecast)
214
+ self._report_cache: dict[str, AdvisorReport] = {}
215
+
216
+ # ------------------------------------------------------------------
217
+ # Internal helpers
218
+ # ------------------------------------------------------------------
219
+
220
    @property
    def api_key(self) -> str:
        """Google API key resolved by the shared genai_utils helper.

        The explicit constructor argument (``self._api_key``) is passed
        through; resolution details live in ``get_google_api_key``.
        """
        return get_google_api_key(self._api_key)
223
+
224
    @property
    def client(self):
        """GenAI client, constructed on first access and cached afterwards."""
        if self._client is None:
            self._client = get_genai_client(self._api_key)
        return self._client
229
+
230
    def _call_gemini(self, user_prompt: str) -> str:
        """Send a prompt to Gemini and return the raw text response.

        SYSTEM_PROMPT is attached as the system instruction, so callers only
        supply the forecast-specific user prompt. Network/API errors propagate
        to the caller (which falls back to ``_default_report``).
        """
        response = self.client.models.generate_content(
            model=self.model_name,
            contents=user_prompt,
            config={"system_instruction": SYSTEM_PROMPT},
        )
        return response.text
238
+
239
+ def _log(self, msg: str) -> None:
240
+ if self.verbose:
241
+ print(f"[DayAheadAdvisor] {msg}")
242
+
243
+ # ------------------------------------------------------------------
244
+ # Forecast formatting
245
+ # ------------------------------------------------------------------
246
+
247
+ def _format_weather_forecast(self, weather_df: pd.DataFrame) -> str:
248
+ """Aggregate 15-min IMS data to hourly and format as text for Gemini."""
249
+ df = weather_df.copy()
250
+
251
+ # Ensure datetime index
252
+ if not isinstance(df.index, pd.DatetimeIndex):
253
+ for col in ["timestamp_utc", "time", "datetime", "timestamp"]:
254
+ if col in df.columns:
255
+ df.index = pd.to_datetime(df[col], utc=True)
256
+ break
257
+
258
+ # Map common column names
259
+ col_map = {}
260
+ for c in df.columns:
261
+ cl = c.lower()
262
+ if "temp" in cl and "dew" not in cl:
263
+ col_map["temperature_c"] = c
264
+ elif "ghi" in cl or "radiation" in cl or "irradiance" in cl:
265
+ col_map["ghi_w_m2"] = c
266
+ elif "rh" in cl or "humid" in cl:
267
+ col_map["rh_percent"] = c
268
+ elif "wind" in cl and "speed" in cl:
269
+ col_map["wind_speed_ms"] = c
270
+ elif "vpd" in cl:
271
+ col_map["vpd_kpa"] = c
272
+
273
+ # Resample to hourly
274
+ hourly = df.resample("1h").mean(numeric_only=True)
275
+
276
+ lines = ["HOURLY WEATHER FORECAST:"]
277
+ lines.append(f"{'Hour':>4} {'T(°C)':>7} {'GHI':>7} {'RH(%)':>7} {'Wind':>7}")
278
+ lines.append("-" * 45)
279
+
280
+ temp_col = col_map.get("temperature_c")
281
+ ghi_col = col_map.get("ghi_w_m2")
282
+ rh_col = col_map.get("rh_percent")
283
+ wind_col = col_map.get("wind_speed_ms")
284
+
285
+ for idx, row in hourly.iterrows():
286
+ hour = idx.hour if hasattr(idx, "hour") else "?"
287
+ t = f"{row[temp_col]:.1f}" if temp_col and temp_col in row.index else "N/A"
288
+ g = f"{row[ghi_col]:.0f}" if ghi_col and ghi_col in row.index else "N/A"
289
+ r = f"{row[rh_col]:.0f}" if rh_col and rh_col in row.index else "N/A"
290
+ w = f"{row[wind_col]:.1f}" if wind_col and wind_col in row.index else "N/A"
291
+ lines.append(f"{hour:>4} {t:>7} {g:>7} {r:>7} {w:>7}")
292
+
293
+ # Summary stats
294
+ if temp_col and temp_col in hourly.columns:
295
+ temps = hourly[temp_col].dropna()
296
+ if not temps.empty:
297
+ lines.append(f"\nSummary: Tmax={temps.max():.1f}°C, "
298
+ f"Tmin={temps.min():.1f}°C, "
299
+ f"Hours above 30°C: {int((temps > 30).sum())}, "
300
+ f"Hours above 35°C: {int((temps > 35).sum())}")
301
+
302
+ return "\n".join(lines)
303
+
304
+ def _format_chronos_forecast(self, chronos_df: pd.DataFrame) -> str:
305
+ """Format Chronos A forecast as text for Gemini."""
306
+ df = chronos_df.copy()
307
+
308
+ if not isinstance(df.index, pd.DatetimeIndex):
309
+ for col in ["timestamp_utc", "time", "datetime", "timestamp"]:
310
+ if col in df.columns:
311
+ df.index = pd.to_datetime(df[col], utc=True)
312
+ break
313
+
314
+ # Resample to hourly
315
+ hourly = df.resample("1h").agg({
316
+ c: "median" for c in df.select_dtypes(include=[np.number]).columns
317
+ })
318
+
319
+ # Look for A / prediction columns
320
+ a_col = None
321
+ for c in df.columns:
322
+ cl = c.lower()
323
+ if cl in ("a", "a_n", "predicted_a", "forecast", "median"):
324
+ a_col = c
325
+ break
326
+ if a_col is None and len(df.select_dtypes(include=[np.number]).columns) > 0:
327
+ a_col = df.select_dtypes(include=[np.number]).columns[0]
328
+
329
+ if a_col is None:
330
+ return "CHRONOS FORECAST: No numeric prediction column found."
331
+
332
+ lines = ["CHRONOS A FORECAST (hourly median):"]
333
+ for idx, row in hourly.iterrows():
334
+ hour = idx.hour if hasattr(idx, "hour") else "?"
335
+ val = row[a_col] if a_col in row.index else float("nan")
336
+ lines.append(f" Hour {hour:2d}: A = {val:.2f} µmol m⁻² s⁻¹")
337
+
338
+ a_vals = hourly[a_col].dropna()
339
+ if not a_vals.empty:
340
+ lines.append(f"\nPeak A: {a_vals.max():.2f} at hour "
341
+ f"{hourly[a_col].idxmax().hour if hasattr(hourly[a_col].idxmax(), 'hour') else '?'}")
342
+
343
+ return "\n".join(lines)
344
+
345
+ # ------------------------------------------------------------------
346
+ # Default (fallback) report
347
+ # ------------------------------------------------------------------
348
+
349
+ def _default_report(self, date: str, stage: str) -> AdvisorReport:
350
+ """
351
+ Conservative fallback report when Gemini is unavailable.
352
+
353
+ Assumes moderate midday stress, standard budget distribution,
354
+ FvCB morning + ML midday/afternoon.
355
+ """
356
+ self._log("Using conservative fallback report (API unavailable).")
357
+
358
+ hourly = []
359
+ for h in range(6, 21):
360
+ if h < 10:
361
+ entry = HourlyStressEntry(h, "rubp", "none", False)
362
+ elif h < 12:
363
+ entry = HourlyStressEntry(h, "transition", "low", False)
364
+ elif h < 16:
365
+ entry = HourlyStressEntry(h, "rubisco", "moderate", True)
366
+ else:
367
+ entry = HourlyStressEntry(h, "transition", "low", False)
368
+ hourly.append(entry)
369
+
370
+ return AdvisorReport(
371
+ date=date,
372
+ phenological_stage=stage,
373
+ stress_profile=StressProfile(
374
+ rubisco_limited_hours=4,
375
+ peak_stress_hour=14,
376
+ peak_stress_severity="moderate",
377
+ hourly_detail=hourly,
378
+ summary=(
379
+ "Fallback estimate: moderate midday stress assumed (12:00-16:00). "
380
+ "Conservative shading recommended during peak hours. "
381
+ "Actual conditions may differ — advisory generated without API access."
382
+ ),
383
+ ),
384
+ budget_recommendation=BudgetRecommendation(
385
+ daily_budget_fraction=0.15,
386
+ time_block_pct={"10-11": 5, "11-14": 60, "14-16": 30, "16+": 5},
387
+ rationale="Standard budget distribution (fallback). "
388
+ "Concentrates 60% of daily budget in the 11-14 peak stress window.",
389
+ ),
390
+ model_routing=ModelRoutingPreference(
391
+ morning="fvcb",
392
+ midday="ml",
393
+ afternoon="ml",
394
+ rationale="FvCB for cool morning (T < 30°C), ML for midday/afternoon stress (fallback).",
395
+ ),
396
+ chronos_sanity=None,
397
+ confidence_notes="Fallback report — Gemini API was unavailable. "
398
+ "Using biologically conservative defaults.",
399
+ )
400
+
401
+ # ------------------------------------------------------------------
402
+ # Parse Gemini JSON response → AdvisorReport
403
+ # ------------------------------------------------------------------
404
+
405
+ def _parse_report(
406
+ self, date: str, stage: str, parsed: dict, raw_response: str
407
+ ) -> AdvisorReport:
408
+ """Convert parsed JSON dict to AdvisorReport with safe defaults."""
409
+
410
+ # --- Stress profile ---
411
+ sp = parsed.get("stress_profile", {})
412
+ hourly_raw = sp.get("hourly_detail", [])
413
+ hourly_entries = []
414
+ for h in hourly_raw:
415
+ hourly_entries.append(HourlyStressEntry(
416
+ hour=h.get("hour", 0),
417
+ limiting_state=h.get("limiting_state", "rubp"),
418
+ stress_severity=h.get("stress_severity", "none"),
419
+ shading_recommended=h.get("shading_recommended", False),
420
+ ))
421
+
422
+ stress_profile = StressProfile(
423
+ rubisco_limited_hours=sp.get("rubisco_limited_hours", 0),
424
+ peak_stress_hour=sp.get("peak_stress_hour", 12),
425
+ peak_stress_severity=sp.get("peak_stress_severity", "none"),
426
+ hourly_detail=hourly_entries,
427
+ summary=sp.get("summary", "No summary provided."),
428
+ )
429
+
430
+ # --- Budget recommendation ---
431
+ br = parsed.get("budget_recommendation", {})
432
+ budget_rec = BudgetRecommendation(
433
+ daily_budget_fraction=br.get("daily_budget_fraction", 0.15),
434
+ time_block_pct=br.get("time_block_pct", {"10-11": 5, "11-14": 60, "14-16": 30, "16+": 5}),
435
+ rationale=br.get("rationale", "No rationale provided."),
436
+ )
437
+
438
+ # --- Model routing ---
439
+ mr = parsed.get("model_routing", {})
440
+ model_routing = ModelRoutingPreference(
441
+ morning=mr.get("morning", "fvcb"),
442
+ midday=mr.get("midday", "ml"),
443
+ afternoon=mr.get("afternoon", "ml"),
444
+ rationale=mr.get("rationale", "No rationale provided."),
445
+ )
446
+
447
+ # --- Chronos sanity check (optional) ---
448
+ cs = parsed.get("chronos_sanity")
449
+ chronos_sanity = None
450
+ if cs is not None:
451
+ chronos_sanity = ChronosSanityCheck(
452
+ plausible=cs.get("plausible", True),
453
+ flags=cs.get("flags", []),
454
+ overall_assessment=cs.get("overall_assessment", "No assessment."),
455
+ )
456
+
457
+ return AdvisorReport(
458
+ date=date,
459
+ phenological_stage=stage,
460
+ stress_profile=stress_profile,
461
+ budget_recommendation=budget_rec,
462
+ model_routing=model_routing,
463
+ chronos_sanity=chronos_sanity,
464
+ confidence_notes=parsed.get("confidence_notes", ""),
465
+ raw_llm_response=raw_response,
466
+ )
467
+
468
+ # ------------------------------------------------------------------
469
+ # Main advisory method
470
+ # ------------------------------------------------------------------
471
+
472
    def advise(
        self,
        date: str,
        weather_forecast: pd.DataFrame,
        phenological_stage: str = "vegetative",
        remaining_weekly_budget_kWh: float = 20.0,
        remaining_monthly_budget_kWh: float = 80.0,
        chronos_forecast: Optional[pd.DataFrame] = None,
        gdd_cumulative: Optional[float] = None,
        vine_snapshot: Optional[object] = None,
    ) -> AdvisorReport:
        """
        Analyze day-ahead weather forecast and produce structured advisory.

        Builds a text prompt from the inputs, sends it to Gemini, parses the
        JSON reply into an AdvisorReport, and caches the report per
        (date, stage). On any Gemini/parsing error it falls back to the
        conservative default report instead of raising.

        Parameters
        ----------
        date : target date string (e.g. "2025-07-15")
        weather_forecast : DataFrame of IMS weather data (15-min or hourly)
        phenological_stage : current vine stage (vegetative/flowering/veraison/harvest)
        remaining_weekly_budget_kWh : remaining shading budget for the week
        remaining_monthly_budget_kWh : remaining shading budget for the month
        chronos_forecast : optional Chronos A prediction DataFrame
        gdd_cumulative : optional cumulative growing degree days
        vine_snapshot : optional VineSnapshot from ThingsBoardClient.get_vine_snapshot();
            seeds the advisory with current on-site sensor state (soil moisture,
            fruiting-zone PAR, treatment vs reference comparison)

        Returns
        -------
        AdvisorReport with stress profile, budget, routing, and sanity check
        """
        self._log(f"Generating advisory for {date} (stage: {phenological_stage})")

        # Return cached report if same date+stage already advised — budgets,
        # forecasts and snapshot are NOT part of the key, so changed inputs on
        # the same day reuse the earlier advisory.
        cache_key = f"{date}|{phenological_stage}"
        if cache_key in self._report_cache:
            self._log("Returning cached advisory for this date+stage.")
            return self._report_cache[cache_key]

        # Build user prompt (section order matters — the system prompt is
        # written against this layout).
        weather_text = self._format_weather_forecast(weather_forecast)

        prompt_parts = [
            f"DATE: {date}",
            f"PHENOLOGICAL STAGE: {phenological_stage}",
            f"REMAINING WEEKLY BUDGET: {remaining_weekly_budget_kWh:.1f} kWh",
            f"REMAINING MONTHLY BUDGET: {remaining_monthly_budget_kWh:.1f} kWh",
        ]
        if gdd_cumulative is not None:
            prompt_parts.append(f"CUMULATIVE GDD: {gdd_cumulative:.0f}")

        if vine_snapshot is not None:
            prompt_parts.append("")
            # Best-effort: a snapshot that fails to render is silently dropped
            # rather than aborting the advisory.
            try:
                prompt_parts.append(vine_snapshot.to_advisor_text())
            except Exception:
                pass

        prompt_parts.append("")
        prompt_parts.append(weather_text)

        if chronos_forecast is not None:
            prompt_parts.append("")
            prompt_parts.append(self._format_chronos_forecast(chronos_forecast))
        else:
            # Explicitly tell the model not to invent a sanity check.
            prompt_parts.append("\nNo Chronos forecast available — set chronos_sanity to null.")

        user_prompt = "\n".join(prompt_parts)

        # Call Gemini; any failure (network, quota, malformed JSON) degrades
        # to the conservative fallback report.
        try:
            raw = self._call_gemini(user_prompt)
            parsed = _extract_json(raw)
            report = self._parse_report(date, phenological_stage, parsed, raw)
            self._report_cache[cache_key] = report
            self._log("Advisory generated successfully via Gemini.")
            return report
        except Exception as exc:
            self._log(f"Gemini API error: {exc}")
            # NOTE: fallback reports are intentionally not cached, so a later
            # call on the same day can retry the API.
            return self._default_report(date, phenological_stage)
552
+
553
+ # ------------------------------------------------------------------
554
+ # Serialization
555
+ # ------------------------------------------------------------------
556
+
557
+ @staticmethod
558
+ def report_to_dict(report: AdvisorReport) -> dict:
559
+ """Convert AdvisorReport to a plain dict (JSON-serializable)."""
560
+ return asdict(report)
561
+
562
+ @staticmethod
563
+ def report_to_json(report: AdvisorReport, indent: int = 2) -> str:
564
+ """Convert AdvisorReport to a JSON string."""
565
+ return json.dumps(asdict(report), indent=indent, default=str)
566
+
567
+
568
+ # ---------------------------------------------------------------------------
569
+ # CLI entry point
570
+ # ---------------------------------------------------------------------------
571
+
572
if __name__ == "__main__":
    # Demo: run the advisor against the last day of locally cached IMS data.
    from pathlib import Path

    IMS_CSV = Path(__file__).resolve().parent.parent / "Data" / "ims" / "ims_merged_15min.csv"

    if not IMS_CSV.exists():
        print("No IMS cache data found. Cannot run advisory demo.")
        print(f"Looked in: {IMS_CSV}")
        raise SystemExit(1)

    print(f"Loading IMS data from: {IMS_CSV.name}")
    # NOTE(review): parse_dates=True has no effect without index_col / column
    # names — the loop below performs the actual datetime parsing.
    df = pd.read_csv(IMS_CSV, parse_dates=True)

    # Try to parse datetime from the first recognized timestamp column.
    for col in ["timestamp_utc", "datetime", "time", "timestamp"]:
        if col in df.columns:
            df.index = pd.to_datetime(df[col])
            break

    # Use last day of data; if no datetime index could be built, fall back to
    # the final 96 rows (~24 h of 15-min samples).
    if isinstance(df.index, pd.DatetimeIndex):
        last_date = df.index.date[-1]
        day_data = df[df.index.date == last_date]
        date_str = str(last_date)
    else:
        day_data = df.tail(96)  # ~24h of 15-min data
        date_str = "unknown"

    print(f"Date: {date_str}, rows: {len(day_data)}")

    advisor = DayAheadAdvisor(verbose=True)
    report = advisor.advise(
        date=date_str,
        weather_forecast=day_data,
        phenological_stage="veraison",
        remaining_weekly_budget_kWh=15.0,
        remaining_monthly_budget_kWh=50.0,
    )

    # Human-readable summary followed by the full JSON payload.
    print("\n" + "=" * 60)
    print("DAY-AHEAD STRESS ADVISORY")
    print("=" * 60)
    print(f"Date: {report.date}")
    print(f"Stage: {report.phenological_stage}")
    print(f"\nStress Summary: {report.stress_profile.summary}")
    print(f"Rubisco-limited hours: {report.stress_profile.rubisco_limited_hours}")
    print(f"Peak stress: {report.stress_profile.peak_stress_severity} "
          f"at hour {report.stress_profile.peak_stress_hour}")
    print(f"\nBudget: {report.budget_recommendation.daily_budget_fraction:.0%} "
          f"of weekly budget")
    print(f"Time blocks: {report.budget_recommendation.time_block_pct}")
    print(f"Rationale: {report.budget_recommendation.rationale}")
    print(f"\nModel routing: morning={report.model_routing.morning}, "
          f"midday={report.model_routing.midday}, "
          f"afternoon={report.model_routing.afternoon}")
    if report.chronos_sanity:
        print(f"\nChronos sanity: plausible={report.chronos_sanity.plausible}")
        print(f"  Flags: {report.chronos_sanity.flags}")
    print(f"\nConfidence: {report.confidence_notes}")
    print("\n--- Full JSON ---")
    print(DayAheadAdvisor.report_to_json(report))
src/advisor/safety_rails.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SafetyRails: FvCB vs ML divergence guard for the SolarWine 2.0 control loop.
3
+
4
+ Position in the control loop (Phase 3, Step 7):
5
+ After TradeoffEngine selects a minimum dose, SafetyRails validates that
6
+ the FvCB and ML photosynthesis predictions are sufficiently consistent.
7
+
8
+ If the two models disagree by more than DIVERGENCE_THRESHOLD (12%), the
9
+ system cannot confidently predict that shading will help, so it falls back
10
+ to full astronomical tracking (zero energy sacrifice, zero risk).
11
+
12
+ Rationale
13
+ ---------
14
+ The FvCB mechanistic model and ML ensemble are calibrated on different
15
+ assumptions:
16
+ - FvCB is reliable in standard conditions (T < 30°C, moderate VPD).
17
+ - ML handles non-linear stress regimes better.
18
+
19
+ When both agree → high confidence → proceed with intervention.
20
+ When they disagree significantly → sensor fault, regime change, or edge
21
+ case not covered by calibration. The safe default is no intervention.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from dataclasses import dataclass
27
+ from typing import Optional
28
+
29
+ from config.settings import DIVERGENCE_THRESHOLD
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Result dataclass
34
+ # ---------------------------------------------------------------------------
35
+
36
@dataclass
class SafetyCheckResult:
    """Outcome of one FvCB-vs-ML divergence check."""

    passed: bool
    fvcb_a: float
    ml_a: float
    divergence_pct: float  # |fvcb_a - ml_a| / max(|fvcb_a|, |ml_a|) × 100
    fallback_needed: bool  # True when the control loop must revert to θ_astro
    reason: str  # human-readable explanation

    def __str__(self) -> str:
        verdict = "PASS" if self.passed else "FAIL → fallback to θ_astro"
        return (
            f"SafetyRails [{verdict}] "
            f"FvCB={self.fvcb_a:.2f} ML={self.ml_a:.2f} "
            f"divergence={self.divergence_pct:.1f}% "
            f"(threshold={DIVERGENCE_THRESHOLD * 100:.0f}%)"
        )


# ---------------------------------------------------------------------------
# SafetyRails
# ---------------------------------------------------------------------------

class SafetyRails:
    """
    Validates that FvCB and ML model outputs are consistent before any
    shading command is issued.

    Usage
    -----
        rails = SafetyRails()
        result = rails.check(fvcb_a=14.3, ml_a=14.8)
        if result.fallback_needed:
            # stay at θ_astro, log result
    """

    def __init__(self, threshold: Optional[float] = None) -> None:
        """
        Parameters
        ----------
        threshold : divergence fraction (0–1) that triggers fallback.
            Defaults to DIVERGENCE_THRESHOLD (0.12) from settings.
        """
        self.threshold = DIVERGENCE_THRESHOLD if threshold is None else threshold

    def check(
        self,
        fvcb_a: float,
        ml_a: float,
        context: Optional[str] = None,
    ) -> SafetyCheckResult:
        """
        Compare FvCB and ML photosynthesis outputs.

        Parameters
        ----------
        fvcb_a : net A from FarquharModel (µmol CO₂ m⁻² s⁻¹)
        ml_a : net A from ML ensemble (µmol CO₂ m⁻² s⁻¹)
        context : optional string for logging (e.g. "2025-07-15 13:00")

        Returns
        -------
        SafetyCheckResult
        """
        # Normalize by the larger magnitude; the 1e-6 floor guards against
        # division by zero when both models output ~0.
        scale = max(abs(fvcb_a), abs(ml_a), 1e-6)
        gap = abs(fvcb_a - ml_a) / scale
        gap_pct = gap * 100.0

        must_fall_back = gap > self.threshold

        if must_fall_back:
            explanation = (
                f"Models diverge by {gap_pct:.1f}% "
                f"(FvCB={fvcb_a:.2f}, ML={ml_a:.2f}) — "
                f"exceeds {self.threshold * 100:.0f}% threshold. "
                f"Falling back to full astronomical tracking."
            )
        elif fvcb_a < 0 and ml_a < 0:
            # Both models agree there is nothing to gain from shading.
            explanation = "Both models predict carbon loss (dark/night); no shading beneficial."
            must_fall_back = True
        else:
            explanation = (
                f"Models agree within {self.threshold * 100:.0f}% threshold "
                f"(FvCB={fvcb_a:.2f}, ML={ml_a:.2f}, "
                f"divergence={gap_pct:.1f}%). Proceeding."
            )

        return SafetyCheckResult(
            passed=not must_fall_back,
            fvcb_a=fvcb_a,
            ml_a=ml_a,
            divergence_pct=round(gap_pct, 2),
            fallback_needed=must_fall_back,
            reason=explanation,
        )

    def check_from_log(self, fvcb_a: Optional[float], ml_a: Optional[float]) -> SafetyCheckResult:
        """
        None-tolerant variant (e.g. the ML model is not loaded).

        If either value is None the check passes by construction — the
        calling code should simply use whichever model produced a number.
        """
        if fvcb_a is not None and ml_a is not None:
            return self.check(fvcb_a, ml_a)

        available = ml_a if fvcb_a is None else fvcb_a
        return SafetyCheckResult(
            passed=True,
            fvcb_a=fvcb_a or 0.0,
            ml_a=ml_a or 0.0,
            divergence_pct=0.0,
            fallback_needed=False,
            reason=(
                f"Only one model available (value={available:.2f}). "
                "Cannot check divergence; proceeding with available model."
            ),
        )
155
+
156
+
157
+ # ---------------------------------------------------------------------------
158
+ # CLI smoke test
159
+ # ---------------------------------------------------------------------------
160
+
161
if __name__ == "__main__":
    # Smoke test: run the rails over a handful of representative model pairs.
    guard = SafetyRails()

    scenarios = [
        (14.3, 14.8, "Normal agreement (3.4%)"),
        (14.3, 16.5, "Borderline (15.4% — over threshold)"),
        (14.3, 12.0, "Below threshold (17.6% — over)"),
        (14.3, 14.3, "Perfect agreement"),
        (14.3, None, "ML unavailable"),
        (-2.0, -1.8, "Carbon loss (night)"),
    ]

    print(f"SafetyRails — threshold={guard.threshold * 100:.0f}%\n")
    for fvcb_value, ml_value, label in scenarios:
        outcome = guard.check_from_log(fvcb_value, ml_value)
        status = "FALLBACK" if outcome.fallback_needed else "OK "
        print(f"  [{status}] {label}")
        print(f"    {outcome.reason}")
        print()
src/baseline_predictor.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BaselinePredictor: hybrid FvCB + ML photosynthesis baseline for day-ahead planning.
3
+
4
+ Provides a single ``predict_day()`` method that:
5
+ 1. Runs FvCB (Farquhar–Greer–Weedon) for each slot using forecast weather
6
+ 2. Optionally runs a trained ML model for the same slots
7
+ 3. Uses the RoutingAgent's rule-based logic to pick the better prediction per slot
8
+ 4. Returns a 96-slot profile of predicted photosynthesis rate A (µmol CO₂ m⁻² s⁻¹)
9
+
10
+ This feeds into the DayAheadPlanner to estimate crop value for each slot,
11
+ replacing the current temperature-only heuristic with an actual photosynthesis
12
+ prediction that captures the Rubisco transition more accurately.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import math
19
+ from datetime import date
20
+ from typing import List, Optional
21
+
22
+ import numpy as np
23
+
24
+ from config.settings import SEMILLON_TRANSITION_TEMP_C
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class BaselinePredictor:
    """Hybrid FvCB + ML photosynthesis prediction for day-ahead planning.

    Runs the FvCB mechanistic model for every 15-min slot, optionally runs a
    trained ML model for the same slots, then routes between the two per slot
    using the RoutingAgent's rule-based logic.

    Parameters
    ----------
    fvcb_model : FarquharModel, optional
        Lazy-initialised if not provided.
    ml_predictor : PhotosynthesisPredictor, optional
        Trained ML model. If None, FvCB-only mode is used.
    routing_agent : RoutingAgent, optional
        Model router for per-slot FvCB/ML selection.
        If None, uses rule-based routing only (no API calls).
    """

    # Number of 15-minute slots in one day; all per-day profiles use this length.
    SLOTS_PER_DAY = 96

    def __init__(
        self,
        fvcb_model=None,
        ml_predictor=None,
        routing_agent=None,
    ):
        self._fvcb = fvcb_model
        self._ml = ml_predictor
        self._router = routing_agent

    @property
    def fvcb(self):
        # Lazily construct the FvCB model so importing this module stays cheap.
        if self._fvcb is None:
            from src.models.farquhar_model import FarquharModel
            self._fvcb = FarquharModel()
        return self._fvcb

    # ------------------------------------------------------------------
    # Main API
    # ------------------------------------------------------------------

    def predict_day(
        self,
        forecast_temps: List[float],
        forecast_ghi: List[float],
        co2_ppm: float = 400.0,
        rh_pct: float = 40.0,
    ) -> List[float]:
        """Predict photosynthesis rate A for each 15-min slot.

        Parameters
        ----------
        forecast_temps : list of 96 floats
            Forecast air temperature (°C) per slot.
        forecast_ghi : list of 96 floats
            Forecast GHI (W/m²) per slot.
        co2_ppm : float
            Atmospheric CO₂ concentration (default 400 ppm).
        rh_pct : float
            Relative humidity (%) for VPD estimation (default 40%).

        Returns
        -------
        list of 96 floats
            Predicted net photosynthesis A (µmol CO₂ m⁻² s⁻¹) per slot.
            0.0 for nighttime slots.

        Raises
        ------
        ValueError
            If either input does not contain exactly 96 values.
        """
        # Explicit validation instead of `assert`: asserts are stripped
        # under `python -O`, silently disabling the length check.
        n = self.SLOTS_PER_DAY
        if len(forecast_temps) != n or len(forecast_ghi) != n:
            raise ValueError(
                f"predict_day expects {n} slots; got "
                f"{len(forecast_temps)} temperatures and {len(forecast_ghi)} GHI values"
            )

        # FvCB predictions for all 96 slots
        fvcb_predictions = self._predict_fvcb(
            forecast_temps, forecast_ghi, co2_ppm, rh_pct,
        )

        # If no ML model, return FvCB-only
        if self._ml is None:
            return fvcb_predictions

        # ML predictions for all 96 slots
        ml_predictions = self._predict_ml(forecast_temps, forecast_ghi)

        # Route each slot between the two model outputs
        return self._route_predictions(
            forecast_temps, forecast_ghi,
            fvcb_predictions, ml_predictions,
        )

    # ------------------------------------------------------------------
    # FvCB predictions
    # ------------------------------------------------------------------

    def _predict_fvcb(
        self,
        temps: List[float],
        ghis: List[float],
        co2_ppm: float,
        rh_pct: float,
    ) -> List[float]:
        """Run FvCB for each slot. Returns 96 A values (0.0 on failure/night)."""
        predictions = []
        for i in range(self.SLOTS_PER_DAY):
            temp = temps[i]
            ghi = ghis[i]

            # Nighttime or negligible light → no assimilation modeled
            if ghi < 50:
                predictions.append(0.0)
                continue

            # Estimate PAR from GHI (roughly 2× conversion for photosynthetically active)
            par = ghi * 2.0

            # Estimate Tleaf from Tair (proxy: +2°C under sun)
            tleaf = temp + 2.0

            # Estimate VPD from temperature and RH
            vpd = self._estimate_vpd(temp, rh_pct)

            try:
                result = self.fvcb.calc_photosynthesis_semillon(
                    PAR=par,
                    Tleaf=tleaf,
                    CO2=co2_ppm,
                    VPD=vpd,
                    Tair=temp,
                )
                # Returns (A, limiting_state, shading_helps); keep A only.
                A = result[0] if isinstance(result, tuple) else result
                predictions.append(max(0.0, float(A)))
            except Exception as exc:
                # A failed slot degrades to 0.0 rather than aborting the day.
                logger.debug("FvCB failed at slot %d: %s", i, exc)
                predictions.append(0.0)

        return predictions

    @staticmethod
    def _estimate_vpd(tair_c: float, rh_pct: float) -> float:
        """Estimate VPD (kPa) from air temperature and relative humidity."""
        # Tetens formula for saturated vapor pressure
        es = 0.6108 * math.exp(17.27 * tair_c / (tair_c + 237.3))
        ea = es * rh_pct / 100.0
        return max(0.0, es - ea)

    # ------------------------------------------------------------------
    # ML predictions
    # ------------------------------------------------------------------

    def _predict_ml(
        self,
        temps: List[float],
        ghis: List[float],
    ) -> List[float]:
        """Run ML model for each slot. Returns 96 A values (zeros on failure)."""
        if self._ml is None:
            return [0.0] * self.SLOTS_PER_DAY

        try:
            import pandas as pd

            # Build feature DataFrame matching ML model's expected features
            hours = [i * 0.25 for i in range(self.SLOTS_PER_DAY)]
            df = pd.DataFrame({
                "air_temperature_c": temps,
                "ghi_w_m2": ghis,
                "hour": [int(h) for h in hours],
                "minute": [int((h % 1) * 60) for h in hours],
            })

            # Pick the model with the lowest reported MAE
            best_model = None
            best_mae = float("inf")
            for name, result in self._ml.results.items():
                if result.get("mae", float("inf")) < best_mae:
                    best_mae = result["mae"]
                    best_model = name

            if best_model and best_model in self._ml.models:
                model = self._ml.models[best_model]
                # Use whatever features the model was trained on
                feature_cols = [c for c in df.columns if c in getattr(model, "feature_names_in_", df.columns)]
                if feature_cols:
                    preds = model.predict(df[feature_cols])
                    return [max(0.0, float(p)) for p in preds]

        except Exception as exc:
            logger.warning("ML prediction failed: %s", exc)

        return [0.0] * self.SLOTS_PER_DAY

    # ------------------------------------------------------------------
    # Routing
    # ------------------------------------------------------------------

    def _route_predictions(
        self,
        temps: List[float],
        ghis: List[float],
        fvcb_preds: List[float],
        ml_preds: List[float],
    ) -> List[float]:
        """Pick FvCB or ML per slot using rule-based routing logic."""
        from src.chatbot.routing_agent import RoutingAgent

        predictions = []
        for i in range(self.SLOTS_PER_DAY):
            telemetry = {
                "temp_c": temps[i],
                "ghi_w_m2": ghis[i],
                "hour": i // 4,
            }

            # Use rule-based routing only (no API calls for batch prediction)
            choice = RoutingAgent._rule_based_route(telemetry)
            if choice is None:
                # Transition zone: weight FvCB 60% / ML 40% as compromise
                a = 0.6 * fvcb_preds[i] + 0.4 * ml_preds[i]
            elif choice == "ml":
                a = ml_preds[i]
            else:
                a = fvcb_preds[i]

            predictions.append(a)

        return predictions
src/canopy_photosynthesis.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.models.canopy_photosynthesis."""
2
+ from src.models.canopy_photosynthesis import * # noqa: F401, F403
src/chatbot/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Chatbot: vineyard chat, routing agent, LLM data engineer."""
src/chatbot/feedback.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Feedback storage for the Vineyard Advisor chatbot.
3
+
4
+ Logs user feedback (thumbs up/down, flags) to a JSON-lines file.
5
+ Each entry captures the query, response, tool results, rules applied,
6
+ and the user's feedback action.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import logging
13
+ from datetime import datetime, timezone
14
+ from pathlib import Path
15
+ from typing import Optional
16
+
17
+ from config.settings import DATA_DIR
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ FEEDBACK_FILE = DATA_DIR / "advisor_feedback.jsonl"
22
+
23
+
24
def log_feedback(
    query: str,
    response: str,
    feedback: str,
    confidence: str = "",
    sources: Optional[list[str]] = None,
    tool_calls: Optional[list[dict]] = None,
    rule_violations: Optional[list[dict]] = None,
    response_mode: str = "",
    comment: str = "",
) -> None:
    """Append a feedback entry to the JSONL file.

    Best-effort: any storage failure is logged and swallowed so feedback
    logging can never break the chat flow.

    Parameters
    ----------
    query : str
        The user's original question.
    response : str
        The chatbot's response text.
    feedback : str
        One of: "thumbs_up", "thumbs_down", "flag_incorrect".
    confidence, sources, tool_calls, rule_violations, response_mode :
        Metadata from the ChatResponse.
    comment : str
        Optional free-text comment from the user.
    """
    entry = {
        "timestamp": datetime.now(tz=timezone.utc).isoformat(),
        "query": query,
        "response": response[:500],  # truncate for storage
        "feedback": feedback,
        "confidence": confidence,
        "sources": sources or [],
        # Keep only name/args from each tool call record.
        "tool_calls": [
            {"name": tc.get("name", ""), "args": tc.get("args", {})}
            for tc in (tool_calls or [])
        ],
        "rule_violations": rule_violations or [],
        "response_mode": response_mode,
        "comment": comment,
    }

    try:
        FEEDBACK_FILE.parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding so stored entries are portable across host locales
        # (the default encoding is platform-dependent).
        with open(FEEDBACK_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry, default=str) + "\n")
        logger.info("Feedback logged: %s for query: %s", feedback, query[:50])
    except Exception as exc:
        logger.warning("Failed to log feedback: %s", exc)
73
+
74
+
75
def load_feedback(limit: int = 100) -> list[dict]:
    """Load the most recent *limit* feedback entries (oldest first).

    Malformed lines are skipped individually so one corrupt record cannot
    discard the rest of the log; other read failures return whatever was
    successfully parsed so far.
    """
    if not FEEDBACK_FILE.exists():
        return []

    entries: list[dict] = []
    try:
        with open(FEEDBACK_FILE, encoding="utf-8") as f:
            for line_no, raw in enumerate(f, start=1):
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    entries.append(json.loads(raw))
                except json.JSONDecodeError as exc:
                    # Previously a single bad line aborted the whole read;
                    # now we skip it and keep the remaining entries.
                    logger.warning("Skipping malformed feedback line %d: %s", line_no, exc)
    except Exception as exc:
        logger.warning("Failed to load feedback: %s", exc)

    return entries[-limit:]
91
+
92
+
93
def feedback_summary() -> dict:
    """Return aggregate feedback statistics (total plus per-kind counts)."""
    entries = load_feedback(limit=10000)
    if not entries:
        return {"total": 0}

    # Single pass over the entries instead of one scan per feedback kind.
    tallies = {"thumbs_up": 0, "thumbs_down": 0, "flag_incorrect": 0}
    for entry in entries:
        kind = entry.get("feedback")
        if kind in tallies:
            tallies[kind] += 1

    return {
        "total": len(entries),
        "thumbs_up": tallies["thumbs_up"],
        "thumbs_down": tallies["thumbs_down"],
        "flagged": tallies["flag_incorrect"],
    }
src/chatbot/guardrails.py ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Guardrails for the Vineyard Advisor chatbot.
3
+
4
+ Three components:
5
+ 1. QueryClassifier — determines if a query requires tool data or can be
6
+ answered from biology rules alone.
7
+ 2. ResponseValidator — deterministic post-response check that catches
8
+ rule violations before the answer reaches the user.
9
+ 3. confidence_from_context — estimates answer confidence based on data
10
+ freshness and availability.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ from typing import Optional
19
+
20
+ from config.settings import (
21
+ NO_SHADE_BEFORE_HOUR,
22
+ NO_SHADE_MONTHS,
23
+ NO_SHADE_TLEAF_BELOW,
24
+ )
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # 1. Query classifier — decides whether a tool call is mandatory
29
+ # ---------------------------------------------------------------------------
30
+
31
# Keywords that indicate user is asking about real-time / site-specific data.
# Compiled below into _DATA_PATTERNS; all matching is case-insensitive.
_DATA_KEYWORDS = [
    # Weather / environment
    r"\btemperature\b", r"\btemp\b", r"\bhow hot\b", r"\bhow cold\b",
    r"\bweather\b", r"\bforecast\b", r"\brain\b", r"\bwind\b",
    r"\bhumidity\b", r"\bghi\b", r"\bradiation\b", r"\birradiance\b",
    # Sensors
    r"\bsensor\b", r"\bsoil\b", r"\bmoisture\b", r"\bleaf temp\b",
    r"\bpar\b", r"\bndvi\b", r"\bcwsi\b", r"\bvpd\b",
    # Photosynthesis / predictions
    # NOTE(review): r"\bforecast\b" is duplicated (also in the weather group);
    # harmless, but it produces a duplicate entry in classify_query's matches.
    r"\bphotosynthesis\b", r"\bassimilation\b", r"\bpredict\b",
    r"\bforecast\b", r"\bA rate\b", r"\bcarbon\b",
    # Energy
    r"\benergy\b", r"\bkwh\b", r"\bpower\b", r"\bgeneration\b",
    r"\binverter\b",
    # Irrigation (stem "irrigat" covers irrigate/irrigation/irrigating)
    r"\birrigat\b", r"\bwater\b",
    # Shading — action-oriented
    r"\bshade\b", r"\bshading\b", r"\btilt\b", r"\bangle\b", r"\bpanel\b",
    # Temporal / current state
    r"\bright now\b", r"\bcurrent\b", r"\btoday\b", r"\btomorrow\b",
    r"\byesterday\b", r"\bthis week\b", r"\blast \d+ (hour|day|minute)",
    # Direct data ask (generic — filtered in classify_query if nothing
    # domain-specific also matched)
    r"\bshow me\b", r"\bwhat is\b", r"\bwhat are\b", r"\bhow much\b",
    r"\bcheck\b", r"\bstatus\b", r"\bstate\b",
]

# Compile once at import time so classify_query pays no per-call regex cost
_DATA_PATTERNS = [re.compile(p, re.IGNORECASE) for p in _DATA_KEYWORDS]

# Keywords for pure knowledge / biology rule questions (no tool needed)
_KNOWLEDGE_KEYWORDS = [
    r"\bwhy\b.*\brule\b", r"\bexplain\b.*\brule\b",
    r"\bwhat is rubisco\b", r"\bwhat is fvcb\b", r"\bwhat is farquhar\b",
    r"\btell me about\b.*\bbiology\b", r"\bhow does photosynthesis work\b",
    r"\bwhat does .* mean\b",
]

_KNOWLEDGE_PATTERNS = [re.compile(p, re.IGNORECASE) for p in _KNOWLEDGE_KEYWORDS]
70
+
71
+
72
@dataclass
class QueryClass:
    """Result of query classification.

    Returned by ``classify_query``; tells the chatbot whether the answer
    must be grounded in a tool call before responding.
    """
    requires_data: bool  # True = tool call is mandatory
    category: str  # "data", "knowledge", "greeting", "ambiguous"
    matched_keywords: list[str] = field(default_factory=list)  # data keywords found in the query
78
+
79
+
80
+ def classify_query(user_message: str) -> QueryClass:
81
+ """Classify whether a user query requires tool-grounded data."""
82
+ msg = user_message.strip()
83
+
84
+ # Very short / greeting
85
+ if len(msg) < 5 or re.match(r"^(hi|hello|hey|thanks|thank you|ok|bye)\b", msg, re.I):
86
+ return QueryClass(requires_data=False, category="greeting")
87
+
88
+ # Check knowledge patterns first (more specific)
89
+ for pat in _KNOWLEDGE_PATTERNS:
90
+ if pat.search(msg):
91
+ return QueryClass(requires_data=False, category="knowledge")
92
+
93
+ # Check data patterns
94
+ matched = []
95
+ for pat in _DATA_PATTERNS:
96
+ m = pat.search(msg)
97
+ if m:
98
+ matched.append(m.group())
99
+
100
+ if matched:
101
+ # If the only match is a generic question word ("what is", "show me")
102
+ # but no domain-specific data keyword, treat as ambiguous
103
+ domain_matches = [m for m in matched if m.lower() not in
104
+ {"what is", "what are", "show me", "how much", "check", "status", "state"}]
105
+ if not domain_matches:
106
+ return QueryClass(requires_data=False, category="ambiguous")
107
+ return QueryClass(requires_data=True, category="data", matched_keywords=matched)
108
+
109
+ # Default: ambiguous — allow LLM to decide
110
+ return QueryClass(requires_data=False, category="ambiguous")
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # 2. Response validator — deterministic rule checks
115
+ # ---------------------------------------------------------------------------
116
+
117
+ @dataclass
118
+ class RuleViolation:
119
+ """A detected rule violation in a chatbot response."""
120
+ rule_name: str
121
+ severity: str # "block" or "warn"
122
+ message: str
123
+ correction: str # What to tell the user instead
124
+
125
+
126
+ def validate_response(
127
+ response_text: str,
128
+ action: Optional[str] = None,
129
+ context: Optional[dict] = None,
130
+ ) -> list[RuleViolation]:
131
+ """
132
+ Check a chatbot response for rule violations.
133
+
134
+ Parameters
135
+ ----------
136
+ response_text : str
137
+ The chatbot's response text.
138
+ action : str or None
139
+ Extracted action ("shade", "irrigate", "no_action", etc.).
140
+ context : dict or None
141
+ Current conditions: hour, month, temp_c, stage_id, etc.
142
+
143
+ Returns
144
+ -------
145
+ List of RuleViolation objects. Empty list = all good.
146
+ """
147
+ violations: list[RuleViolation] = []
148
+ ctx = context or {}
149
+ text_lower = response_text.lower()
150
+
151
+ hour = ctx.get("hour")
152
+ month = ctx.get("month")
153
+ temp_c = ctx.get("temp_c")
154
+ stage_id = ctx.get("stage_id")
155
+
156
+ # Detect if the response recommends shading
157
+ _recommends_shade = _text_recommends_shading(text_lower)
158
+
159
+ # Rule: No shading before NO_SHADE_BEFORE_HOUR
160
+ if _recommends_shade and hour is not None and hour < NO_SHADE_BEFORE_HOUR:
161
+ violations.append(RuleViolation(
162
+ rule_name="no_shade_before_10",
163
+ severity="block",
164
+ message=f"Response recommends shading before {NO_SHADE_BEFORE_HOUR}:00.",
165
+ correction=(
166
+ "Morning light is critical for carbon fixation. "
167
+ f"Shading should not be recommended before {NO_SHADE_BEFORE_HOUR}:00 regardless "
168
+ "of temperature. Panels should remain at full tracking."
169
+ ),
170
+ ))
171
+
172
+ # Rule: No shading in restricted months (unless extreme)
173
+ if _recommends_shade and month in NO_SHADE_MONTHS:
174
+ # Check if the response mentions extreme conditions
175
+ _mentions_extreme = any(w in text_lower for w in [
176
+ "extreme", "lethal", "emergency", "severe sunburn", "last resort",
177
+ ])
178
+ if not _mentions_extreme:
179
+ violations.append(RuleViolation(
180
+ rule_name="no_shade_in_may",
181
+ severity="block",
182
+ message="Response recommends shading in May without citing extreme conditions.",
183
+ correction=(
184
+ "May is the flowering/fruit-set period. Shading should be "
185
+ "avoided in May unless there is extreme heat causing lethal "
186
+ "stress. Panels should remain at full tracking."
187
+ ),
188
+ ))
189
+
190
+ # Rule: Below transition temp shading hurts (RuBP-limited)
191
+ if _recommends_shade and temp_c is not None and temp_c < NO_SHADE_TLEAF_BELOW:
192
+ violations.append(RuleViolation(
193
+ rule_name="temperature_transition",
194
+ severity="warn",
195
+ message=f"Response recommends shading at {temp_c:.0f}°C (below 28°C transition zone).",
196
+ correction=(
197
+ f"At {temp_c:.0f}°C, photosynthesis is RuBP-limited — "
198
+ f"the vine needs light, not shade. Shading would reduce "
199
+ f"photosynthesis. Keep panels at full tracking."
200
+ ),
201
+ ))
202
+
203
+ # Rule: Dormant season — shading is irrelevant, not harmful
204
+ if stage_id in ("winter_dormancy",) and _recommends_shade:
205
+ violations.append(RuleViolation(
206
+ rule_name="no_leaves_no_shade_problem",
207
+ severity="warn",
208
+ message="Response discusses shading during dormancy.",
209
+ correction=(
210
+ "The vine is dormant with no leaves. Shading is irrelevant "
211
+ "(not harmful, just pointless). Panels should track for "
212
+ "maximum energy."
213
+ ),
214
+ ))
215
+
216
+ # Rule: "No shading" answers must explain why
217
+ _recommends_no_shade = _text_recommends_no_shading(text_lower)
218
+ if _recommends_no_shade:
219
+ _has_reason = any(reason in text_lower for reason in [
220
+ "light-limited", "rubp", "need light", "needs light",
221
+ "full sun", "below 30", "below 28",
222
+ "dormant", "no leaves", "no canopy",
223
+ "night", "dark", "no radiation", "ghi", "no sun",
224
+ "carbon fixation", "morning light",
225
+ "not photosynthesi", "not active",
226
+ ])
227
+ if not _has_reason:
228
+ violations.append(RuleViolation(
229
+ rule_name="no_shading_must_explain",
230
+ severity="warn",
231
+ message="Response says 'no shading' without explaining why.",
232
+ correction=(
233
+ "When recommending no shading, always explain the reason: "
234
+ "is the vine light-limited (T < 30°C), dormant (no leaves), "
235
+ "or is there no radiation? The farmer needs to understand why."
236
+ ),
237
+ ))
238
+
239
+ return violations
240
+
241
+
242
+ # Shared keyword lists for shading detection heuristics
243
+ _POSITIVE_SHADE_PHRASES = [
244
+ "recommend shading", "should shade", "activate shading",
245
+ "tilt the panel", "move the panel", "adjust the panel",
246
+ "shade the vine", "shade your vine", "shading would help",
247
+ "shading is recommended", "suggest shading", "consider shading",
248
+ "apply shading", "deploy shading", "enable shading",
249
+ "recommend anti-tracking", "switch to anti-tracking",
250
+ ]
251
+
252
+ _NEGATIVE_SHADE_PHRASES = [
253
+ "should not shade", "don't shade", "no shading",
254
+ "avoid shading", "shading is not", "not recommend shading",
255
+ "do not shade", "keep panels tracking", "full tracking",
256
+ "shading would reduce", "shading would hurt",
257
+ "shading is irrelevant", "shading is unnecessary",
258
+ "i would not recommend shading", "i don't recommend shading",
259
+ "no shading needed", "shading is not needed",
260
+ "no need to shade", "no need for shading",
261
+ ]
262
+
263
+
264
+ def _text_recommends_shading(text_lower: str) -> bool:
265
+ """Heuristic: does the response recommend activating shade?"""
266
+ has_positive = any(p in text_lower for p in _POSITIVE_SHADE_PHRASES)
267
+ has_negative = any(p in text_lower for p in _NEGATIVE_SHADE_PHRASES)
268
+ # If both present, the negative usually wins (e.g. "some might suggest shading, but I don't recommend it")
269
+ return has_positive and not has_negative
270
+
271
+
272
+ def _text_recommends_no_shading(text_lower: str) -> bool:
273
+ """Heuristic: does the response explicitly recommend NOT shading?"""
274
+ return any(p in text_lower for p in _NEGATIVE_SHADE_PHRASES)
275
+
276
+
277
+ # ---------------------------------------------------------------------------
278
+ # 3. Confidence estimation
279
+ # ---------------------------------------------------------------------------
280
+
281
+ def estimate_confidence(
282
+ tool_called: bool,
283
+ tool_succeeded: bool,
284
+ data_age_minutes: Optional[float],
285
+ tool_name: Optional[str] = None,
286
+ ) -> str:
287
+ """
288
+ Estimate response confidence based on data grounding.
289
+
290
+ Returns one of: "high", "medium", "low", "insufficient_data".
291
+ """
292
+ # No tool called at all
293
+ if not tool_called:
294
+ return "low" # answering from system prompt / training data only
295
+
296
+ # Tool was called but failed
297
+ if not tool_succeeded:
298
+ return "insufficient_data"
299
+
300
+ # Tool succeeded — check data freshness
301
+ if data_age_minutes is None:
302
+ # Computed result (FvCB, shading sim) — no age concept
303
+ return "high"
304
+
305
+ if data_age_minutes <= 30:
306
+ return "high"
307
+ elif data_age_minutes <= 120:
308
+ return "medium"
309
+ else:
310
+ return "low"
311
+
312
+
313
+ # ---------------------------------------------------------------------------
314
+ # 4. Source tagging helper
315
+ # ---------------------------------------------------------------------------
316
+
317
+ # Map tool names to human-readable data sources
318
+ _TOOL_SOURCES = {
319
+ "get_current_weather": "IMS Station 43 (Sde Boker)",
320
+ "get_weather_history": "IMS Station 43 (Sde Boker)",
321
+ "get_vine_state": "ThingsBoard sensors (on-site)",
322
+ "get_sensor_history": "ThingsBoard sensors (on-site)",
323
+ "calc_photosynthesis": "Farquhar FvCB model (computed)",
324
+ "predict_photosynthesis_ml": "ML ensemble (computed)",
325
+ "get_ps_forecast": "FvCB day-ahead forecast (computed)",
326
+ "simulate_shading": "Shadow model simulation (computed)",
327
+ "compare_tilt_angles": "Shadow model simulation (computed)",
328
+ "get_daily_schedule": "Shadow model schedule (computed)",
329
+ "get_energy_generation": "IMS + analytical model (estimated)",
330
+ "get_energy_history": "IMS + analytical model (estimated)",
331
+ "predict_energy": "IMS + analytical model (estimated)",
332
+ "run_day_ahead_advisory": "Gemini day-ahead advisor",
333
+ "explain_biology_rule": "Built-in biology rules",
334
+ "get_photosynthesis_3d": "3D scene (computed)",
335
+ }
336
+
337
+
338
+ def get_source_label(tool_name: str) -> str:
339
+ """Return a human-readable source label for a tool."""
340
+ return _TOOL_SOURCES.get(tool_name, tool_name)
341
+
342
+
343
+ def tag_tool_result(tool_name: str, tool_result: dict) -> dict:
344
+ """
345
+ Add source metadata to a tool result before sending to Gemini.
346
+
347
+ The tagged result helps Gemini cite sources in its response.
348
+ """
349
+ tagged = dict(tool_result)
350
+ tagged["_source"] = get_source_label(tool_name)
351
+ tagged["_tool"] = tool_name
352
+
353
+ # Extract data age if present
354
+ age = tool_result.get("age_minutes")
355
+ if age is not None:
356
+ tagged["_data_age_minutes"] = age
357
+ if age > 60:
358
+ tagged["_freshness_warning"] = (
359
+ f"This data is {age:.0f} minutes old. "
360
+ "Warn the user that conditions may have changed."
361
+ )
362
+
363
+ return tagged
src/chatbot/llm_data_engineer.py ADDED
@@ -0,0 +1,559 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLMDataEngineer: Gemini-assisted sensor data cleaning and feature engineering
3
+ for the SolarWine agrivoltaic pipeline.
4
+
5
+ Phase 8B tasks:
6
+ - llm-data-cleaning : Gemini analyzes sensor stats, returns Z-score/IQR
7
+ filter thresholds for automated anomaly detection.
8
+ - llm-feature-eng : Gemini confirms feature formulae; module generates
9
+ cyclical time features and a Stress Risk Score.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Optional
15
+
16
+ import hashlib
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key
21
+ from src.time_features import add_cyclical_time_features
22
+
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Domain knowledge injected into Gemini prompts
26
+ # ---------------------------------------------------------------------------
27
+
28
+ SENSOR_CONTEXT = {
29
+ "Air1_PAR_ref": {
30
+ "description": "Photosynthetically Active Radiation (PAR)",
31
+ "unit": "μmol photons m⁻² s⁻¹",
32
+ "physical_range": [0, 2500],
33
+ "notes": "Solar PAR at surface cannot exceed ~2200–2500 under any realistic sky. "
34
+ "Values above 3000 are sensor artefacts.",
35
+ },
36
+ "Air1_leafTemperature_ref": {
37
+ "description": "Leaf (canopy) temperature",
38
+ "unit": "°C",
39
+ "physical_range": [-5, 55],
40
+ "notes": "Grape leaf temperature in the Negev can reach ~45°C on extreme days, "
41
+ "but values above 55°C are physiologically impossible for a living leaf.",
42
+ },
43
+ "Air1_airTemperature_ref": {
44
+ "description": "Air temperature near canopy",
45
+ "unit": "°C",
46
+ "physical_range": [0, 50],
47
+ "notes": "Sde Boker record high is ~47°C. Values above 50°C or below 0°C "
48
+ "during the growing season (May–Sep) are sensor faults.",
49
+ },
50
+ "Air1_VPD_ref": {
51
+ "description": "Vapour Pressure Deficit",
52
+ "unit": "kPa",
53
+ "physical_range": [0, 7],
54
+ "notes": "Desert VPD rarely exceeds 6–7 kPa even in extreme heat. "
55
+ "Negative values and values above 8 kPa are sensor errors.",
56
+ },
57
+ "Air1_airHumidity_ref": {
58
+ "description": "Relative Humidity",
59
+ "unit": "%",
60
+ "physical_range": [0, 100],
61
+ "notes": "Must be in [0, 100]. Values outside this range are invalid.",
62
+ },
63
+ "Air1_CO2_ref": {
64
+ "description": "CO₂ concentration (raw sensor, corrected ×0.7 by SensorDataLoader)",
65
+ "unit": "ppm (raw)",
66
+ "physical_range": [400, 4000],
67
+ "notes": "Raw sensor reads ~30% too high (corrected ×0.7 in the data pipeline). "
68
+ "Raw values above 4000 ppm or below 400 ppm are sensor artefacts. "
69
+ "Post-correction (~280–2800 ppm) values above 2000 ppm indicate sensor drift.",
70
+ },
71
+ }
72
+
73
+ _SYSTEM_PROMPT_CLEANING = (
74
+ "You are a precision-agriculture sensor data quality engineer. "
75
+ "You are given descriptive statistics for sensor columns from a vineyard "
76
+ "in the Negev desert, Israel (Sde Boker region, Semillon grapevine, May–September). "
77
+ "Your task: for each column, propose anomaly filter thresholds to flag "
78
+ "or remove invalid readings. "
79
+ "Return ONLY a JSON object (no markdown, no explanation) with the following schema:\n"
80
+ "{\n"
81
+ ' "<column_name>": {\n'
82
+ ' "lower_bound": <float or null>,\n'
83
+ ' "upper_bound": <float or null>,\n'
84
+ ' "zscore_threshold": <float>,\n'
85
+ ' "iqr_multiplier": <float>,\n'
86
+ ' "rationale": "<one sentence>"\n'
87
+ " },\n"
88
+ " ...\n"
89
+ "}"
90
+ )
91
+
92
+ _SYSTEM_PROMPT_FEATURES = (
93
+ "You are a precision-agriculture feature engineering expert specialising in "
94
+ "grapevine physiology and agrivoltaic systems. "
95
+ "Given the available sensor columns, propose the exact mathematical formulae "
96
+ "for a Stress Risk Score that combines VPD and (optionally) CWSI. "
97
+ "Return ONLY a JSON object (no markdown, no explanation) with schema:\n"
98
+ "{\n"
99
+ ' "stress_risk_score": {\n'
100
+ ' "formula_description": "<one sentence>",\n'
101
+ ' "vpd_weight": <float>,\n'
102
+ ' "cwsi_weight": <float>,\n'
103
+ ' "vpd_clip_max": <float>,\n'
104
+ ' "cwsi_clip_max": <float>,\n'
105
+ ' "rationale": "<one or two sentences on biological justification>"\n'
106
+ " }\n"
107
+ "}"
108
+ )
109
+
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # Helper: robust JSON extraction from LLM response
113
+ # ---------------------------------------------------------------------------
114
+
115
+ def _extract_json(text: str) -> dict:
116
+ """Thin wrapper around the shared genai_utils implementation."""
117
+ return extract_json_object(text)
118
+
119
+
120
+ # ---------------------------------------------------------------------------
121
+ # Main class
122
+ # ---------------------------------------------------------------------------
123
+
124
+ class LLMDataEngineer:
125
+ """
126
+ Gemini-assisted sensor data cleaning and feature engineering.
127
+
128
+ Usage
129
+ -----
130
+ engineer = LLMDataEngineer()
131
+ df_clean, thresholds, features_meta = engineer.run_pipeline(df)
132
+ """
133
+
134
+ def __init__(
135
+ self,
136
+ model_name: str = "gemini-2.5-flash",
137
+ api_key: Optional[str] = None,
138
+ verbose: bool = True,
139
+ ):
140
+ self.model_name = model_name
141
+ self._api_key = api_key
142
+ self._client = None
143
+ self.verbose = verbose
144
+ # Caches keyed by content hash — avoids repeated Gemini calls
145
+ self._threshold_cache: dict[str, dict] = {}
146
+ self._feature_spec_cache: dict[str, dict] = {}
147
+
148
+ # ------------------------------------------------------------------
149
+ # Internal helpers
150
+ # ------------------------------------------------------------------
151
+
152
+ @property
153
+ def api_key(self) -> str:
154
+ return get_google_api_key(self._api_key)
155
+
156
+ @property
157
+ def client(self):
158
+ if self._client is None:
159
+ self._client = get_genai_client(self._api_key)
160
+ return self._client
161
+
162
+ def _call_gemini(self, system_prompt: str, user_prompt: str) -> str:
163
+ """Send a prompt to Gemini and return the raw text response."""
164
+ response = self.client.models.generate_content(
165
+ model=self.model_name,
166
+ contents=user_prompt,
167
+ config={"system_instruction": system_prompt},
168
+ )
169
+ return response.text
170
+
171
+ @staticmethod
172
+ def _hash_key(*parts: str) -> str:
173
+ """Create a short hash from string parts for cache keying."""
174
+ return hashlib.md5("|".join(parts).encode()).hexdigest()[:12]
175
+
176
+ def _log(self, msg: str) -> None:
177
+ if self.verbose:
178
+ print(f"[LLMDataEngineer] {msg}")
179
+
180
+ # ------------------------------------------------------------------
181
+ # Step 1: Anomaly detection — ask Gemini for filter thresholds
182
+ # ------------------------------------------------------------------
183
+
184
+ def analyze_anomalies(
185
+ self,
186
+ df: pd.DataFrame,
187
+ columns: Optional[list[str]] = None,
188
+ ) -> dict:
189
+ """
190
+ Send descriptive statistics to Gemini and receive per-column
191
+ anomaly filter thresholds.
192
+
193
+ Parameters
194
+ ----------
195
+ df : DataFrame with sensor measurements
196
+ columns : subset of columns to analyze; defaults to SENSOR_CONTEXT keys
197
+
198
+ Returns
199
+ -------
200
+ dict mapping column_name → {lower_bound, upper_bound,
201
+ zscore_threshold, iqr_multiplier, rationale}
202
+ """
203
+ target_cols = [
204
+ c for c in (columns or list(SENSOR_CONTEXT.keys())) if c in df.columns
205
+ ]
206
+ if not target_cols:
207
+ raise ValueError("No recognized sensor columns found in DataFrame.")
208
+
209
+ stats = df[target_cols].describe(percentiles=[0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99])
210
+
211
+ # Build prompt with stats + domain context
212
+ lines = [
213
+ "Analyze the following sensor columns from a vineyard dataset.",
214
+ "For each column, the physical context and expected range are provided.",
215
+ "",
216
+ ]
217
+ for col in target_cols:
218
+ ctx = SENSOR_CONTEXT.get(col, {})
219
+ lines.append(f"Column: {col}")
220
+ if ctx:
221
+ lines.append(f" Description : {ctx['description']} ({ctx['unit']})")
222
+ lines.append(f" Expected range : {ctx['physical_range']}")
223
+ lines.append(f" Domain notes : {ctx['notes']}")
224
+ lines.append(" Observed statistics:")
225
+ for stat_name, val in stats[col].items():
226
+ lines.append(f" {stat_name:10s}: {val:.4f}")
227
+ lines.append("")
228
+
229
+ user_prompt = "\n".join(lines)
230
+
231
+ # Check cache (same stats → same thresholds)
232
+ cache_key = self._hash_key(user_prompt)
233
+ if cache_key in self._threshold_cache:
234
+ self._log("Using cached anomaly thresholds (same data fingerprint).")
235
+ return self._threshold_cache[cache_key]
236
+
237
+ self._log("Querying Gemini for anomaly thresholds …")
238
+
239
+ try:
240
+ raw = self._call_gemini(_SYSTEM_PROMPT_CLEANING, user_prompt)
241
+ thresholds = _extract_json(raw)
242
+ except Exception as exc:
243
+ self._log(f"Gemini API error: {exc}. Using statistical fallback.")
244
+ thresholds = self._fallback_thresholds(df, target_cols)
245
+
246
+ self._threshold_cache[cache_key] = thresholds
247
+ self._log(f"Received thresholds for {len(thresholds)} columns.")
248
+ return thresholds
249
+
250
+ @staticmethod
251
+ def _fallback_thresholds(df: pd.DataFrame, cols: list[str]) -> dict:
252
+ """Conservative statistical fallback used when API is unavailable."""
253
+ result = {}
254
+ for col in cols:
255
+ ctx = SENSOR_CONTEXT.get(col, {})
256
+ phys = ctx.get("physical_range", [None, None])
257
+ result[col] = {
258
+ "lower_bound": phys[0],
259
+ "upper_bound": phys[1],
260
+ "zscore_threshold": 3.5,
261
+ "iqr_multiplier": 3.0,
262
+ "rationale": "Statistical fallback (Gemini unavailable).",
263
+ }
264
+ return result
265
+
266
+ # ------------------------------------------------------------------
267
+ # Step 2: Apply cleaning
268
+ # ------------------------------------------------------------------
269
+
270
+ def apply_cleaning(
271
+ self,
272
+ df: pd.DataFrame,
273
+ thresholds: dict,
274
+ strategy: str = "clip",
275
+ ) -> pd.DataFrame:
276
+ """
277
+ Apply Gemini-generated thresholds to clean the sensor DataFrame.
278
+
279
+ Parameters
280
+ ----------
281
+ df : raw sensor DataFrame
282
+ thresholds : dict from analyze_anomalies()
283
+ strategy : 'clip' — clamp values to [lower_bound, upper_bound]
284
+ 'drop' — drop rows where any column is out of bounds
285
+ 'nan' — replace out-of-bounds values with NaN
286
+
287
+ Returns
288
+ -------
289
+ Cleaned DataFrame (copy).
290
+ """
291
+ result = df.copy()
292
+ report_lines = ["Anomaly cleaning report:"]
293
+
294
+ for col, thresh in thresholds.items():
295
+ if col not in result.columns:
296
+ continue
297
+ series = result[col]
298
+ lower = thresh.get("lower_bound")
299
+ upper = thresh.get("upper_bound")
300
+
301
+ # Count violations before cleaning
302
+ mask_low = (series < lower) if lower is not None else pd.Series(False, index=series.index)
303
+ mask_high = (series > upper) if upper is not None else pd.Series(False, index=series.index)
304
+
305
+ # Z-score based detection (secondary flag)
306
+ z_thresh = thresh.get("zscore_threshold", 3.5)
307
+ z_scores = (series - series.mean()) / (series.std() + 1e-9)
308
+ mask_zscore = z_scores.abs() > z_thresh
309
+
310
+ # IQR-based detection (tertiary flag)
311
+ iqr_mult = thresh.get("iqr_multiplier", 3.0)
312
+ q1, q3 = series.quantile(0.25), series.quantile(0.75)
313
+ iqr = q3 - q1
314
+ mask_iqr = (series < q1 - iqr_mult * iqr) | (series > q3 + iqr_mult * iqr)
315
+
316
+ # Union of all anomaly flags
317
+ mask_anomaly = mask_low | mask_high | (mask_zscore & mask_iqr)
318
+ n_anomalies = int(mask_anomaly.sum())
319
+
320
+ if n_anomalies > 0:
321
+ report_lines.append(
322
+ f" {col}: {n_anomalies} anomalies ({n_anomalies / len(series) * 100:.2f}%)"
323
+ )
324
+
325
+ if strategy == "clip":
326
+ result[col] = series.clip(
327
+ lower=lower if lower is not None else -np.inf,
328
+ upper=upper if upper is not None else np.inf,
329
+ )
330
+ elif strategy == "nan":
331
+ result.loc[mask_anomaly, col] = np.nan
332
+ elif strategy == "drop":
333
+ result = result.loc[~mask_anomaly].copy()
334
+ else:
335
+ raise ValueError(f"Unknown strategy '{strategy}'. Use 'clip', 'nan', or 'drop'.")
336
+
337
+ self._log("\n".join(report_lines))
338
+ return result
339
+
340
+ # ------------------------------------------------------------------
341
+ # Step 3: Feature engineering
342
+ # ------------------------------------------------------------------
343
+
344
+ def get_feature_spec(
345
+ self,
346
+ available_cols: list[str],
347
+ ) -> dict:
348
+ """
349
+ Ask Gemini to confirm the Stress Risk Score formula given available columns.
350
+
351
+ Returns a feature spec dict with vpd_weight, cwsi_weight, etc.
352
+ Falls back to a biologically motivated default if API is unavailable.
353
+ """
354
+ has_cwsi = any("cwsi" in c.lower() or "CWSI" in c for c in available_cols)
355
+
356
+ # Cache key: just depends on whether CWSI is available
357
+ cache_key = f"cwsi={has_cwsi}"
358
+ if cache_key in self._feature_spec_cache:
359
+ self._log("Using cached feature spec.")
360
+ return self._feature_spec_cache[cache_key]
361
+
362
+ user_prompt = (
363
+ f"Available sensor columns: {available_cols}.\n"
364
+ f"CWSI column available: {has_cwsi}.\n"
365
+ "Propose weights and clip bounds for a Stress Risk Score that linearly "
366
+ "combines normalised VPD and (if available) normalised CWSI. "
367
+ "The score should be in [0, 1] and reflect acute heat/drought stress "
368
+ "for Semillon grapevine in a desert agrivoltaic system."
369
+ )
370
+ self._log("Querying Gemini for Stress Risk Score formula …")
371
+ try:
372
+ raw = self._call_gemini(_SYSTEM_PROMPT_FEATURES, user_prompt)
373
+ spec = _extract_json(raw).get("stress_risk_score", {})
374
+ except Exception as exc:
375
+ self._log(f"Gemini API error: {exc}. Using default feature spec.")
376
+ spec = {}
377
+
378
+ # Merge with defaults so the dict is always complete
379
+ defaults = {
380
+ "formula_description": "Normalised weighted sum of VPD and CWSI stress signals",
381
+ "vpd_weight": 0.6,
382
+ "cwsi_weight": 0.4,
383
+ "vpd_clip_max": 6.0,
384
+ "cwsi_clip_max": 1.0,
385
+ "rationale": (
386
+ "VPD dominates stomatal response (weight 0.6); "
387
+ "CWSI captures cumulative water status (weight 0.4)."
388
+ ),
389
+ }
390
+ for k, v in defaults.items():
391
+ spec.setdefault(k, v)
392
+
393
+ self._feature_spec_cache[cache_key] = spec
394
+ return spec
395
+
396
+ def engineer_features(
397
+ self,
398
+ df: pd.DataFrame,
399
+ timestamp_col: str = "time",
400
+ cwsi_col: Optional[str] = None,
401
+ vpd_col: str = "Air1_VPD_ref",
402
+ feature_spec: Optional[dict] = None,
403
+ ) -> pd.DataFrame:
404
+ """
405
+ Add engineered features to the sensor DataFrame.
406
+
407
+ New columns added
408
+ -----------------
409
+ hour_sin, hour_cos – cyclical encoding of hour-of-day
410
+ doy_sin, doy_cos – cyclical encoding of day-of-year
411
+ stress_risk_score – weighted VPD (+ CWSI) stress index in [0, 1]
412
+
413
+ Parameters
414
+ ----------
415
+ df : sensor DataFrame (original unmodified)
416
+ timestamp_col : name of the datetime column (or index if not a column)
417
+ cwsi_col : optional CWSI column name; if None, stress score uses VPD only
418
+ vpd_col : VPD column name
419
+ feature_spec : pre-fetched spec from get_feature_spec(); fetched if None
420
+
421
+ Returns
422
+ -------
423
+ DataFrame copy with additional feature columns.
424
+ """
425
+ result = df.copy()
426
+
427
+ # --- Cyclical time features (via shared utility) ---
428
+ ts_col = timestamp_col if timestamp_col in result.columns else None
429
+ use_index = ts_col is None and isinstance(result.index, pd.DatetimeIndex)
430
+ if ts_col is not None or use_index:
431
+ result = add_cyclical_time_features(
432
+ result,
433
+ timestamp_col=ts_col,
434
+ index_is_timestamp=use_index,
435
+ )
436
+ self._log("Added cyclical time features: hour_sin, hour_cos, doy_sin, doy_cos")
437
+ else:
438
+ self._log("Warning: no timestamp found; skipping cyclical features.")
439
+
440
+ # --- Stress Risk Score ---
441
+ if vpd_col in result.columns:
442
+ if feature_spec is None:
443
+ feature_spec = self.get_feature_spec(list(result.columns))
444
+
445
+ vpd_w = float(feature_spec.get("vpd_weight", 0.6))
446
+ cwsi_w = float(feature_spec.get("cwsi_weight", 0.4))
447
+ vpd_max = float(feature_spec.get("vpd_clip_max", 6.0))
448
+ cwsi_max = float(feature_spec.get("cwsi_clip_max", 1.0))
449
+
450
+ vpd_norm = (result[vpd_col].clip(0, vpd_max) / vpd_max).fillna(0.0)
451
+
452
+ if cwsi_col and cwsi_col in result.columns:
453
+ cwsi_norm = (result[cwsi_col].clip(0, cwsi_max) / cwsi_max).fillna(0.0)
454
+ effective_cwsi_w = cwsi_w
455
+ effective_vpd_w = vpd_w
456
+ else:
457
+ # No CWSI — redistribute weight entirely to VPD
458
+ cwsi_norm = pd.Series(0.0, index=result.index)
459
+ effective_cwsi_w = 0.0
460
+ effective_vpd_w = 1.0
461
+
462
+ score = (effective_vpd_w * vpd_norm + effective_cwsi_w * cwsi_norm).clip(0, 1)
463
+ result["stress_risk_score"] = score.round(4)
464
+
465
+ self._log(
466
+ f"Added stress_risk_score (vpd_weight={effective_vpd_w:.2f}, "
467
+ f"cwsi_weight={effective_cwsi_w:.2f})"
468
+ )
469
+ else:
470
+ self._log(f"Warning: VPD column '{vpd_col}' not found; skipping stress_risk_score.")
471
+
472
+ return result
473
+
474
+ # ------------------------------------------------------------------
475
+ # Full pipeline
476
+ # ------------------------------------------------------------------
477
+
478
+ def run_pipeline(
479
+ self,
480
+ df: pd.DataFrame,
481
+ cleaning_strategy: str = "clip",
482
+ timestamp_col: str = "time",
483
+ cwsi_col: Optional[str] = None,
484
+ vpd_col: str = "Air1_VPD_ref",
485
+ ) -> tuple[pd.DataFrame, dict, dict]:
486
+ """
487
+ Execute the full LLM data engineering pipeline.
488
+
489
+ Steps
490
+ -----
491
+ 1. Gemini analyzes column stats → anomaly thresholds
492
+ 2. Apply cleaning (clip / nan / drop)
493
+ 3. Gemini confirms feature spec → engineer features
494
+
495
+ Returns
496
+ -------
497
+ (df_engineered, thresholds, feature_spec)
498
+ """
499
+ self._log("=== LLM Data Engineering Pipeline ===")
500
+
501
+ # Step 1: anomaly thresholds
502
+ thresholds = self.analyze_anomalies(df)
503
+
504
+ # Step 2: clean
505
+ df_clean = self.apply_cleaning(df, thresholds, strategy=cleaning_strategy)
506
+
507
+ # Step 3: feature spec + engineering
508
+ feature_spec = self.get_feature_spec(list(df_clean.columns))
509
+ df_engineered = self.engineer_features(
510
+ df_clean,
511
+ timestamp_col=timestamp_col,
512
+ cwsi_col=cwsi_col,
513
+ vpd_col=vpd_col,
514
+ feature_spec=feature_spec,
515
+ )
516
+
517
+ new_cols = [c for c in df_engineered.columns if c not in df.columns]
518
+ self._log(f"Pipeline complete. New columns: {new_cols}")
519
+ return df_engineered, thresholds, feature_spec
520
+
521
+
522
+ # ---------------------------------------------------------------------------
523
+ # CLI entry point
524
+ # ---------------------------------------------------------------------------
525
+
526
+ if __name__ == "__main__":
527
+ from pathlib import Path
528
+
529
+ DATA_DIR = Path(__file__).resolve().parent.parent / "Data"
530
+ sample_path = DATA_DIR / "Seymour" / "sensors_wide_sample.csv"
531
+ sensors_path = DATA_DIR / "Seymour" / "sensors_wide.csv"
532
+ csv_path = sample_path if sample_path.exists() else sensors_path
533
+
534
+ print(f"Loading sensor data from: {csv_path.name}")
535
+ df_raw = pd.read_csv(csv_path)
536
+ print(f"Shape: {df_raw.shape} | Columns: {list(df_raw.columns)}\n")
537
+
538
+ engineer = LLMDataEngineer(verbose=True)
539
+ df_out, thresh, feat_spec = engineer.run_pipeline(df_raw)
540
+
541
+ print("\n--- Anomaly Thresholds (from Gemini) ---")
542
+ for col, t in thresh.items():
543
+ print(
544
+ f" {col:35s} lower={t.get('lower_bound')} "
545
+ f"upper={t.get('upper_bound')} "
546
+ f"z={t.get('zscore_threshold')} "
547
+ f"IQR×{t.get('iqr_multiplier')}"
548
+ )
549
+ print(f" → {t.get('rationale', '')}")
550
+
551
+ print("\n--- Stress Risk Score Spec (from Gemini) ---")
552
+ for k, v in feat_spec.items():
553
+ print(f" {k}: {v}")
554
+
555
+ print("\n--- Engineered DataFrame Head ---")
556
+ eng_cols = ["time", "Air1_PAR_ref", "Air1_VPD_ref",
557
+ "hour_sin", "hour_cos", "doy_sin", "doy_cos", "stress_risk_score"]
558
+ show = [c for c in eng_cols if c in df_out.columns]
559
+ print(df_out[show].head(6).to_string(index=False))
src/chatbot/routing_agent.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RoutingAgent: Gemini-based intelligent model routing for the agrivoltaic
3
+ control system. Given real-time telemetry, routes to either the FvCB
4
+ mechanistic model or the ML ensemble for photosynthesis prediction.
5
+
6
+ Uses gemini-2.5-flash for low-latency (~100ms) routing decisions.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Optional
12
+
13
+ from src.genai_utils import get_genai_client, get_google_api_key
14
+
15
# System instruction for the Gemini routing call. The model is told to reply
# with exactly one of the two tokens that RoutingAgent._parse_response checks
# for ("MODEL_A" / "MODEL_B"); keep prompt and parser in sync if editing.
SYSTEM_PROMPT = (
    "You are a model routing supervisor for an agrivoltaic vineyard control system. "
    "Given real-time telemetry, decide which photosynthesis model to use:\n"
    "- MODEL_A (FvCB mechanistic): accurate under standard conditions (T<30C, low stress)\n"
    "- MODEL_B (ML ensemble): handles non-linear stress, high VPD, extreme heat\n"
    "Reply with ONLY 'MODEL_A' or 'MODEL_B'."
)
22
+
23
+
24
class RoutingAgent:
    """Model router for FvCB vs ML ensemble selection.

    Uses deterministic rules first (covers >90% of cases without any API call).
    Falls back to Gemini only for ambiguous transition-zone conditions.
    """

    # Thresholds for rule-based routing (avoids API calls)
    _TEMP_CLEAR_FVCB = 28.0  # clearly FvCB territory
    _TEMP_CLEAR_ML = 32.0    # clearly ML territory
    _VPD_CLEAR_ML = 2.5      # high VPD → ML
    _CWSI_CLEAR_ML = 0.4     # water stress → ML

    def __init__(
        self,
        model_name: str = "gemini-2.5-flash",
        api_key: Optional[str] = None,
    ):
        self.model_name = model_name
        self._api_key = api_key
        self._client = None  # lazily created by the .client property

    @property
    def api_key(self) -> str:
        """Resolved Google API key (explicit constructor arg or environment)."""
        return get_google_api_key(self._api_key)

    @property
    def client(self):
        """Lazy-init the Gemini client."""
        if self._client is None:
            self._client = get_genai_client(self._api_key)
        return self._client

    # ------------------------------------------------------------------
    # Rule-based fast path (no API call)
    # ------------------------------------------------------------------

    @classmethod
    def _rule_based_route(cls, telemetry: dict) -> Optional[str]:
        """Return 'fvcb' or 'ml' if rules are decisive, else None.

        Missing telemetry keys are treated as "unknown" — a reading can
        still be routed if the remaining signals are decisive.
        """
        temp = telemetry.get("temp_c")
        vpd = telemetry.get("vpd")
        cwsi = telemetry.get("cwsi")

        # High stress signals → ML (no ambiguity)
        if temp is not None and temp >= cls._TEMP_CLEAR_ML:
            return "ml"
        if vpd is not None and vpd >= cls._VPD_CLEAR_ML:
            return "ml"
        if cwsi is not None and cwsi >= cls._CWSI_CLEAR_ML:
            return "ml"

        # Clearly cool/calm → FvCB
        if temp is not None and temp < cls._TEMP_CLEAR_FVCB:
            if vpd is None or vpd < cls._VPD_CLEAR_ML:
                if cwsi is None or cwsi < cls._CWSI_CLEAR_ML:
                    return "fvcb"

        return None  # transition zone — need LLM

    # ------------------------------------------------------------------
    # Gemini routing (only for ambiguous cases)
    # ------------------------------------------------------------------

    @staticmethod
    def _format_telemetry(telemetry: dict) -> str:
        """Format telemetry dict into a readable prompt string.

        Only known fields present in ``telemetry`` are included.
        """
        lines = ["Current telemetry:"]
        field_labels = {
            "temp_c": "Air temperature",
            "ghi_w_m2": "GHI (irradiance)",
            "cwsi": "CWSI (crop water stress)",
            "vpd": "VPD (vapor pressure deficit)",
            "wind_speed_ms": "Wind speed",
            "hour": "Hour of day",
        }
        for key, label in field_labels.items():
            if key in telemetry:
                val = telemetry[key]
                lines.append(f"  {label}: {val}")
        return "\n".join(lines)

    @staticmethod
    def _parse_response(text: str) -> str:
        """Extract model choice from Gemini response.

        Returns 'fvcb' or 'ml'. Falls back to 'fvcb' on ambiguous response.
        """
        text_upper = text.strip().upper()
        if "MODEL_B" in text_upper:
            return "ml"
        return "fvcb"

    @staticmethod
    def _parse_batch_response(text: str) -> dict[int, str]:
        """Parse a batched Gemini reply into ``{index: 'fvcb' | 'ml'}``.

        Expects one answer per line in the form ``<index>: MODEL_A`` or
        ``<index>: MODEL_B``. Parsing is line-based on purpose: the previous
        implementation did a plain substring search for ``f"{idx}: MODEL_B"``
        over the whole response, so a lookup for index 1 would also match
        inside "11: MODEL_B", silently mis-routing rows. If an index appears
        on multiple lines, the last occurrence wins.
        """
        parsed: dict[int, str] = {}
        for line in text.upper().splitlines():
            left, sep, right = line.partition(":")
            if not sep:
                continue
            # Accept both "5: MODEL_A" and "Reading 5: MODEL_A" forms.
            digits = "".join(ch for ch in left if ch.isdigit())
            if not digits:
                continue
            parsed[int(digits)] = "ml" if "MODEL_B" in right else "fvcb"
        return parsed

    def route(self, telemetry: dict) -> str:
        """Route a single telemetry reading to fvcb or ml.

        Uses deterministic rules first; only calls Gemini for ambiguous cases.

        Parameters
        ----------
        telemetry : dict with keys like temp_c, ghi_w_m2, cwsi, vpd,
            wind_speed_ms, hour

        Returns
        -------
        'fvcb' or 'ml'
        """
        # Fast path: rule-based (no API call)
        rule_result = self._rule_based_route(telemetry)
        if rule_result is not None:
            return rule_result

        # Slow path: Gemini for transition-zone ambiguity
        prompt = self._format_telemetry(telemetry)
        try:
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=prompt,
                config={"system_instruction": SYSTEM_PROMPT},
            )
            return self._parse_response(response.text)
        except Exception as e:
            # Fail safe: mechanistic model is the conservative default.
            print(f"RoutingAgent: API error ({e}), falling back to fvcb")
            return "fvcb"

    def route_batch(self, telemetry_rows: list[dict]) -> list[str]:
        """Route a batch of telemetry readings.

        Uses rule-based routing where possible; batches remaining ambiguous
        rows into a single Gemini call.
        """
        results: list[Optional[str]] = [None] * len(telemetry_rows)
        ambiguous_indices: list[int] = []

        # First pass: rule-based
        for i, row in enumerate(telemetry_rows):
            rule_result = self._rule_based_route(row)
            if rule_result is not None:
                results[i] = rule_result
            else:
                ambiguous_indices.append(i)

        # Second pass: single batched Gemini call for ambiguous rows
        if ambiguous_indices:
            lines = [
                "Route each of the following telemetry readings to MODEL_A or MODEL_B.",
                "Reply with one line per reading: '<index>: MODEL_A' or '<index>: MODEL_B'.",
                "",
            ]
            for idx in ambiguous_indices:
                lines.append(f"Reading {idx}: {self._format_telemetry(telemetry_rows[idx])}")
            lines.append("")

            try:
                response = self.client.models.generate_content(
                    model=self.model_name,
                    contents="\n".join(lines),
                    config={"system_instruction": SYSTEM_PROMPT},
                )
                # Line-based parse avoids index substring collisions
                # (e.g. 1 vs 11) — see _parse_batch_response.
                answers = self._parse_batch_response(response.text)
                for idx in ambiguous_indices:
                    results[idx] = answers.get(idx, "fvcb")
            except Exception as e:
                print(f"RoutingAgent: batch API error ({e}), falling back to fvcb")
                for idx in ambiguous_indices:
                    results[idx] = "fvcb"

        return results
196
+
197
+
198
+ # ----------------------------------------------------------------------
199
+ # CLI entry point
200
+ # ----------------------------------------------------------------------
201
+
202
if __name__ == "__main__":
    # Demo: exercise the router on three representative conditions
    # (clear-FvCB, clear-ML, and a transition-zone reading).
    demo_cases = [
        ("Cool morning", {"temp_c": 22.0, "ghi_w_m2": 350.0, "cwsi": 0.15,
                          "vpd": 0.8, "wind_speed_ms": 2.0, "hour": 8}),
        ("Hot afternoon, high stress", {"temp_c": 38.0, "ghi_w_m2": 950.0, "cwsi": 0.72,
                                        "vpd": 3.5, "wind_speed_ms": 1.0, "hour": 14}),
        ("Moderate conditions", {"temp_c": 29.5, "ghi_w_m2": 680.0, "cwsi": 0.35,
                                 "vpd": 1.8, "wind_speed_ms": 3.0, "hour": 11}),
    ]

    router = RoutingAgent()
    print("Gemini Routing Agent — Sample Scenarios\n")

    labels = {"fvcb": "FvCB (mechanistic)", "ml": "ML ensemble"}
    for name, telemetry in demo_cases:
        decision = router.route(telemetry)
        print(f"  {name:30s} → {decision:4s} ({labels[decision]})")
src/chatbot/vineyard_chatbot.py ADDED
@@ -0,0 +1,939 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VineyardChatbot: Gemini-powered conversational advisor for the SolarWine
3
+ agrivoltaic system.
4
+
5
+ Provides a natural-language interface for farmers to ask about shading
6
+ decisions, photosynthesis, weather conditions, vine biology, and energy
7
+ generation. Uses a DataHub of loosely-coupled service providers for all
8
+ data access — the chatbot never imports data clients directly.
9
+
10
+ Anti-hallucination guardrails (v2):
11
+ - Structured responses with confidence, sources, and caveats
12
+ - Mandatory tool grounding for data questions
13
+ - Post-response rule validation
14
+ - Source-tagged tool results
15
+ - Confidence estimation based on data freshness
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import re
22
+ import traceback
23
+ from dataclasses import dataclass, field
24
+ from typing import Optional
25
+
26
+ from src.data_providers import DataHub
27
+ from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key
28
+ from src.chatbot.guardrails import (
29
+ classify_query,
30
+ estimate_confidence,
31
+ get_source_label,
32
+ tag_tool_result,
33
+ validate_response,
34
+ )
35
+
36
+
37
def _extract_json(text: str) -> dict:
    """Delegate JSON extraction to the shared genai_utils helper."""
    parsed = extract_json_object(text)
    return parsed
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Data structures
44
+ # ---------------------------------------------------------------------------
45
+
46
@dataclass
class ChatResponse:
    """Structured response from the chatbot with grounding metadata.

    The grounding fields (confidence, sources, caveats, rule_violations)
    let callers display how trustworthy an answer is and which data
    backed it, rather than showing free text alone.
    """
    # Natural-language answer for the user.
    message: str
    # Tool invocations recorded while producing the answer.
    tool_calls: list[dict] = field(default_factory=list)
    # Structured data payloads gathered from tools.
    data: dict = field(default_factory=dict)
    # --- Grounding metadata (v2) ---
    confidence: str = "low"  # high / medium / low / insufficient_data
    # Labels of the data sources consulted.
    sources: list[str] = field(default_factory=list)
    # Warnings (e.g. stale or missing data) attached to the answer.
    caveats: list[str] = field(default_factory=list)
    # Biology-rule violations found by post-response validation.
    rule_violations: list[dict] = field(default_factory=list)
    # --- Dual-channel advisory (v3) ---
    response_mode: str = "info"  # "info" (factual) or "advisory" (recommendation)
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Biology rules lookup (shared knowledge base)
63
+ # ---------------------------------------------------------------------------
64
+
65
# Shared agronomy knowledge base. Keys double as the valid arguments to the
# explain_biology_rule tool and are embedded (numbered, upper-cased) into the
# system prompt by _build_system_prompt / build_contextual_prompt. Values are
# plain-English rule statements passed verbatim to the LLM — do not reword
# casually, the guardrails and prompts rely on their exact content.
BIOLOGY_RULES = {
    "site_location": (
        "The vineyard site is in Yeruham, Israel (Seymour experimental plot). "
        "Weather data is from IMS station 43 (Sde Boker, Negev). Timezone is always "
        "Asia/Jerusalem (Israel Standard Time / Israel Daylight Time). All timestamps "
        "from tools (get_current_weather, get_vine_state, etc.) are in Israel local time. "
        "When the user asks about 'right now' or 'current' conditions, interpret the "
        "time in the tool result as Israel local time (e.g. 15:16 = afternoon in Yeruham)."
    ),
    "temperature_transition": (
        "Below 30\u00b0C, Semillon photosynthesis is RuBP-limited (light is the "
        "bottleneck \u2014 shading HURTS). Above 30\u00b0C, it becomes Rubisco-limited "
        "(heat is the bottleneck \u2014 shading MAY help). The transition is gradual "
        "(28\u201332\u00b0C)."
    ),
    "no_shade_before_10": (
        "Morning light is critical for carbon fixation. Never shade before "
        "10:00 regardless of temperature."
    ),
    "no_shade_in_may": (
        "May is the flowering/fruit-set period. Yield protection has priority: "
        "avoid shading in May under normal conditions because even small losses "
        "can reduce cluster number and berry set. Only introduce shade in May "
        "as a last resort in extreme heat to prevent serious damage (e.g. "
        "severe sunburn or lethal stress)."
    ),
    "cwsi_threshold": (
        "Crop Water Stress Index > 0.4 indicates real water stress. Below 0.4, "
        "the vine is coping adequately."
    ),
    "berry_sunburn": (
        "Direct exposure at air temperature > 35\u00b0C risks berry sunburn, "
        "especially on the southwest-facing side of clusters in the afternoon."
    ),
    "energy_budget": (
        "Primary objective is to maximise annual PV energy. The vines have a "
        "limited \"protection budget\": up to 5% annual energy sacrifice for "
        "shading that clearly protects vine health or yield. Suggested monthly "
        "caps: May=0%, Jun=15%, Jul=30%, Aug=30%, Sep=20%, Oct=5%. Stay below "
        "these caps unless there is an exceptional agronomic reason."
    ),
    "model_routing": (
        "Use FvCB (Farquhar model) for standard conditions (T < 30\u00b0C, "
        "VPD < 2.5 kPa, adequate water). Use ML ensemble for stress conditions "
        "(T > 30\u00b0C, high VPD, water stress, or any non-linear regime)."
    ),
    "phenological_multiplier": (
        "Stress during veraison (berry ripening) is 1.5x more damaging than "
        "during vegetative growth. Protect veraison at higher cost."
    ),
    "irrigation_management": (
        "Aim to keep soil moisture in a comfortable band for Semillon: avoid "
        "both chronic dryness and chronic saturation. During vegetative growth "
        "allow gentle dry-down between irrigations; during flowering and "
        "veraison, avoid strong swings. Use CWSI and VPD together: if CWSI "
        "stays > 0.4 and VPD is high for several hours, consider an irrigation "
        "event unless the soil is already wet."
    ),
    "fertiliser_management": (
        "Prioritise balanced nutrition over aggressive fertiliser use. Apply "
        "most nitrogen early in the season (budburst to pre-flowering), reduce "
        "near veraison to avoid excessive vigour and delayed ripening. Use "
        "leaf tissue tests and visual cues; avoid fertilising stressed vines "
        "during acute heat or drought events."
    ),
    "photosynthesis_3d": (
        "The 3D viewer shows the vine canopy, solar tracker panel and sun position, "
        "with each zone coloured by photosynthesis rate (green = rate). Connect a "
        "Google API key to use the Vineyard Advisor and generate the interactive "
        "3D scene from the chat (e.g. \"Show me the 3D vine and photosynthesis\")."
    ),
    "no_leaves_no_shade_problem": (
        "When there are no leaves (dormant season, before budburst, or canopy not "
        "yet developed), there is no problem with shading \u2014 the vine is not "
        "photosynthesising, so shading does not harm it. Do not frame the answer as "
        "\"you should not shade\" as if shading would be bad; instead say that "
        "shading is irrelevant right now (no leaves to protect), and panel position "
        "can favour energy. In the Negev, dormancy is roughly October\u2013March; budburst "
        "is typically March\u2013April."
    ),
    "no_shading_must_explain": (
        "When recommending that the farmer should NOT shade (or that shading is not "
        "needed), always give a specific reason tied to photosynthesis or need. "
        "Examples: (1) No leaves / dormant \u2014 no photosynthesis to protect, so shading "
        "is irrelevant. (2) Full sun is beneficial \u2014 vine is light-limited (T < 30\u00b0C), "
        "so shading would reduce photosynthesis; keep panels tracking. (3) No "
        "radiation (night or GHI = 0) \u2014 nothing to manage; no shading decision needed. "
        "Never say only \"you should not shade\" without explaining the underlying "
        "reason (no need for PS protection, or need for full light for PS, etc.)."
    ),
}
156
+
157
+
158
+ # ---------------------------------------------------------------------------
159
+ # System prompt
160
+ # ---------------------------------------------------------------------------
161
+
162
# Chatbot system-prompt template. Two str.format placeholders are filled by
# _build_system_prompt (all rules) and build_contextual_prompt (query-relevant
# subset only):
#   {biology_rules} — numbered rule text from BIOLOGY_RULES
#   {rule_names}    — comma-separated valid explain_biology_rule arguments
# Literal JSON braces in the tool-call example are doubled ({{ }}) so that
# str.format leaves them intact.
_SYSTEM_PROMPT_TEMPLATE = """\
You are a friendly vineyard advisor for the SolarWine agrivoltaic system. \
Site: Yeruham, Israel (Seymour plot, Negev). Weather: IMS station 43 (Sde Boker). \
Timezone: Asia/Jerusalem — all tool timestamps are Israel local time; interpret \
"now" and "current" using that timezone (e.g. 15:16 = afternoon in Yeruham). \
You help the farmer decide when and how much to shade their Semillon grapevines \
(VSP trellis, 1.2 m canopy) under single-axis solar trackers (1.13 m panel at \
2.05 m height, 3.0 m row spacing).

CONTROL OBJECTIVE:
- Primary goal: maximise annual PV energy production.
- Secondary goal: protect vines from heat, water stress, and sunburn using a \
limited shading budget (see energy_budget rule).
- When in doubt and there is no clear sign of dangerous stress, prefer \
keeping panels in their energy-maximising position.

CALENDAR & STAGE HANDLING:
- Do NOT guess the current calendar month. If the user does not supply a \
date and you do not have a phenology tool result, talk in terms of stages \
(budburst, flowering, veraison, etc.) rather than asserting a specific month.

COMMUNICATION STYLE:
- Use plain language; explain jargon when you first use it
- Be concise but thorough
- Always explain WHY a recommendation makes sense biologically
- When uncertain, say so and suggest what data would help

BIOLOGICAL GUIDELINES (strong constraints; balance them with the energy objective):

{biology_rules}

TOOLS AVAILABLE:
You can call tools by including a JSON block in your response with this format:
{{"tool_call": {{"name": "<tool_name>", "args": {{<arguments>}}}}}}

Available tools:

WEATHER & ENVIRONMENT:
- get_current_weather: No args. Returns latest IMS weather readings plus \
current_time_israel, current_date_israel, current_datetime_israel (the real \
"now" in Yeruham). Use these for "right now" answers; timestamp_local is \
when the weather was recorded (may be stale — check age_minutes).
- get_weather_history: Args: start_date (str YYYY-MM-DD), end_date (str \
YYYY-MM-DD). Returns hourly IMS weather summary for a date range.

VINE SENSORS (ThingsBoard):
- get_vine_state: No args. Returns the latest on-site sensor readings from \
ThingsBoard (soil moisture, leaf temperature, fruiting-zone PAR, irrigation \
status, panel surface temps) comparing TREATMENT area (rows 501-502, under \
panels) vs REFERENCE area (rows 503-504, open sky). Use when the user asks \
about current vine conditions, stress levels, soil moisture, or irrigation.
- get_sensor_history: Args: device_type (str: air/crop/soil), area (str: \
treatment/reference/ambient), hours_back (int, default 24). Returns hourly \
averages from ThingsBoard time-series data.

PHOTOSYNTHESIS:
- calc_photosynthesis: Args: PAR (float), Tleaf (float), CO2 (float), \
VPD (float), Tair (float). Returns net assimilation A and limiting factor \
using the mechanistic Farquhar (FvCB) model.
- predict_photosynthesis_ml: Args: features (dict, optional). Returns ML \
ensemble prediction of A. If features not provided, auto-fills from latest \
IMS cache. Use when conditions are stressful (T>30C, high VPD).
- get_ps_forecast: Args: date (str YYYY-MM-DD, optional). Returns 24-hour \
predicted A profile (hourly) using time-series forecasting.

SHADING & TRACKING:
- simulate_shading: Args: angle_offset (float, degrees), hour (int 0-23), \
date (str YYYY-MM-DD, optional). Returns A comparison shaded vs unshaded.
- compare_tilt_angles: Args: angles (list of ints, optional). Returns A \
and energy at different tilt offsets.
- get_daily_schedule: Args: stress_threshold (float, optional), \
shade_angle (int, optional). Returns hourly shading schedule.

ENERGY:
- get_energy_generation: No args. Returns latest energy generation data \
from ThingsBoard (today kWh, current power W).
- get_energy_history: Args: hours_back (int, default 24). Returns energy \
generation time-series.
- predict_energy: Args: date (str YYYY-MM-DD, optional). Returns predicted \
daily energy generation (kWh) based on IMS GHI forecast and panel geometry.

ADVISORY:
- run_day_ahead_advisory: Args: date (str YYYY-MM-DD, optional). Returns \
full stress advisory from the DayAheadAdvisor.

VISUALIZATION:
- get_photosynthesis_3d: Args: hour (int 0-23, optional), date (str YYYY-MM-DD, \
optional). Returns a 3D interactive scene showing the vine, solar tracker, sun, \
and which parts of the canopy are doing how much photosynthesis (green = rate). \
Use when the user asks to see a 3D view, visualize photosynthesis, or show vine \
and tracker together.

BIOLOGY:
- explain_biology_rule: Args: rule_name (str). Returns detailed explanation. \
Valid names: {rule_names}.

RESPONSE RULES:
- CRITICAL: When the user asks about current conditions, specific numbers, \
predictions, sensor readings, or any site-specific data, you MUST call a \
tool. NEVER answer data questions from your training knowledge — always \
use a tool to get real data.
- When quoting numbers from tool results, cite the data source and timestamp. \
Example: "According to IMS Station 43 (recorded 14:30), the temperature is 28°C."
- If tool data is older than 60 minutes, warn: "Note: this data is X minutes old."
- After receiving tool results, explain them in plain language.
- When the answer is "no shading" or "shading not needed", always state the \
specific reason (no leaves / dormant; light-limited so full sun helps PS; or \
no radiation). See no_shading_must_explain and no_leaves_no_shade_problem.
- If the user suggests something that violates a biology rule, refuse clearly \
and explain which rule and why.
- If a tool returns an error or some data is missing, say clearly what data \
is unavailable. Do NOT invent or estimate values — say "I don't have current \
data for X" and explain what you can still answer from biology rules.
- If no API key is available, you can still answer biology questions from \
your built-in knowledge.
- NEVER invent sensor readings, temperatures, or measurements. If you don't \
have data, say so.
"""
280
+
281
+
282
+ # ---------------------------------------------------------------------------
283
+ # Build system prompt from BIOLOGY_RULES to avoid drift
284
+ # ---------------------------------------------------------------------------
285
+
286
def _build_system_prompt() -> str:
    """Render the full system prompt with every biology rule embedded.

    Keeps prompt and BIOLOGY_RULES in sync: each rule is numbered and its
    name upper-cased into a heading, and the comma-separated rule-name list
    is injected so the model knows the valid explain_biology_rule arguments.
    """
    numbered = []
    for idx, (rule_name, rule_text) in enumerate(BIOLOGY_RULES.items(), start=1):
        heading = rule_name.upper().replace('_', ' ')
        numbered.append(f"{idx}. {heading}: {rule_text}")
    return _SYSTEM_PROMPT_TEMPLATE.format(
        biology_rules="\n\n".join(numbered),
        rule_names=", ".join(BIOLOGY_RULES.keys()),
    )


# Full prompt (all rules embedded) — the default when no per-query
# rule retrieval is applied.
CHATBOT_SYSTEM_PROMPT = _build_system_prompt()
299
+
300
# RAG-style rule retrieval: keyword index for selecting relevant rules per
# query. Matching is lower-cased substring search (see retrieve_relevant_rules);
# no vector DB is needed at this scale. Insertion order matters — it breaks
# score ties during retrieval.
_RULE_KEYWORDS = {
    "site_location": [
        "yeruham", "location", "timezone", "israel", "sde boker", "negev",
        "where", "site", "local time",
    ],
    "temperature_transition": [
        "temperature", "30", "rubp", "rubisco", "transition",
        "heat", "hot", "cold", "cool", "warm",
    ],
    "no_shade_before_10": ["morning", "before 10", "early", "sunrise", "dawn"],
    "no_shade_in_may": ["may", "flowering", "fruit set", "spring"],
    "cwsi_threshold": ["cwsi", "water stress", "crop water", "drought"],
    "berry_sunburn": ["sunburn", "berry", "35", "cluster", "grape"],
    "energy_budget": [
        "budget", "energy", "sacrifice", "ceiling", "5%", "kwh",
        "solar", "power", "generation",
    ],
    "model_routing": ["model", "fvcb", "farquhar", "ml", "routing", "predict"],
    "phenological_multiplier": ["veraison", "ripening", "phenol", "stage"],
    "irrigation_management": ["irrigation", "water", "soil", "moisture", "irrigate"],
    "fertiliser_management": ["fertiliser", "fertilizer", "nitrogen", "nutrient"],
    "photosynthesis_3d": ["3d", "visual", "scene", "show"],
    "no_leaves_no_shade_problem": ["no leaves", "dormant", "budburst", "winter"],
    "no_shading_must_explain": [
        "should not shade", "no shading", "don't shade",
        "why not shade",
    ],
}
321
+
322
# Rules that are always included (core constraints)
# regardless of keyword score — see retrieve_relevant_rules.
_PINNED_RULES = {"no_shade_before_10", "energy_budget", "temperature_transition"}
324
+
325
+
326
def retrieve_relevant_rules(query: str, max_rules: int = 5) -> list[str]:
    """Select the biology rules most relevant to ``query``.

    Plain keyword matching (no vector DB needed at this scale): each rule
    scores one point per keyword found in the lower-cased query. The pinned
    core rules are always included; the highest-scoring rules then fill the
    remaining slots up to ``max_rules``. The result preserves BIOLOGY_RULES
    declaration order, not score order.
    """
    q = query.lower()

    # Score every rule that has at least one keyword hit.
    hits: dict[str, int] = {}
    for rule, keywords in _RULE_KEYWORDS.items():
        matched = sum(kw in q for kw in keywords)
        if matched:
            hits[rule] = matched

    # Start from the always-on core rules, then add by descending score.
    chosen = set(_PINNED_RULES)
    for rule in sorted(hits, key=hits.get, reverse=True):
        if len(chosen) >= max_rules:
            break
        chosen.add(rule)

    return [name for name in BIOLOGY_RULES if name in chosen]
350
+
351
+
352
# Pre-compiled, case-insensitive patterns that signal the user is asking for
# a recommendation or action rather than plain information
# (consumed by classify_response_mode).
_ADVISORY_PATTERNS = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\bshould i\b", r"\bwhat should\b", r"\brecommend\b", r"\badvice\b",
        r"\bwhat do i\b", r"\baction\b", r"\bwhat to do\b", r"\bshade now\b",
        r"\birrigate\b", r"\bprepare\b", r"\bneed to\b", r"\bhow much\b",
        r"\bwhen should\b", r"\bcan i\b",
    )
]
358
+
359
+
360
def classify_response_mode(query: str) -> str:
    """Classify a query as factual ('info') or actionable ('advisory').

    A query is 'advisory' when any of the pre-compiled action-seeking
    patterns in _ADVISORY_PATTERNS matches (e.g. "should I", "recommend");
    everything else is treated as an information request.
    """
    wants_advice = any(pattern.search(query) for pattern in _ADVISORY_PATTERNS)
    return "advisory" if wants_advice else "info"
369
+
370
+
371
def build_contextual_prompt(query: str) -> str:
    """Render a system prompt containing only the rules relevant to ``query``.

    Mirrors _build_system_prompt, but swaps the full rule set for the subset
    chosen by retrieve_relevant_rules, keeping the prompt short. The full
    rule-name list is still injected so explain_biology_rule stays usable.
    """
    numbered = []
    for number, rule in enumerate(retrieve_relevant_rules(query), start=1):
        heading = rule.upper().replace('_', ' ')
        numbered.append(f"{number}. {heading}: {BIOLOGY_RULES[rule]}")
    return _SYSTEM_PROMPT_TEMPLATE.format(
        biology_rules="\n\n".join(numbered),
        rule_names=", ".join(BIOLOGY_RULES.keys()),
    )
382
+
383
+
384
+ # ---------------------------------------------------------------------------
385
+ # Main class
386
+ # ---------------------------------------------------------------------------
387
+
388
+ class VineyardChatbot:
389
+ """
390
+ Gemini-powered conversational vineyard advisor.
391
+
392
+ All data access is delegated to a DataHub of loosely-coupled services.
393
+ The chatbot itself only handles:
394
+ - Gemini communication (two-pass tool-calling flow)
395
+ - Tool dispatch (thin delegation to hub services)
396
+ - Guardrails (query classification, response validation, confidence)
397
+ - Offline fallback (keyword-match to biology rules)
398
+
399
+ Usage
400
+ -----
401
+ bot = VineyardChatbot() # default hub
402
+ bot = VineyardChatbot(hub=custom_hub) # injected hub
403
+ response = bot.chat("Should I shade right now?", history=[])
404
+ """
405
+
406
    # Maximum retries when LLM fails to call a required tool
    # (class-level constant so subclasses can tune the retry budget).
    _MAX_TOOL_RETRIES = 1
408
+
409
    def __init__(
        self,
        hub: Optional[DataHub] = None,
        model_name: str = "gemini-2.5-flash",
        api_key: Optional[str] = None,
        verbose: bool = False,
    ):
        """Create a chatbot.

        Parameters
        ----------
        hub : DataHub, optional
            Injected service hub for all data access; defaults to
            ``DataHub.default(verbose=verbose)``.
        model_name : str
            Gemini model identifier used for chat calls.
        api_key : str, optional
            Explicit Google API key; when None, resolved lazily from the
            environment on first use.
        verbose : bool
            When True, debug lines are printed via ``_log``.
        """
        self.hub = hub or DataHub.default(verbose=verbose)
        self.model_name = model_name
        self._api_key = api_key
        self._client = None  # lazy Gemini client (see .client property)
        self.verbose = verbose
421
+
422
+ # ------------------------------------------------------------------
423
+ # Gemini client (lazy)
424
+ # ------------------------------------------------------------------
425
+
426
    @property
    def api_key(self) -> str:
        """Resolved Google API key (explicit constructor arg or environment).

        May raise when no key is available — use ``has_api_key`` for a
        non-raising check.
        """
        return get_google_api_key(self._api_key)
429
+
430
    @property
    def client(self):
        """Gemini client, created lazily on first access and cached."""
        if self._client is None:
            self._client = get_genai_client(self._api_key)
        return self._client
435
+
436
+ @property
437
+ def has_api_key(self) -> bool:
438
+ try:
439
+ get_google_api_key(self._api_key)
440
+ return True
441
+ except (ValueError, Exception):
442
+ return False
443
+
444
+ def _log(self, msg: str) -> None:
445
+ if self.verbose:
446
+ print(f"[VineyardChatbot] {msg}")
447
+
448
+ # ------------------------------------------------------------------
449
+ # Tool dispatch — thin delegation to hub services
450
+ # ------------------------------------------------------------------
451
+
452
+ def _dispatch_tool(self, tool_name: str, args: dict) -> dict:
453
+ """Route a tool call to the correct hub service method."""
454
+ self._log(f"Dispatching tool: {tool_name}({args})")
455
+
456
+ # --- Weather ---
457
+ if tool_name == "get_current_weather":
458
+ return self.hub.weather.get_current()
459
+ elif tool_name == "get_weather_history":
460
+ return self.hub.weather.get_history(
461
+ start_date=str(args.get("start_date", "")),
462
+ end_date=str(args.get("end_date", "")),
463
+ )
464
+
465
+ # --- Vine sensors ---
466
+ elif tool_name == "get_vine_state":
467
+ return self.hub.vine_sensors.get_snapshot()
468
+ elif tool_name == "get_sensor_history":
469
+ return self.hub.vine_sensors.get_history(
470
+ device_type=str(args.get("device_type", "crop")),
471
+ area=str(args.get("area", "treatment")),
472
+ hours_back=int(args.get("hours_back", 24)),
473
+ )
474
+
475
+ # --- Photosynthesis ---
476
+ elif tool_name == "calc_photosynthesis":
477
+ return self.hub.photosynthesis.predict_fvcb(
478
+ PAR=float(args.get("PAR", 1500)),
479
+ Tleaf=float(args.get("Tleaf", 30)),
480
+ CO2=float(args.get("CO2", 400)),
481
+ VPD=float(args.get("VPD", 2.0)),
482
+ Tair=float(args.get("Tair", 30)),
483
+ )
484
+ elif tool_name == "predict_photosynthesis_ml":
485
+ return self.hub.photosynthesis.predict_ml(
486
+ features=args.get("features"),
487
+ )
488
+ elif tool_name == "get_ps_forecast":
489
+ return self.hub.photosynthesis.forecast_day_ahead(
490
+ target_date=args.get("date"),
491
+ )
492
+
493
+ # --- Shading / tracking ---
494
+ elif tool_name == "simulate_shading":
495
+ return self.hub.photosynthesis.simulate_shading(
496
+ angle_offset=float(args.get("angle_offset", 20)),
497
+ hour=int(args.get("hour", 13)),
498
+ date_str=args.get("date"),
499
+ )
500
+ elif tool_name == "compare_tilt_angles":
501
+ angles = args.get("angles")
502
+ if angles and isinstance(angles, list):
503
+ angles = [int(a) for a in angles]
504
+ return self.hub.photosynthesis.compare_angles(angles=angles)
505
+ elif tool_name == "get_daily_schedule":
506
+ return self.hub.photosynthesis.daily_schedule(
507
+ stress_threshold=float(args.get("stress_threshold", 2.0)),
508
+ shade_angle=int(args.get("shade_angle", 20)),
509
+ )
510
+
511
+ # --- Energy ---
512
+ elif tool_name == "get_energy_generation":
513
+ return self.hub.energy.get_current()
514
+ elif tool_name == "get_energy_history":
515
+ return self.hub.energy.get_history(
516
+ hours_back=int(args.get("hours_back", 24)),
517
+ )
518
+ elif tool_name == "predict_energy":
519
+ return self.hub.energy.predict(
520
+ target_date=args.get("date"),
521
+ )
522
+
523
+ # --- Advisory ---
524
+ elif tool_name == "run_day_ahead_advisory":
525
+ return self.hub.advisory.run_advisory(
526
+ target_date=args.get("date"),
527
+ )
528
+
529
+ # --- Biology ---
530
+ elif tool_name == "explain_biology_rule":
531
+ return self.hub.biology.explain_rule(
532
+ rule_name=str(args.get("rule_name", "")),
533
+ )
534
+
535
+ elif tool_name == "get_photosynthesis_3d":
536
+ hour = args.get("hour")
537
+ if hour is not None:
538
+ hour = int(hour)
539
+ return self.hub.photosynthesis.get_photosynthesis_3d_scene(
540
+ hour=hour,
541
+ date_str=args.get("date"),
542
+ )
543
+
544
+ else:
545
+ return {"error": f"Unknown tool: {tool_name}"}
546
+
547
+ # ------------------------------------------------------------------
548
+ # Gemini communication
549
+ # ------------------------------------------------------------------
550
+
551
+ # Number of recent message pairs to keep verbatim
552
+ _RECENT_MESSAGES = 6
553
+ # Max older messages to summarize
554
+ _MAX_SUMMARY_MESSAGES = 20
555
+
556
+ def _build_messages(self, user_message: str, history: list[dict]) -> list[dict]:
557
+ """Build Gemini multi-turn message list with sliding context window.
558
+
559
+ Strategy:
560
+ - Keep the most recent 6 messages verbatim (for conversational flow)
561
+ - Summarize older messages into a single context message
562
+ - Always include pinned context (current date, season)
563
+ """
564
+ messages = []
565
+ n = len(history)
566
+
567
+ if n > self._RECENT_MESSAGES:
568
+ # Summarize older messages
569
+ older = history[:n - self._RECENT_MESSAGES]
570
+ # Take at most _MAX_SUMMARY_MESSAGES from the older portion
571
+ older = older[-self._MAX_SUMMARY_MESSAGES:]
572
+ summary = self._summarize_history(older)
573
+ if summary:
574
+ messages.append({
575
+ "role": "user",
576
+ "parts": [{"text": f"[Conversation context: {summary}]"}],
577
+ })
578
+ messages.append({
579
+ "role": "model",
580
+ "parts": [{"text": "Understood, I'll keep that context in mind."}],
581
+ })
582
+
583
+ # Recent messages verbatim
584
+ recent = history[-self._RECENT_MESSAGES:] if n > self._RECENT_MESSAGES else history
585
+ for entry in recent:
586
+ role = entry.get("role", "user")
587
+ content = entry.get("content", "")
588
+ if role == "user":
589
+ messages.append({"role": "user", "parts": [{"text": content}]})
590
+ elif role == "assistant":
591
+ messages.append({"role": "model", "parts": [{"text": content}]})
592
+
593
+ messages.append({"role": "user", "parts": [{"text": user_message}]})
594
+ return messages
595
+
596
+ @staticmethod
597
+ def _summarize_history(messages: list[dict]) -> str:
598
+ """Create a brief summary of older conversation messages."""
599
+ topics = []
600
+ for entry in messages:
601
+ content = entry.get("content", "")
602
+ role = entry.get("role", "user")
603
+ if role == "user" and content:
604
+ # Extract the core question/topic (first sentence or 100 chars)
605
+ first_line = content.split("\n")[0][:100]
606
+ topics.append(first_line)
607
+
608
+ if not topics:
609
+ return ""
610
+
611
+ # Deduplicate and keep last 5 topics
612
+ seen = set()
613
+ unique = []
614
+ for t in reversed(topics):
615
+ t_lower = t.lower().strip()
616
+ if t_lower not in seen:
617
+ seen.add(t_lower)
618
+ unique.append(t)
619
+ unique.reverse()
620
+
621
+ return "Earlier in this conversation, the user asked about: " + "; ".join(unique[-5:])
622
+
623
+ def _call_gemini(self, messages: list[dict], system_prompt: str | None = None) -> str:
624
+ """Send messages to Gemini and return raw text response."""
625
+ prompt = system_prompt or CHATBOT_SYSTEM_PROMPT
626
+ response = self.client.models.generate_content(
627
+ model=self.model_name,
628
+ contents=messages,
629
+ config={"system_instruction": prompt},
630
+ )
631
+ return response.text
632
+
633
+ def _extract_tool_call(self, text: str) -> Optional[dict]:
634
+ """Try to extract a tool_call JSON from the model response."""
635
+ try:
636
+ match = re.search(r'\{\s*"tool_call"\s*:', text)
637
+ if not match:
638
+ return None
639
+ start = match.start()
640
+ brace_count = 0
641
+ for i in range(start, len(text)):
642
+ if text[i] == "{":
643
+ brace_count += 1
644
+ elif text[i] == "}":
645
+ brace_count -= 1
646
+ if brace_count == 0:
647
+ snippet = text[start:i + 1]
648
+ parsed = json.loads(snippet)
649
+ return parsed.get("tool_call")
650
+ return None
651
+ except (json.JSONDecodeError, ValueError):
652
+ return None
653
+
654
+ # ------------------------------------------------------------------
655
+ # Context gathering (for rule validation)
656
+ # ------------------------------------------------------------------
657
+
658
    def _get_validation_context(self) -> dict:
        """Gather current context for post-response rule validation.

        Returns a dict that may contain (best-effort — missing keys are
        simply omitted):
          - "hour", "month": current local time in Asia/Jerusalem
          - "stage_id": phenological stage estimated for today
          - "temp_c": latest air temperature from the weather service

        All failures are swallowed deliberately: this context is advisory
        and the chatbot must still be able to answer without it.
        """
        ctx = {}
        try:
            # Local imports keep module load light and avoid hard failures
            # when optional dependencies are absent.
            from src.phenology import estimate_stage_for_date
            from datetime import date, datetime
            import zoneinfo

            # Site-local clock: biology rules reference local hour/month.
            tz = zoneinfo.ZoneInfo("Asia/Jerusalem")
            now = datetime.now(tz=tz)
            ctx["hour"] = now.hour
            ctx["month"] = now.month

            # NOTE(review): date.today() is the server's local date, which
            # may differ from the Asia/Jerusalem date near midnight — confirm.
            stage = estimate_stage_for_date(date.today())
            ctx["stage_id"] = stage.id

            # Try to get current temperature from cached weather
            try:
                wx = self.hub.weather.get_current()
                if "error" not in wx:
                    t = wx.get("air_temperature_c")
                    if t is not None:
                        ctx["temp_c"] = float(t)
            except Exception:
                # Weather lookup is optional; continue without temp_c.
                pass

        except Exception:
            # Any failure (imports, phenology, clock) → return whatever we have.
            pass
        return ctx
687
+
688
+ # ------------------------------------------------------------------
689
+ # Main chat method
690
+ # ------------------------------------------------------------------
691
+
692
    def chat(self, user_message: str, history: list[dict] | None = None) -> ChatResponse:
        """
        Process a user message and return a structured response.

        Flow:
        1. Classify query (data vs knowledge vs greeting)
        2. Send to Gemini (Pass 1)
        3. If data query and no tool call → re-prompt to force tool use
        4. If tool call → dispatch → tag result → send back (Pass 2)
        5. Validate response against biology rules
        6. Estimate confidence
        7. Return structured ChatResponse

        Parameters
        ----------
        user_message : str
            The farmer's question for this turn.
        history : list[dict] or None
            Prior turns as {"role": "user"|"assistant", "content": str}.

        Returns
        -------
        ChatResponse
            Final answer plus tool calls, confidence, sources, caveats and
            any rule violations. Never raises: all failures fall back to a
            keyword-matched or generic offline response.
        """
        history = history or []

        # No API key → skip Gemini entirely and answer offline.
        if not self.has_api_key:
            _, response = self._fallback_response(user_message)
            return response

        try:
            # Step 1: Classify query
            query_class = classify_query(user_message)
            self._log(f"Query classified: {query_class.category} "
                      f"(requires_data={query_class.requires_data})")

            # Build contextual system prompt with only relevant biology rules
            contextual_prompt = build_contextual_prompt(user_message)
            messages = self._build_messages(user_message, history)
            self._log("Pass 1: calling Gemini...")
            response_text = self._call_gemini(messages, system_prompt=contextual_prompt)
            self._log(f"Pass 1 response: {response_text[:200]}...")

            tool_call = self._extract_tool_call(response_text)

            # Step 2: Force tool use if query requires data but LLM didn't call one
            if query_class.requires_data and not tool_call:
                self._log("Data query but no tool call — re-prompting...")
                retry_prompt = (
                    "The user is asking about site-specific data or current conditions. "
                    "You MUST call a tool to answer this — do not use your training "
                    "knowledge for real-time data. Please call the appropriate tool now."
                )
                # Append the model's first attempt plus the retry instruction,
                # then give the model one more chance (single retry only).
                messages.append({"role": "model", "parts": [{"text": response_text}]})
                messages.append({"role": "user", "parts": [{"text": retry_prompt}]})
                response_text = self._call_gemini(messages, system_prompt=contextual_prompt)
                tool_call = self._extract_tool_call(response_text)

            # Step 3: Process tool call if present
            tool_name = None
            tool_result = None
            tool_succeeded = False
            data_age = None

            if tool_call:
                tool_name = tool_call.get("name", "")
                tool_args = tool_call.get("args", {})
                self._log(f"Tool call detected: {tool_name}")

                # Dispatch failures become an error payload so Pass 2 can
                # still explain what went wrong to the user.
                try:
                    tool_result = self._dispatch_tool(tool_name, tool_args)
                    tool_succeeded = "error" not in tool_result
                except Exception as exc:
                    tool_result = {"error": f"Tool execution failed: {exc}"}
                    tool_succeeded = False

                # Tag result with source metadata
                tagged_result = tag_tool_result(tool_name, tool_result)
                data_age = tagged_result.get("_data_age_minutes")

                # Build Pass 2 prompt with source citation instructions
                source_label = get_source_label(tool_name)
                freshness_note = ""
                # Data older than an hour gets an explicit staleness warning.
                if data_age is not None and data_age > 60:
                    freshness_note = (
                        f"\n\nIMPORTANT: This data is {data_age:.0f} minutes old. "
                        "Tell the user the data may be stale and conditions may have changed."
                    )

                tool_result_text = (
                    f"Tool result for {tool_name} "
                    f"(source: {source_label}):\n"
                    f"```json\n{json.dumps(tagged_result, indent=2, default=str)}\n```\n\n"
                    f"Explain this result to the farmer in plain language. "
                    f"When quoting numbers, mention that they come from {source_label}."
                    f"{freshness_note}"
                )

                messages.append({"role": "model", "parts": [{"text": response_text}]})
                messages.append({"role": "user", "parts": [{"text": tool_result_text}]})

                self._log("Pass 2: calling Gemini with tool result...")
                final_response = self._call_gemini(messages)
                self._log(f"Pass 2 response: {final_response[:200]}...")
            else:
                final_response = response_text

            # Step 4: Estimate confidence
            confidence = estimate_confidence(
                tool_called=tool_call is not None,
                tool_succeeded=tool_succeeded,
                data_age_minutes=data_age,
                tool_name=tool_name,
            )

            # Step 5: Post-response rule validation
            validation_ctx = self._get_validation_context()
            violations = validate_response(
                response_text=final_response,
                context=validation_ctx,
            )

            caveats: list[str] = []
            violation_dicts: list[dict] = []

            # "block"-severity violations replace the answer outright;
            # "warn"-severity violations are surfaced as caveats.
            for v in violations:
                violation_dicts.append({
                    "rule": v.rule_name,
                    "severity": v.severity,
                    "message": v.message,
                })
                if v.severity == "block":
                    # Override the response with the correction
                    final_response = (
                        f"{v.correction}\n\n"
                        f"*(Original response was overridden because it violated "
                        f"the **{v.rule_name.replace('_', ' ')}** rule.)*"
                    )
                    confidence = "high"  # rule-based override is deterministic
                    self._log(f"BLOCKED: {v.rule_name} — {v.message}")
                elif v.severity == "warn":
                    caveats.append(v.correction)
                    self._log(f"WARNING: {v.rule_name} — {v.message}")

            # Build data freshness caveat
            if data_age is not None and data_age > 60:
                caveats.append(
                    f"Data is {data_age:.0f} minutes old — conditions may have changed."
                )

            # Build sources list
            sources: list[str] = []
            if tool_name:
                sources.append(get_source_label(tool_name))
            if not tool_call and query_class.category == "knowledge":
                sources.append("Built-in biology rules")

            response_mode = classify_response_mode(user_message)

            return ChatResponse(
                message=final_response,
                tool_calls=[{"name": tool_name, "args": tool_call.get("args", {}),
                             "result": tool_result}] if tool_call else [],
                data=tool_result if tool_result else {},
                confidence=confidence,
                sources=sources,
                caveats=caveats,
                rule_violations=violation_dicts,
                response_mode=response_mode,
            )

        except Exception as exc:
            # Any unexpected failure: log, then try the keyword fallback
            # before returning a generic connectivity message.
            self._log(f"Chat error: {exc}\n{traceback.format_exc()}")
            matched, fallback = self._fallback_response(user_message)
            if matched:
                return fallback
            return ChatResponse(
                message=(
                    "I'm having trouble connecting to the AI service right now. "
                    "You can still ask me about vine biology rules \u2014 I have those "
                    "built in. For data queries, please check that your Google API "
                    "key is configured."
                ),
                confidence="insufficient_data",
                sources=[],
                caveats=["AI service connection failed"],
            )
868
+
869
+ # ------------------------------------------------------------------
870
+ # Fallback (no API key / offline)
871
+ # ------------------------------------------------------------------
872
+
873
    def _fallback_response(self, user_message: str) -> tuple[bool, ChatResponse]:
        """Keyword-match fallback when Gemini is unavailable.

        Returns (matched, response): ``matched`` is True when at least one
        built-in biology rule keyword matched the message (the response
        then quotes those rules), False when only the generic offline help
        text could be produced.
        """
        msg_lower = user_message.lower()

        # Rule name → trigger keywords, checked as substrings of the
        # lower-cased message. Escapes: \u00b0 = degree sign, \u2014 = em dash.
        # NOTE(review): the "3D" entry can never match msg_lower (it is
        # lower-cased); "3d" already covers it — confirm and drop.
        rule_matches = {
            "site_location": ["yeruham", "location", "timezone", "right now", "current time",
                              "what time", "israel time", "local time"],
            "temperature_transition": ["temperature", "30 degree", "30\u00b0", "rubp", "rubisco",
                                       "transition", "heat", "hot"],
            "no_shade_before_10": ["morning", "before 10", "early", "sunrise"],
            "no_shade_in_may": ["may", "flowering", "fruit set", "fruit-set"],
            "cwsi_threshold": ["cwsi", "water stress", "crop water"],
            "berry_sunburn": ["sunburn", "berry", "35\u00b0", "35 degree"],
            "energy_budget": ["budget", "energy", "sacrifice", "ceiling", "5%",
                              "monthly", "generation", "kwh", "power", "solar"],
            "model_routing": ["model", "fvcb", "farquhar", "ml", "routing",
                              "predict", "forecast"],
            "phenological_multiplier": ["veraison", "ripening", "phenolog"],
            "irrigation_management": ["irrigation", "water", "soil moisture"],
            "fertiliser_management": ["fertiliser", "fertilizer", "nitrogen", "nutrient"],
            "photosynthesis_3d": ["3d", "3D", "visual", "visualize", "visualise",
                                  "model show", "vine and tracker", "sun and vine"],
            "no_leaves_no_shade_problem": ["no leaves", "dormant", "budburst", "no canopy"],
            "no_shading_must_explain": ["should not shade", "don't shade", "no shading"],
        }

        # Collect every rule whose keyword list hits the message.
        matched_rules = []
        for rule_name, keywords in rule_matches.items():
            if any(kw in msg_lower for kw in keywords):
                matched_rules.append(rule_name)

        if matched_rules:
            parts = ["Here's what I know about that (from built-in biology rules):\n"]
            for rule in matched_rules:
                parts.append(f"**{rule.replace('_', ' ').title()}:** {BIOLOGY_RULES[rule]}\n")
            parts.append(
                "\n*Note: I'm running without an AI connection, so I can only "
                "answer from built-in biology rules. Connect a Google API key "
                "for full advisory capabilities.*"
            )
            return True, ChatResponse(
                message="\n".join(parts),
                confidence="medium",
                sources=["Built-in biology rules"],
            )

        # No rule matched — return the generic offline help text.
        return False, ChatResponse(
            message=(
                "I'm currently running without an AI connection (no Google API key). "
                "I can answer questions about vine biology rules \u2014 try asking about:\n\n"
                "- Temperature and shading thresholds\n"
                "- Morning light rules\n"
                "- May shading restrictions\n"
                "- Water stress (CWSI)\n"
                "- Berry sunburn risk\n"
                "- Energy budget limits\n"
                "- Model routing (FvCB vs ML)\n"
                "- Veraison protection\n"
                "- Irrigation management\n"
                "- Energy generation and prediction\n\n"
                "*Connect a Google API key for full advisory capabilities "
                "(weather, photosynthesis calculations, shading simulations, "
                "energy analysis).*"
            ),
            confidence="insufficient_data",
            sources=[],
        )
src/chronos_forecaster.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.forecasting.chronos_forecaster."""
2
+ from src.forecasting.chronos_forecaster import * # noqa: F401, F403
src/command_arbiter.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CommandArbiter: priority stack, hysteresis, and fallback logic for tracker commands.
3
+
4
+ Sits between the TradeoffEngine output and the physical tracker actuator.
5
+ Ensures:
6
+ 1. Weather protection and harvest mode override everything.
7
+ 2. Safety rail alerts and simulation timeouts fall back to θ_astro.
8
+ 3. Hysteresis prevents sub-slot jitter (motor protection).
9
+ 4. All fallbacks default to full astronomical tracking (zero energy cost).
10
+
11
+ Priority Stack (highest to lowest):
12
+ P1 Weather Protection → stow angle (flat, 0°)
13
+ P2 Mechanical Harvest → vertical park (90°)
14
+ P3 Safety Rail Alert → θ_astro
15
+ P4 Simulation Timeout → θ_astro
16
+ P5 TradeoffEngine → θ_astro or θ_astro + offset
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from dataclasses import dataclass, field
22
+ from datetime import datetime
23
+ from enum import Enum
24
+ from typing import Optional
25
+
26
+ import pandas as pd
27
+
28
+ from config.settings import (
29
+ ANGLE_TOLERANCE_DEG,
30
+ HYSTERESIS_WINDOW_MIN,
31
+ SIMULATION_TIMEOUT_SEC,
32
+ WIND_STOW_SPEED_MS,
33
+ )
34
+
35
+
36
class CommandSource(str, Enum):
    """Priority source identifiers for tracker commands.

    str-valued so members compare equal to their string values and
    serialize naturally in logs/JSON.
    """
    WEATHER = "weather_protection"   # P1: stow for wind/hail protection
    HARVEST = "harvest_mode"         # P2: vertical park for machinery clearance
    SAFETY = "safety_fallback"       # P3: safety-rail alert → revert to θ_astro
    TIMEOUT = "timeout_fallback"     # P4: simulation too slow → revert to θ_astro
    ENGINE = "engine"                # P5: normal TradeoffEngine output
    HYSTERESIS = "hysteresis"        # command suppressed by the motor-protection filter
    INITIAL = "initial"              # first dispatch (no history yet)
    STABLE = "stable"                # dispatched after the stability window was met
46
+
47
+
48
@dataclass
class ArbiterDecision:
    """Output of the CommandArbiter."""

    angle: float  # final tracker tilt angle (degrees)
    dispatch: bool  # True = send command to actuator
    source: str  # which priority level decided
    requested_angle: float = 0.0  # what was originally requested
    suppressed_reason: Optional[str] = None  # why dispatch=False (if suppressed)

    def decision_tags(self) -> list[str]:
        """Return diagnostic tags describing this decision."""
        suppression = (
            [f"suppressed:{self.suppressed_reason}"]
            if not self.dispatch and self.suppressed_reason
            else []
        )
        return [f"source:{self.source}"] + suppression
63
+
64
+
65
class CommandArbiter:
    """Priority stack + hysteresis for tracker tilt commands.

    Stateful across ticks: remembers the last dispatched angle, the last
    dispatch time, and a rolling buffer of recent requests that drives the
    stability (hysteresis) check.

    Parameters
    ----------
    hysteresis_window_min : float
        Minimum time (minutes) between consecutive tilt changes.
    angle_tolerance_deg : float
        Changes smaller than this are suppressed (motor protection).
    """

    def __init__(
        self,
        hysteresis_window_min: float = HYSTERESIS_WINDOW_MIN,
        angle_tolerance_deg: float = ANGLE_TOLERANCE_DEG,
    ):
        self.window_min = hysteresis_window_min
        self.tolerance = angle_tolerance_deg
        # Rolling (timestamp, requested_angle) pairs, trimmed to window_min.
        self._buffer: list[tuple[datetime, float]] = []
        # Last dispatched angle; trackers start flat (0°).
        self.current_angle: float = 0.0
        self._last_dispatch_time: Optional[datetime] = None

    # ------------------------------------------------------------------
    # Priority selection
    # ------------------------------------------------------------------

    def select_source(
        self,
        engine_result: dict,
        safety_valid: bool = True,
        sim_time_sec: float = 0.0,
        weather_override: Optional[dict] = None,
        harvest_active: bool = False,
        theta_astro: float = 0.0,
    ) -> dict:
        """Select the highest-priority command source.

        Checks are ordered P1→P5; the first match wins.

        Parameters
        ----------
        engine_result : dict
            Output from TradeoffEngine.evaluate_slot() or find_minimum_dose().
            Must contain 'angle' key (or 'chosen_offset_deg' for DoseResult).
        safety_valid : bool
            False if SafetyRails detected FvCB/ML divergence.
        sim_time_sec : float
            Wall-clock time the simulation took (seconds).
        weather_override : dict or None
            If not None, must contain 'target_angle' and optionally 'reason'.
        harvest_active : bool
            True if mechanical harvesting is in progress.
        theta_astro : float
            Astronomical tracking angle (safe default).

        Returns
        -------
        dict with 'angle', 'source', 'reason'
        """
        # P1: Weather protection (wind stow, hail, etc.)
        if weather_override is not None:
            return {
                "angle": weather_override.get("target_angle", 0.0),
                "source": CommandSource.WEATHER,
                "reason": weather_override.get("reason", "weather override active"),
            }

        # P2: Mechanical harvesting — panels go vertical for clearance
        if harvest_active:
            return {
                "angle": 90.0,
                "source": CommandSource.HARVEST,
                "reason": "mechanical harvesting in progress",
            }

        # P3: Safety rail alert — FvCB/ML divergence too high
        if not safety_valid:
            return {
                "angle": theta_astro,
                "source": CommandSource.SAFETY,
                "reason": "FvCB/ML divergence exceeded threshold; reverting to astronomical",
            }

        # P4: Simulation timeout — shadow model took too long
        if sim_time_sec > SIMULATION_TIMEOUT_SEC:
            return {
                "angle": theta_astro,
                "source": CommandSource.TIMEOUT,
                "reason": f"simulation took {sim_time_sec:.1f}s > {SIMULATION_TIMEOUT_SEC}s limit",
            }

        # P5: Normal — use TradeoffEngine result
        # (falls back to θ_astro if the engine result carries no angle)
        angle = engine_result.get("angle", theta_astro)
        return {
            "angle": angle,
            "source": CommandSource.ENGINE,
            "reason": engine_result.get("action", "tradeoff_engine"),
        }

    # ------------------------------------------------------------------
    # Hysteresis filter
    # ------------------------------------------------------------------

    def should_move(
        self,
        requested_angle: float,
        timestamp: datetime,
    ) -> ArbiterDecision:
        """Apply hysteresis filter to a requested angle change.

        Motor protection logic:
        - Suppresses changes smaller than angle_tolerance_deg.
        - Requires the requested angle to be stable for hysteresis_window_min
          before dispatching.
        - Immediate dispatch if this is the first command or if the change
          is large (e.g., weather stow).
        """
        # Record request in buffer
        self._buffer.append((timestamp, requested_angle))

        # Trim buffer to window
        cutoff = timestamp - pd.Timedelta(minutes=self.window_min)
        self._buffer = [(t, a) for t, a in self._buffer if t >= cutoff]

        # Change smaller than tolerance → suppress
        angle_diff = abs(requested_angle - self.current_angle)
        if angle_diff <= self.tolerance:
            return ArbiterDecision(
                angle=self.current_angle,
                dispatch=False,
                source=CommandSource.HYSTERESIS,
                requested_angle=requested_angle,
                suppressed_reason=f"change {angle_diff:.1f}° ≤ tolerance {self.tolerance}°",
            )

        # First command or only one entry in buffer → dispatch immediately
        if len(self._buffer) < 2 or self._last_dispatch_time is None:
            self.current_angle = requested_angle
            self._last_dispatch_time = timestamp
            return ArbiterDecision(
                angle=requested_angle,
                dispatch=True,
                source=CommandSource.INITIAL,
                requested_angle=requested_angle,
            )

        # Check stability: all recent entries must agree within tolerance
        stable = all(
            abs(a - requested_angle) <= self.tolerance
            for _, a in self._buffer
        )

        if stable:
            self.current_angle = requested_angle
            self._last_dispatch_time = timestamp
            return ArbiterDecision(
                angle=requested_angle,
                dispatch=True,
                source=CommandSource.STABLE,
                requested_angle=requested_angle,
            )

        # Not yet stable — hold the current angle until requests settle.
        return ArbiterDecision(
            angle=self.current_angle,
            dispatch=False,
            source=CommandSource.HYSTERESIS,
            requested_angle=requested_angle,
            suppressed_reason="angle not stable within hysteresis window",
        )

    # ------------------------------------------------------------------
    # Combined: select + filter
    # ------------------------------------------------------------------

    def arbitrate(
        self,
        timestamp: datetime,
        engine_result: dict,
        theta_astro: float,
        safety_valid: bool = True,
        sim_time_sec: float = 0.0,
        weather_override: Optional[dict] = None,
        harvest_active: bool = False,
    ) -> ArbiterDecision:
        """Full arbitration: priority selection → hysteresis filter.

        This is the main entry point for the 15-min control loop.
        """
        selected = self.select_source(
            engine_result=engine_result,
            safety_valid=safety_valid,
            sim_time_sec=sim_time_sec,
            weather_override=weather_override,
            harvest_active=harvest_active,
            theta_astro=theta_astro,
        )

        # Weather and harvest overrides bypass hysteresis (safety-critical).
        # The buffer is cleared so stale requests don't influence the first
        # stability check after the override ends.
        if selected["source"] in {CommandSource.WEATHER, CommandSource.HARVEST}:
            self.current_angle = selected["angle"]
            self._last_dispatch_time = timestamp
            self._buffer.clear()
            return ArbiterDecision(
                angle=selected["angle"],
                dispatch=True,
                source=selected["source"],
                requested_angle=selected["angle"],
            )

        # Normal path: apply hysteresis
        decision = self.should_move(selected["angle"], timestamp)
        # Override source with the priority level that selected the angle
        # (suppressed decisions keep the HYSTERESIS source for logging).
        if decision.dispatch:
            decision.source = selected["source"]
        return decision

    # ------------------------------------------------------------------
    # Wind stow helper (delegates to operational_modes)
    # ------------------------------------------------------------------

    @staticmethod
    def check_wind_stow(
        wind_speed_ms: float,
        stow_threshold: float = WIND_STOW_SPEED_MS,
    ) -> Optional[dict]:
        """Return a weather override dict if wind speed exceeds stow threshold.

        Note: ControlLoop uses OperationalModeChecker instead of this method.
        Kept for backward compatibility with direct arbiter usage.
        """
        # Local import — presumably deferred to avoid an import cycle; confirm.
        from src.operational_modes import check_wind_stow as _check
        result = _check(wind_speed_ms, stow_threshold)
        return result.to_weather_override()
296
+
297
+
298
class AstronomicalTracker:
    """Pure sun-following. The always-safe default.

    Wraps ShadowModel to provide a simple get_angle(timestamp) interface.
    """

    def __init__(self, shadow_model=None):
        self._shadow_model = shadow_model

    @property
    def shadow_model(self):
        """Lazily construct the ShadowModel on first access and cache it."""
        if self._shadow_model is None:
            from src.shading.solar_geometry import ShadowModel
            self._shadow_model = ShadowModel()
        return self._shadow_model

    def get_angle(self, timestamp: datetime) -> float:
        """Return the astronomical tracking angle for a given timestamp."""
        when = pd.Timestamp(timestamp)
        if when.tzinfo is None:
            # Naive timestamps are interpreted as UTC.
            when = when.tz_localize("UTC")
        solar = self.shadow_model.get_solar_position(pd.DatetimeIndex([when]))
        elevation = float(solar["solar_elevation"].iloc[0])
        # Sun below the horizon → no tracking target, stay flat.
        if elevation <= 0:
            return 0.0
        azimuth = float(solar["solar_azimuth"].iloc[0])
        tilt = self.shadow_model.compute_tracker_tilt(azimuth, elevation)
        return float(tilt["tracker_theta"])
src/control_loop.py ADDED
@@ -0,0 +1,779 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ControlLoop: the 15-minute agrivoltaic control cycle.
3
+
4
+ Each tick:
5
+ 1. Fetch live sensor data (IMS weather + TB vine sensors)
6
+ 2. Load/validate the day-ahead plan for today
7
+ 3. Look up the planned offset for the current slot
8
+ 4. Run live gate check (may override plan if conditions diverged)
9
+ 5. Check energy budget (block intervention if budget exhausted)
10
+ 6. Run CommandArbiter (priority stack + hysteresis)
11
+ 7. Resolve per-tracker fleet overrides (rare; default = all same angle)
12
+ 8. Dispatch angle to trackers via TrackerDispatcher
13
+ 9. Spend energy budget for the slot
14
+ 10. Check plan divergence and trigger re-plan if needed
15
+ 11. Log the result
16
+
17
+ The loop can run as:
18
+ - **one-shot**: ``loop.tick()`` — execute one cycle (called externally)
19
+ - **continuous**: ``loop.run()`` — blocking loop with 15-min sleep
20
+ - **plan-only**: ``loop.tick(dry_run=True)`` — compute decisions without sending
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import logging
27
+ import time
28
+ from dataclasses import dataclass, field
29
+ from datetime import date, datetime, timedelta, timezone
30
+ from pathlib import Path
31
+ from typing import Dict, List, Optional
32
+
33
+ import pandas as pd
34
+
35
+ from config.settings import (
36
+ ANGLE_TOLERANCE_DEG,
37
+ DAILY_PLAN_PATH,
38
+ DP_SLOT_DURATION_MIN,
39
+ PLAN_DIVERGENCE_THRESHOLD_KWH,
40
+ PLAN_DIVERGENCE_THRESHOLD_SLOTS,
41
+ PLAN_REPLAN_COOLDOWN_SLOTS,
42
+ SIMULATION_LOG_PATH,
43
+ )
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Tick result
50
+ # ---------------------------------------------------------------------------
51
+
52
@dataclass
class TickResult:
    """Snapshot of everything a single control-loop cycle observed and decided."""

    timestamp: datetime
    slot_index: int  # 15-minute slot of the day, 0–95
    stage_id: str = "unknown"

    # --- day-ahead plan lookup ---
    plan_offset_deg: float = 0.0   # offset the day-ahead plan prescribes
    plan_gate_passed: bool = False

    # --- live override ---
    live_gate_passed: bool = False
    live_override: bool = False    # True when live data forced a deviation from the plan
    override_reason: Optional[str] = None

    # --- arbiter decision ---
    target_angle: float = 0.0
    dispatch: bool = False
    source: str = ""

    # --- dispatch outcome ---
    trackers_verified: int = 0
    trackers_total: int = 0
    dispatch_error: Optional[str] = None

    # --- energy accounting ---
    energy_cost_kwh: float = 0.0      # energy sacrificed by this slot's offset

    # --- budget tracking ---
    budget_spent_kwh: float = 0.0     # amount actually deducted from the budget
    budget_remaining_kwh: float = 0.0 # daily budget left after this slot

    # --- model routing ---
    model_route: str = ""             # "fvcb" or "ml" — model chosen for this tick

    # --- fleet overrides (per-tracker angles, when they differ from default) ---
    fleet_overrides: Optional[Dict[str, float]] = None

    # --- plan divergence tracking ---
    divergence_cumulative_kwh: float = 0.0
    divergence_consecutive: int = 0
    replan_triggered: bool = False

    # --- sensor snapshot ---
    air_temp_c: Optional[float] = None
    ghi_w_m2: Optional[float] = None
    wind_speed_ms: Optional[float] = None

    def to_dict(self) -> dict:
        """Serialise to a JSON-friendly dict (datetimes become ISO-8601 strings)."""
        serialised = {}
        for attr_name, attr_value in self.__dict__.items():
            if isinstance(attr_value, datetime):
                serialised[attr_name] = attr_value.isoformat()
            else:
                serialised[attr_name] = attr_value
        return serialised
105
+
106
+
107
+ # ---------------------------------------------------------------------------
108
+ # ControlLoop
109
+ # ---------------------------------------------------------------------------
110
+
111
+ class ControlLoop:
112
+ """15-minute agrivoltaic control loop.
113
+
114
+ Parameters
115
+ ----------
116
+ dry_run : bool
117
+ If True, compute decisions but don't send commands to trackers.
118
+ plan_path : Path
119
+ Path to the day-ahead plan JSON file.
120
+ log_path : Path
121
+ Path for simulation log output.
122
+ """
123
+
124
    def __init__(
        self,
        dry_run: bool = True,
        plan_path: Path = DAILY_PLAN_PATH,
        log_path: Path = SIMULATION_LOG_PATH,
    ):
        """Create a control loop.

        Parameters
        ----------
        dry_run : bool
            If True, decisions are computed but no tracker commands are sent.
        plan_path : Path
            Location of the day-ahead plan JSON file.
        log_path : Path
            Base path for the simulation/tick log output.
        """
        self.dry_run = dry_run
        self.plan_path = plan_path
        self.log_path = log_path

        # Heavy collaborators are created lazily on first property access,
        # keeping ControlLoop construction cheap and import-side-effect free.
        self._arbiter = None
        self._dispatcher = None
        self._astro = None
        self._hub = None
        self._modes = None
        self._fleet = None
        self._schedulers: Dict[str, object] = {}   # plan_id -> TrackerScheduler cache
        self._budget_planner = None
        self._router = None
        self._current_plan: Optional[dict] = None  # day-ahead plan currently in force
        self._tick_log: List[dict] = []            # TickResult dicts for this session

        # Daily budget state (recomputed when the calendar date changes)
        self._daily_budget_plan: Optional[dict] = None
        self._daily_budget_date: Optional[date] = None

        # Divergence tracking (reset on re-plan or new day)
        self._divergence_cumulative_kwh: float = 0.0
        self._divergence_consecutive: int = 0
        self._last_replan_slot: int = -99  # sentinel well before slot 0 so cooldown never blocks the first re-plan
        self._replan_count: int = 0
156
+
157
+ # ------------------------------------------------------------------
158
+ # Lazy component init
159
+ # ------------------------------------------------------------------
160
+
161
    @property
    def arbiter(self):
        """Lazily constructed CommandArbiter (priority stack + hysteresis)."""
        if self._arbiter is None:
            from src.command_arbiter import CommandArbiter
            self._arbiter = CommandArbiter()
        return self._arbiter

    @property
    def dispatcher(self):
        """Lazily constructed TrackerDispatcher; inherits this loop's dry_run flag."""
        if self._dispatcher is None:
            from src.tracker_dispatcher import TrackerDispatcher
            self._dispatcher = TrackerDispatcher(dry_run=self.dry_run)
        return self._dispatcher

    @property
    def astro(self):
        """Lazily constructed astronomical sun-tracking angle provider."""
        if self._astro is None:
            from src.command_arbiter import AstronomicalTracker
            self._astro = AstronomicalTracker()
        return self._astro

    @property
    def hub(self):
        """Lazily constructed DataHub (weather + sensor service registry)."""
        if self._hub is None:
            from src.data.data_providers import DataHub
            self._hub = DataHub.default()
        return self._hub

    @property
    def modes(self):
        """Lazily constructed operational-mode checker (wind stow, heat shield, …)."""
        if self._modes is None:
            from src.operational_modes import OperationalModeChecker
            self._modes = OperationalModeChecker()
        return self._modes

    @property
    def fleet(self):
        """Lazily constructed TrackerFleet (per-tracker plan assignments)."""
        if self._fleet is None:
            from src.tracker_fleet import TrackerFleet
            self._fleet = TrackerFleet()
        return self._fleet

    @property
    def budget_planner(self):
        """Lazily constructed EnergyBudgetPlanner (annual → weekly → daily budgets)."""
        if self._budget_planner is None:
            from src.energy_budget import EnergyBudgetPlanner
            self._budget_planner = EnergyBudgetPlanner()
        return self._budget_planner

    @property
    def router(self):
        """Lazily constructed RoutingAgent selecting the photosynthesis model (FvCB vs ML)."""
        if self._router is None:
            from src.chatbot.routing_agent import RoutingAgent
            self._router = RoutingAgent()
        return self._router
216
+
217
+ # ------------------------------------------------------------------
218
+ # Plan loading
219
+ # ------------------------------------------------------------------
220
+
221
+ def _build_persistence_forecast(self) -> tuple[list[float], list[float]]:
222
+ """Build 96-slot temp/GHI forecast from last available IMS day."""
223
+ ims_df = self.hub.weather.get_dataframe()
224
+ if ims_df.empty:
225
+ return [25.0] * 96, [0.0] * 96
226
+
227
+ df = ims_df.copy()
228
+ if "timestamp_utc" in df.columns:
229
+ df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
230
+ df = df.set_index("timestamp_utc")
231
+
232
+ last_day = df.index.max().normalize()
233
+ day_data = df[df.index.normalize() == last_day]
234
+ if len(day_data) < 10:
235
+ last_day -= pd.Timedelta(days=1)
236
+ day_data = df[df.index.normalize() == last_day]
237
+
238
+ temps = [25.0] * 96
239
+ ghis = [0.0] * 96
240
+ for _, row in day_data.iterrows():
241
+ slot = row.name.hour * 4 + row.name.minute // 15
242
+ if 0 <= slot < 96:
243
+ t = row.get("air_temperature_c")
244
+ if pd.notna(t):
245
+ temps[slot] = float(t)
246
+ g = row.get("ghi_w_m2")
247
+ if pd.notna(g):
248
+ ghis[slot] = float(g)
249
+ return temps, ghis
250
+
251
+ def _compute_daily_budget(self, target: date) -> float:
252
+ """Compute the daily energy budget from the annual/monthly hierarchy."""
253
+ annual = self.budget_planner.compute_annual_plan(target.year)
254
+ month_budget = annual["monthly_budgets"].get(target.month, 0.5)
255
+ weekly = self.budget_planner.compute_weekly_plan(target, month_budget)
256
+ dow = target.weekday()
257
+ return weekly["daily_budgets_kWh"][min(dow, 6)]
258
+
259
+ def load_plan(self, target_date: Optional[date] = None) -> Optional[dict]:
260
+ """Load the day-ahead plan for the given date."""
261
+ target = target_date or date.today()
262
+
263
+ # Try loading from file
264
+ if self.plan_path.exists():
265
+ try:
266
+ with open(self.plan_path) as f:
267
+ plan = json.load(f)
268
+ if plan.get("target_date") == str(target):
269
+ self._current_plan = plan
270
+ logger.info("Loaded plan for %s (%d slots)",
271
+ target, len(plan.get("slots", [])))
272
+ return plan
273
+ except Exception as exc:
274
+ logger.warning("Failed to load plan from %s: %s", self.plan_path, exc)
275
+
276
+ # No plan file or wrong date — compute on the fly
277
+ try:
278
+ from src.day_ahead_planner import DayAheadPlanner
279
+
280
+ temps, ghis = self._build_persistence_forecast()
281
+ daily_budget = self._compute_daily_budget(target)
282
+
283
+ planner = DayAheadPlanner()
284
+ plan_obj = planner.plan_day(target, temps, ghis, max(daily_budget, 0.1))
285
+ plan = plan_obj.to_dict()
286
+
287
+ # Save for reuse
288
+ self.plan_path.parent.mkdir(parents=True, exist_ok=True)
289
+ with open(self.plan_path, "w") as f:
290
+ json.dump(plan, f, indent=2)
291
+
292
+ self._current_plan = plan
293
+ return plan
294
+
295
+ except Exception as exc:
296
+ logger.error("Plan generation failed: %s", exc)
297
+ return None
298
+
299
+ def _get_slot_plan(self, slot_index: int) -> Optional[dict]:
300
+ """Look up the planned offset for a given slot."""
301
+ if not self._current_plan:
302
+ return None
303
+ slots = self._current_plan.get("slots", [])
304
+ for s in slots:
305
+ t = s.get("time", "")
306
+ try:
307
+ h, m = map(int, t.split(":"))
308
+ s_idx = h * 4 + m // 15
309
+ if s_idx == slot_index:
310
+ return s
311
+ except (ValueError, AttributeError):
312
+ continue
313
+ return None
314
+
315
+ # ------------------------------------------------------------------
316
+ # Energy budget
317
+ # ------------------------------------------------------------------
318
+
319
    def _ensure_daily_budget(self, today: date) -> Optional[dict]:
        """Load or reuse the slot-level budget plan for *today*.

        Resolution order: in-process cache → Redis snapshot (survives
        worker restarts) → fresh computation. Returns None only when the
        budget cannot be computed at all.
        """
        if self._daily_budget_plan and self._daily_budget_date == today:
            return self._daily_budget_plan

        # Try restoring from Redis (survives worker restarts)
        try:
            from src.data.redis_cache import get_redis
            redis = get_redis()
            if redis:
                cached = redis.get_json("control:budget")
                # Payload schema mirrors _persist_budget: {"date": ..., "plan": ...}
                if cached and cached.get("date") == str(today):
                    self._daily_budget_plan = cached["plan"]
                    self._daily_budget_date = today
                    logger.info("Restored daily budget from Redis for %s", today)
                    return self._daily_budget_plan
        except Exception:
            # Best-effort: a Redis outage must not block budget computation.
            pass

        try:
            daily_budget = self._compute_daily_budget(today)
            self._daily_budget_plan = self.budget_planner.compute_daily_plan(
                today, daily_budget,
            )
            self._daily_budget_date = today

            # Fresh day ⇒ divergence counters start from scratch.
            self._divergence_cumulative_kwh = 0.0
            self._divergence_consecutive = 0
            self._last_replan_slot = -99

            # Persist to Redis so sibling processes see the same budget.
            self._persist_budget(today)

            return self._daily_budget_plan
        except Exception as exc:
            logger.warning("Failed to compute daily budget: %s", exc)
            return None
357
+
358
+ def _persist_budget(self, today: date) -> None:
359
+ """Save daily budget state to Redis for cross-process access."""
360
+ try:
361
+ from src.data.redis_cache import get_redis
362
+ import json as _json
363
+ redis = get_redis()
364
+ if redis and self._daily_budget_plan:
365
+ payload = {
366
+ "date": str(today),
367
+ "plan": _json.loads(_json.dumps(self._daily_budget_plan, default=str)),
368
+ }
369
+ redis.set_json("control:budget", payload, ttl=86400)
370
+ except Exception as exc:
371
+ logger.debug("Budget Redis persist failed: %s", exc)
372
+
373
+ @staticmethod
374
+ def _slot_key(now: datetime) -> str:
375
+ """Format a datetime as a slot key like '10:15'."""
376
+ return f"{now.hour:02d}:{(now.minute // 15) * 15:02d}"
377
+
378
+ # ------------------------------------------------------------------
379
+ # Fleet overrides (Task 1)
380
+ # ------------------------------------------------------------------
381
+
382
    def _resolve_fleet_overrides(
        self, now: datetime, theta_astro: float,
    ) -> Dict[str, float]:
        """Resolve per-tracker angle overrides from TrackerFleet assignments.

        Returns an empty dict in the common case (all trackers follow the
        arbiter's angle). Only returns overrides for trackers that have
        an explicit non-tracking assignment active right now.

        Parameters
        ----------
        now : datetime
            Time at which assignments/events are evaluated.
        theta_astro : float
            Current astronomical tracking angle; anti-tracking offsets are
            applied relative to it.
        """
        from src.tracker_fleet import tracker_id_to_name
        from src.tracker_scheduler import TrackerScheduler, PLAN_LIBRARY

        overrides: Dict[str, float] = {}
        try:
            best = self.fleet.get_all_best_assignments(now)
        except Exception as exc:
            # Fleet lookup is optional — failure just means "no overrides".
            logger.debug("Fleet assignment lookup skipped: %s", exc)
            return overrides

        for tracker_id, assignment in best.items():
            if assignment is None:
                continue

            plan_id = assignment.plan_id
            # Get or create scheduler for this plan (cached in self._schedulers)
            if plan_id not in self._schedulers:
                if assignment.plan_file:
                    # File-backed plan takes precedence over the built-in library.
                    plan_path = Path(assignment.plan_file)
                    if plan_path.exists():
                        self._schedulers[plan_id] = TrackerScheduler(
                            plan_file=plan_path,
                        )
                    else:
                        logger.warning("Plan file not found: %s", plan_path)
                        continue
                elif plan_id in PLAN_LIBRARY:
                    self._schedulers[plan_id] = TrackerScheduler(
                        plan_data=PLAN_LIBRARY[plan_id],
                    )
                else:
                    logger.debug("Unknown plan_id %r, skipping", plan_id)
                    continue

            sched = self._schedulers[plan_id]
            event = sched.get_event(now)
            if event is None:
                # No event active at this time — tracker keeps default angle.
                continue

            mode = event.get("mode")
            event_angle = event.get("angle")

            if mode == "tracking" or mode is None:
                # Same as default astronomical tracking — no override needed
                continue
            elif mode == "antiTracking" and event_angle is not None:
                # Anti-tracking is expressed as an offset from the astro angle.
                overrides[tracker_id_to_name(tracker_id)] = theta_astro + event_angle
            elif mode == "fixed_angle" and event_angle is not None:
                # Fixed angle is absolute.
                overrides[tracker_id_to_name(tracker_id)] = event_angle

        return overrides
442
+
443
+ # ------------------------------------------------------------------
444
+ # Plan divergence (Task 3)
445
+ # ------------------------------------------------------------------
446
+
447
+ def _check_plan_divergence(
448
+ self,
449
+ slot_index: int,
450
+ planned_offset: float,
451
+ actual_offset: float,
452
+ planned_cost: float,
453
+ actual_cost: float,
454
+ ) -> bool:
455
+ """Track divergence between plan and execution. Return True if re-plan needed."""
456
+ cost_diff = abs(planned_cost - actual_cost)
457
+ offset_diverged = abs(planned_offset - actual_offset) > ANGLE_TOLERANCE_DEG
458
+
459
+ self._divergence_cumulative_kwh += cost_diff
460
+
461
+ if offset_diverged:
462
+ self._divergence_consecutive += 1
463
+ else:
464
+ self._divergence_consecutive = 0
465
+
466
+ # Check cooldown
467
+ if slot_index - self._last_replan_slot < PLAN_REPLAN_COOLDOWN_SLOTS:
468
+ return False
469
+
470
+ if self._divergence_cumulative_kwh >= PLAN_DIVERGENCE_THRESHOLD_KWH:
471
+ logger.warning(
472
+ "Cumulative divergence %.3f kWh >= %.3f threshold; triggering re-plan",
473
+ self._divergence_cumulative_kwh, PLAN_DIVERGENCE_THRESHOLD_KWH,
474
+ )
475
+ return True
476
+
477
+ if self._divergence_consecutive >= PLAN_DIVERGENCE_THRESHOLD_SLOTS:
478
+ logger.warning(
479
+ "%d consecutive divergent slots >= %d threshold; triggering re-plan",
480
+ self._divergence_consecutive, PLAN_DIVERGENCE_THRESHOLD_SLOTS,
481
+ )
482
+ return True
483
+
484
+ return False
485
+
486
    def _trigger_replan(self, now: datetime, slot_index: int) -> bool:
        """Re-generate the day-ahead plan from the current slot onward.

        Uses only the *remaining* daily budget so the new plan cannot
        overspend. Returns True when a new plan was produced and saved.
        """
        today = now.date()
        daily_bp = self._ensure_daily_budget(today)
        spent = daily_bp["cumulative_spent"] if daily_bp else 0.0
        remaining = (daily_bp["daily_total_kWh"] - spent) if daily_bp else 0.0

        if remaining <= 0:
            # Nothing left to allocate — re-planning would be pointless.
            logger.info("Re-plan skipped: no budget remaining")
            return False

        try:
            from src.day_ahead_planner import DayAheadPlanner

            temps, ghis = self._build_persistence_forecast()

            planner = DayAheadPlanner()
            plan_obj = planner.plan_day(today, temps, ghis, max(remaining, 0.01))
            plan = plan_obj.to_dict()

            # Save for reuse (same path the regular planner writes to)
            self.plan_path.parent.mkdir(parents=True, exist_ok=True)
            with open(self.plan_path, "w") as f:
                json.dump(plan, f, indent=2)

            self._current_plan = plan
            # Start the cooldown and reset divergence counters for the new plan.
            self._last_replan_slot = slot_index
            self._divergence_cumulative_kwh = 0.0
            self._divergence_consecutive = 0
            self._replan_count += 1

            n_slots = len(plan.get("slots", []))
            logger.info(
                "Re-plan #%d at slot %d: %d slots, %.4f kWh remaining budget",
                self._replan_count, slot_index, n_slots, remaining,
            )
            return True
        except Exception as exc:
            logger.error("Re-plan failed: %s", exc)
            return False
526
+
527
+ # ------------------------------------------------------------------
528
+ # Main tick
529
+ # ------------------------------------------------------------------
530
+
531
    def tick(self, timestamp: Optional[datetime] = None) -> TickResult:
        """Execute one control loop cycle.

        Steps (numbered below): load plan → fetch weather → route model →
        astro angle → plan lookup → live gate + budget guard → arbitrate →
        fleet overrides → dispatch → spend budget → divergence check → log.

        Parameters
        ----------
        timestamp : datetime, optional
            Override current time (for simulation/replay).

        Returns
        -------
        TickResult
            Full snapshot of what this cycle observed and decided.
        """
        now = timestamp or datetime.now(tz=timezone.utc)
        slot_index = now.hour * 4 + now.minute // 15

        result = TickResult(timestamp=now, slot_index=slot_index)

        # 1. Load plan if needed (date rollover or nothing loaded yet)
        today = now.date() if hasattr(now, 'date') else date.today()
        if (not self._current_plan or
                self._current_plan.get("target_date") != str(today)):
            self.load_plan(today)

        # 2. Fetch live weather (failures leave the sensor fields as None)
        try:
            wx = self.hub.weather.get_current()
            if "error" not in wx:
                result.air_temp_c = wx.get("air_temperature_c")
                result.ghi_w_m2 = wx.get("ghi_w_m2")
                result.wind_speed_ms = wx.get("wind_speed_ms")
        except Exception as exc:
            logger.warning("Weather fetch failed: %s", exc)

        # 2b. Route model selection (FvCB vs ML) based on live conditions;
        #     falls back to "fvcb" when routing itself fails.
        try:
            telemetry = {
                "temp_c": result.air_temp_c,
                "ghi_w_m2": result.ghi_w_m2,
                "hour": now.hour,
            }
            result.model_route = self.router.route(telemetry)
        except Exception as exc:
            logger.debug("Model routing failed: %s", exc)
            result.model_route = "fvcb"

        # 3. Get astronomical tracking angle (baseline all offsets add onto)
        theta_astro = self.astro.get_angle(now)

        # 4. Look up plan for this slot
        slot_plan = self._get_slot_plan(slot_index)
        if slot_plan:
            result.plan_offset_deg = slot_plan.get("offset_deg", 0.0)
            result.plan_gate_passed = slot_plan.get("gate_passed", False)
            result.energy_cost_kwh = slot_plan.get("energy_cost_kwh", 0.0)
            result.stage_id = self._current_plan.get("stage_id", "unknown")
        else:
            logger.debug("No plan slot for index %d — defaulting to astronomical", slot_index)

        # 5. Live gate check — override plan if conditions diverged
        # Intentionally simpler than DayAheadPlanner._check_gate():
        # the planner has forecast CWSI + FvCB shading_helps; the live gate
        # only checks real-time temp and GHI as hard constraints.
        planned_offset = result.plan_offset_deg
        live_offset = planned_offset  # default: follow the plan

        if result.air_temp_c is not None:
            from config.settings import (
                NO_SHADE_BEFORE_HOUR,
                SEMILLON_TRANSITION_TEMP_C,
                SHADE_ELIGIBLE_GHI_ABOVE,
            )

            if planned_offset > 0:
                blocked = False
                reason = ""

                # Gate conditions are checked in priority order; first hit wins.
                if now.hour < NO_SHADE_BEFORE_HOUR:
                    blocked, reason = True, "morning — no shading before 10:00"
                elif result.air_temp_c < SEMILLON_TRANSITION_TEMP_C:
                    blocked, reason = True, f"temp {result.air_temp_c:.0f}°C < {SEMILLON_TRANSITION_TEMP_C:.0f}°C"
                elif result.ghi_w_m2 is not None and result.ghi_w_m2 < SHADE_ELIGIBLE_GHI_ABOVE:
                    blocked, reason = True, f"GHI {result.ghi_w_m2:.0f} < {SHADE_ELIGIBLE_GHI_ABOVE:.0f}"

                if blocked:
                    live_offset = 0.0
                    result.live_override = True
                    result.override_reason = reason
                    logger.info("Live override: plan offset %.0f° → 0° (%s)",
                                planned_offset, reason)

        result.live_gate_passed = live_offset > 0

        # 5b. Budget guard — block intervention if daily budget exhausted
        if live_offset > 0:
            daily_bp = self._ensure_daily_budget(today)
            if daily_bp:
                sk = self._slot_key(now)
                slot_remaining = daily_bp["slot_budgets"].get(sk, 0.0)
                margin_remaining = daily_bp["daily_margin_remaining_kWh"]
                if slot_remaining + margin_remaining <= 0:
                    live_offset = 0.0
                    result.live_override = True
                    result.override_reason = "daily energy budget exhausted"
                    logger.info("Budget guard: forcing astronomical (budget depleted)")

        # 6. Build engine result for arbiter
        target_angle = theta_astro + live_offset
        engine_result = {
            "angle": target_angle,
            "action": f"plan_offset_{live_offset:.0f}deg",
        }

        # Check operational modes (wind stow, heat shield, harvest) —
        # these outrank the plan inside the arbiter's priority stack.
        mode_override = self.modes.check_all(
            wind_speed_ms=result.wind_speed_ms,
            air_temp_c=result.air_temp_c,
            theta_astro=theta_astro,
            current_date=today,
        )
        weather_override = mode_override.to_weather_override() if mode_override else None

        # 7. Arbitrate
        decision = self.arbiter.arbitrate(
            timestamp=now,
            engine_result=engine_result,
            theta_astro=theta_astro,
            weather_override=weather_override,
        )

        result.target_angle = decision.angle
        result.dispatch = decision.dispatch
        # decision.source may be an Enum or a plain string — normalise to str.
        result.source = decision.source.value if hasattr(decision.source, 'value') else str(decision.source)

        # 7b. Resolve per-tracker fleet overrides (rare; most ticks return {})
        fleet_overrides = self._resolve_fleet_overrides(now, theta_astro)
        if fleet_overrides:
            result.fleet_overrides = fleet_overrides
            logger.info("Fleet overrides active: %s", fleet_overrides)

        # 8. Dispatch to trackers (dispatcher honours self.dry_run)
        if decision.dispatch:
            try:
                dispatch_result = self.dispatcher.dispatch(
                    decision, angle_overrides=fleet_overrides or None,
                )
                result.trackers_verified = dispatch_result.n_success
                result.trackers_total = len(dispatch_result.trackers)
                if not dispatch_result.all_verified:
                    failed = [t.device_name for t in dispatch_result.trackers if not t.verified]
                    result.dispatch_error = f"failed: {', '.join(failed)}"
            except Exception as exc:
                result.dispatch_error = str(exc)
                logger.error("Dispatch failed: %s", exc)

        # 9. Spend energy budget (may spend less than requested — shortfall logged)
        if result.energy_cost_kwh > 0:
            daily_bp = self._ensure_daily_budget(today)
            if daily_bp:
                sk = self._slot_key(now)
                result.budget_spent_kwh = self.budget_planner.spend_slot(
                    daily_bp, sk, result.energy_cost_kwh,
                )
                result.budget_remaining_kwh = (
                    sum(daily_bp["slot_budgets"].values())
                    + daily_bp["daily_margin_remaining_kWh"]
                )
                # Persist updated budget to Redis
                self._persist_budget(today)

                if result.budget_spent_kwh < result.energy_cost_kwh:
                    logger.warning(
                        "Budget shortfall: requested %.4f kWh, spent %.4f kWh (slot %s)",
                        result.energy_cost_kwh, result.budget_spent_kwh, sk,
                    )

        # 10. Check plan divergence and trigger re-plan if needed
        if slot_plan:
            # When an override fired, live_offset is already 0; the
            # conditional keeps the intent explicit.
            actual_offset = live_offset if not result.live_override else 0.0
            needs_replan = self._check_plan_divergence(
                slot_index=slot_index,
                planned_offset=result.plan_offset_deg,
                actual_offset=actual_offset,
                planned_cost=slot_plan.get("energy_cost_kwh", 0.0),
                actual_cost=result.energy_cost_kwh,
            )
            result.divergence_cumulative_kwh = self._divergence_cumulative_kwh
            result.divergence_consecutive = self._divergence_consecutive
            if needs_replan:
                result.replan_triggered = self._trigger_replan(now, slot_index)

        # 11. Log
        self._tick_log.append(result.to_dict())
        logger.info(
            "Tick %02d:%02d slot=%d angle=%.1f° offset=%.0f° dispatch=%s source=%s"
            " budget_remaining=%.3f kWh%s",
            now.hour, now.minute, slot_index, decision.angle,
            live_offset, decision.dispatch, decision.source,
            result.budget_remaining_kwh,
            f" [OVERRIDE: {result.override_reason}]" if result.live_override else "",
        )

        return result
729
+
730
+ # ------------------------------------------------------------------
731
+ # Continuous run
732
+ # ------------------------------------------------------------------
733
+
734
+ def run(self, max_ticks: Optional[int] = None) -> None:
735
+ """Run the control loop continuously (blocking).
736
+
737
+ Parameters
738
+ ----------
739
+ max_ticks : int, optional
740
+ Stop after this many ticks (for testing). None = run forever.
741
+ """
742
+ logger.info("Control loop starting (dry_run=%s)", self.dry_run)
743
+ tick_count = 0
744
+
745
+ while max_ticks is None or tick_count < max_ticks:
746
+ try:
747
+ result = self.tick()
748
+ tick_count += 1
749
+ except Exception as exc:
750
+ logger.error("Tick failed: %s", exc)
751
+
752
+ # Sleep until next 15-min boundary
753
+ now = datetime.now(tz=timezone.utc)
754
+ next_slot = now.replace(
755
+ minute=(now.minute // DP_SLOT_DURATION_MIN + 1) * DP_SLOT_DURATION_MIN % 60,
756
+ second=0, microsecond=0,
757
+ )
758
+ if next_slot <= now:
759
+ next_slot += timedelta(hours=1)
760
+ sleep_sec = (next_slot - now).total_seconds()
761
+ logger.debug("Sleeping %.0f s until %s", sleep_sec, next_slot)
762
+ time.sleep(max(sleep_sec, 1.0))
763
+
764
+ # ------------------------------------------------------------------
765
+ # Log access
766
+ # ------------------------------------------------------------------
767
+
768
+ def get_log(self) -> List[dict]:
769
+ """Return all tick results from this session."""
770
+ return list(self._tick_log)
771
+
772
+ def save_log(self, path: Optional[Path] = None) -> Path:
773
+ """Save tick log to JSON file."""
774
+ out = path or self.log_path.with_suffix(".json")
775
+ out.parent.mkdir(parents=True, exist_ok=True)
776
+ with open(out, "w") as f:
777
+ json.dump(self._tick_log, f, indent=2, default=str)
778
+ logger.info("Saved %d tick results to %s", len(self._tick_log), out)
779
+ return out
src/data/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Data access: IMS, sensors, schema, ThingsBoard, data providers."""
src/data/data_providers.py ADDED
@@ -0,0 +1,1180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data provider layer for the VineyardChatbot.
3
+
4
+ Architecture
5
+ ------------
6
+ Each data domain gets a **Service** class that encapsulates:
7
+ - data fetching (IMS API, ThingsBoard API, model inference, ...)
8
+ - caching / TTL logic
9
+ - error handling (returns dict with "error" key on failure)
10
+ - serialisation to LLM-friendly dicts
11
+
12
+ Services are registered on a lightweight **DataHub** which is injected
13
+ into the chatbot. The chatbot's tool methods become thin one-liners
14
+ that delegate to ``self.hub.<service>.<method>()``.
15
+
16
+ ┌────────────────────┐
17
+ │ VineyardChatbot │
18
+ │ (tool dispatch) │
19
+ └────────┬───────────┘
20
+ │ self.hub
21
+ ┌────────▼───────────┐
22
+ │ DataHub │
23
+ │ (service registry) │
24
+ └────────┬───────────┘
25
+ ┌──────────┬────────┼────────┬──────────┐
26
+ ▼ ▼ ▼ ▼ ▼
27
+ WeatherSvc VineSensorSvc PSSvc EnergySvc BiologySvc
28
+ │ │ │ │ │
29
+ IMSClient TB Client Farquhar TB+Analytical rules dict
30
+ ML Pred
31
+
32
+ Loose coupling guarantees:
33
+ - The chatbot never imports IMS / TB / Farquhar / ML directly.
34
+ - Each service can be unit-tested in isolation (pass a mock client).
35
+ - Adding a new data source = write a new Service + register it.
36
+ - Services own their TTL caches — the chatbot is stateless w.r.t. data.
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import time
42
+ import traceback
43
+ from abc import ABC, abstractmethod
44
+ from dataclasses import dataclass, field
45
+ from datetime import date, datetime, timedelta, timezone
46
+ from typing import Any, Dict, List, Optional
47
+
48
+ import numpy as np
49
+ import pandas as pd
50
+
51
+
52
+ # ═══════════════════════════════════════════════════════════════════════
53
+ # TTL Cache helper
54
+ # ═══════════════════════════════════════════════════════════════════════
55
+
56
@dataclass
class _CacheEntry:
    # One in-memory cache record: the stored value and its expiry deadline.
    value: Any
    expires_at: float  # time.monotonic() deadline

class TTLCache:
    """TTL cache with an optional Redis backend.

    When a Redis client is available (``UPSTASH_REDIS_URL`` configured),
    values are mirrored there so multiple processes (API server, workers)
    share state. Without Redis everything stays in-memory, so single-process
    callers keep working unchanged.
    """

    def __init__(self, ttl_seconds: float = 300, redis_prefix: str = ""):
        self.ttl = ttl_seconds
        self._prefix = redis_prefix
        self._store: Dict[str, _CacheEntry] = {}
        # Redis client is resolved lazily on first use to avoid
        # import-time side effects.
        self._redis_checked = False
        self._redis = None

    def _get_redis(self):
        """Resolve the shared Redis client once; None when unavailable."""
        if self._redis_checked:
            return self._redis
        self._redis_checked = True
        try:
            from src.data.redis_cache import get_redis
            self._redis = get_redis()
        except Exception:
            self._redis = None
        return self._redis

    def _rkey(self, key: str) -> str:
        """Namespace *key* with the configured Redis prefix, if any."""
        if not self._prefix:
            return key
        return f"{self._prefix}{key}"

    def get(self, key: str) -> Any | None:
        """Return the cached value for *key*, or None when absent/expired.

        Redis is consulted first (shared state wins), then the local map.
        """
        backend = self._get_redis()
        if backend:
            shared = backend.get_json(self._rkey(key))
            if shared is not None:
                return shared
        local = self._store.get(key)
        if local and time.monotonic() < local.expires_at:
            return local.value
        return None

    def set(self, key: str, value: Any) -> None:
        """Store *value* under *key* in Redis (when available) and locally."""
        backend = self._get_redis()
        if backend:
            backend.set_json(self._rkey(key), value, ttl=int(self.ttl))
        # Always keep the in-memory copy as the fast local path.
        deadline = time.monotonic() + self.ttl
        self._store[key] = _CacheEntry(value=value, expires_at=deadline)

    def invalidate(self, key: str) -> None:
        """Drop *key* from both the Redis and in-memory backends."""
        backend = self._get_redis()
        if backend:
            backend.delete(self._rkey(key))
        self._store.pop(key, None)
+ self._store.pop(key, None)
118
+
119
+
120
+ # ═══════════════════════════════════════════════════════════════════════
121
+ # LLM-friendly summarisation
122
+ # ══��════════════════════════════════════════════════════════════════════
123
+
124
def summarise_dataframe(df: pd.DataFrame, max_rows: int = 48) -> Dict[str, Any]:
    """Compress a DataFrame to key stats when it exceeds *max_rows*.

    Small frames are returned verbatim as ``rows`` (a list of dicts with
    timestamps stringified and floats rounded to 2 decimals). Larger
    frames are reduced to per-column min/max/mean plus first/last values
    under ``summary``-style keys, so the payload stays LLM-friendly.
    """
    if df.empty:
        return {"rows": [], "note": "No data available."}

    if len(df) <= max_rows:
        # Small enough: ship every row, normalising cell types in place.
        rows = df.reset_index().to_dict(orient="records")
        for record in rows:
            for key in list(record):
                cell = record[key]
                if isinstance(cell, (pd.Timestamp, datetime)):
                    record[key] = str(cell)
                elif isinstance(cell, (float, np.floating)):
                    record[key] = round(float(cell), 2)
        return {"rows": rows, "row_count": len(rows)}

    # Too large — reduce each numeric column to a handful of stats.
    stats: Dict[str, Any] = {"row_count": len(df), "summarised": True, "columns": {}}
    for name, series in df.select_dtypes(include=[np.number]).items():
        clean = series.dropna()
        if clean.empty:
            continue
        stats["columns"][name] = {
            "min": round(float(clean.min()), 2),
            "max": round(float(clean.max()), 2),
            "mean": round(float(clean.mean()), 2),
            "first": round(float(clean.iloc[0]), 2),
            "last": round(float(clean.iloc[-1]), 2),
        }

    # Report the covered time span when the index is datetime-like.
    if isinstance(df.index, pd.DatetimeIndex):
        stats["time_range"] = {"start": str(df.index.min()), "end": str(df.index.max())}
    return stats
162
+
163
+
164
+ # ═══════════════════════════════════════════════════════════════════════
165
+ # Service base class
166
+ # ═══════════════════════════════════════════════════════════════════════
167
+
168
class BaseService(ABC):
    """Abstract base for all data-provider services.

    Subclasses must implement ``service_name`` (used as registry key).
    All public methods should return plain dicts (JSON-serialisable)
    so the chatbot can forward them to the LLM without conversion.
    """

    # NOTE: subclasses satisfy this with a plain class attribute
    # (e.g. ``service_name = "weather"``), which overrides the abstract
    # property and makes the subclass instantiable.
    @property
    @abstractmethod
    def service_name(self) -> str: ...
179
+
180
+
181
+ # ═══════════════════════════════════════════════════════════════════════
182
+ # 1. WeatherService (IMS station 43)
183
+ # ═══════════════════════════════════════════════════════════════════════
184
+
185
class WeatherService(BaseService):
    """IMS weather data — cached CSV for history, latest row for 'now'.

    Wraps the IMS client behind a TTL cache so repeated chatbot calls do
    not re-read the CSV. All public methods return JSON-serialisable
    dicts with an ``error`` key on failure.
    """

    service_name = "weather"

    def __init__(self, ims_client: Any = None, cache_ttl: float = 1800):
        self._ims = ims_client  # lazy — tests may inject a mock client here
        self._df_cache = TTLCache(ttl_seconds=cache_ttl, redis_prefix="weather:")

    # -- lazy client --

    def _client(self):
        """Create the IMS client on first use (avoids import-time I/O)."""
        if self._ims is None:
            from src.ims_client import IMSClient
            self._ims = IMSClient()
        return self._ims

    def _load_df(self) -> pd.DataFrame:
        """Load the cached IMS CSV, memoised for ``cache_ttl`` seconds."""
        cached = self._df_cache.get("ims")
        if cached is not None:
            return cached
        df = self._client().load_cached()
        if not df.empty:
            # NOTE(review): TTLCache may serialise this to Redis as JSON —
            # confirm DataFrame values are handled or Redis is disabled here.
            self._df_cache.set("ims", df)
        return df

    def get_dataframe(self) -> pd.DataFrame:
        """Public accessor for the cached IMS DataFrame."""
        return self._load_df()

    # -- public API --

    def _now_israel(self) -> Dict[str, str]:
        """Current time in Yeruham (Asia/Jerusalem) for context in API responses."""
        try:
            from zoneinfo import ZoneInfo
            tz = ZoneInfo("Asia/Jerusalem")
        except ImportError:
            # Fallback is fixed UTC+2 — note this ignores Israel DST (UTC+3
            # in summer), so times can be off by an hour without zoneinfo.
            tz = timezone(timedelta(hours=2))
        now = datetime.now(tz)
        return {
            "current_time_israel": now.strftime("%H:%M"),
            "current_date_israel": now.strftime("%Y-%m-%d"),
            "current_datetime_israel": now.isoformat(),
        }

    def get_current(self) -> Dict[str, Any]:
        """Latest IMS weather row with local time and staleness. Always includes current time (Yeruham) so callers can compare."""
        try:
            df = self._load_df()
            if df.empty:
                return {"error": "No cached IMS data available.", **self._now_israel()}
            last = df.iloc[-1]

            result: Dict[str, Any] = {
                "timezone": "Asia/Jerusalem (Israel local, Yeruham/Sde Boker)",
                **self._now_israel(),
            }
            try:
                ts_utc = pd.to_datetime(last.get("timestamp_utc"), utc=True)
                ts_local = ts_utc.tz_convert("Asia/Jerusalem")
                now_utc = pd.Timestamp.now(tz="UTC")
                result["timestamp_utc"] = ts_utc.isoformat()
                result["timestamp_local"] = ts_local.isoformat()
                result["age_minutes"] = round((now_utc - ts_utc).total_seconds() / 60, 1)
            except Exception:
                # Timestamp column missing or unparseable — report it raw.
                result["timestamp_utc"] = str(last.get("timestamp_utc", "unknown"))

            # Copy every other non-null sensor value, rounding numerics.
            # (isinstance check includes bool, which would become 0.0/1.0 —
            # acceptable for boolean telemetry flags.)
            for col in df.columns:
                if col != "timestamp_utc":
                    val = last[col]
                    if pd.notna(val):
                        result[col] = round(float(val), 2) if isinstance(val, (int, float, np.floating)) else str(val)
            return result
        except Exception as exc:
            return {"error": f"Could not load weather data: {exc}"}

    def get_history(self, start_date: str, end_date: str) -> Dict[str, Any]:
        """Hourly IMS summary for a date range (from cached CSV).

        Parameters
        ----------
        start_date, end_date : str
            ISO dates, interpreted as UTC; both days are included.
        """
        try:
            df = self._load_df()
            if df.empty:
                return {"error": "No cached IMS data."}
            if "timestamp_utc" in df.columns:
                df = df.set_index(pd.to_datetime(df["timestamp_utc"], utc=True))
            # Keep chronological order: summarise_dataframe reports
            # first/last values, which assume a sorted index.
            df = df.sort_index()
            start = pd.Timestamp(start_date, tz="UTC")
            end = pd.Timestamp(end_date, tz="UTC") + pd.Timedelta(days=1)
            # Fix: half-open [start, end) mask. The previous
            # ``df.loc[start:end]`` slice is label-INCLUSIVE of ``end``,
            # so it leaked the midnight sample of the day after
            # ``end_date`` into the result.
            subset = df.loc[(df.index >= start) & (df.index < end)]
            if subset.empty:
                return {"error": f"No data in range {start_date} to {end_date}."}
            hourly = subset.resample("1h").mean(numeric_only=True)
            return summarise_dataframe(hourly)
        except Exception as exc:
            return {"error": f"Weather history failed: {exc}"}
279
+
280
+
281
+ # ═══════════════════════════════════════════════════════════════════════
282
+ # 2. VineSensorService (ThingsBoard)
283
+ # ═══════════════════════════════════════════════════════════════════════
284
+
285
class VineSensorService(BaseService):
    """On-site vine sensors via ThingsBoard — snapshot + time-series."""

    service_name = "vine_sensors"

    def __init__(self, tb_client: Any = None, snapshot_ttl: float = 300):
        # Client is resolved lazily so tests can inject a mock.
        self._tb = tb_client
        self._snap_cache = TTLCache(ttl_seconds=snapshot_ttl, redis_prefix="vine:")

    def _client(self):
        """Return the ThingsBoard client, building it on first use."""
        if self._tb is None:
            from src.thingsboard_client import ThingsBoardClient
            self._tb = ThingsBoardClient()
        return self._tb

    # -- public API --

    def get_snapshot(self, light: bool = False,
                     mode: Optional[str] = None) -> Dict[str, Any]:
        """Latest vine state (treatment vs reference), 5-min TTL.

        Parameters
        ----------
        light : bool
            If True, fetch only ~6 key devices instead of all 21.
        mode : str, optional
            "dashboard" = 4 devices only (air + soil + irrigation).
        """
        if mode:
            cache_key = mode
        elif light:
            cache_key = "snap_light"
        else:
            cache_key = "snap"

        hit = self._snap_cache.get(cache_key)
        if hit is not None:
            return hit

        try:
            payload = self._client().get_vine_snapshot(light=light, mode=mode).to_dict()
        except Exception as exc:
            # Errors are deliberately NOT cached so the next call retries.
            return {
                "error": f"ThingsBoard unavailable: {exc}",
                "hint": "Check THINGSBOARD_USERNAME/PASSWORD in .env",
            }
        self._snap_cache.set(cache_key, payload)
        return payload

    def get_history(
        self,
        device_type: str = "crop",
        area: str = "treatment",
        hours_back: int = 24,
    ) -> Dict[str, Any]:
        """Hourly averages for a device group over the last N hours."""
        from src.thingsboard_client import (
            AIR_KEYS, CROP_KEYS, SOIL_KEYS, DEVICE_REGISTRY, VineArea,
        )

        # Resolve the telemetry keys for the requested device family.
        wanted_keys = {"air": AIR_KEYS, "crop": CROP_KEYS, "soil": SOIL_KEYS}.get(
            device_type.lower())
        if wanted_keys is None:
            return {"error": f"Unknown device_type '{device_type}'. Use air/crop/soil."}

        # Resolve the vineyard area enum.
        target_area = {
            "treatment": VineArea.TREATMENT,
            "reference": VineArea.REFERENCE,
            "ambient": VineArea.AMBIENT,
        }.get(area.lower())
        if target_area is None:
            return {"error": f"Unknown area '{area}'. Use treatment/reference/ambient."}

        # Devices whose registry area matches and whose name starts with
        # the device family prefix (e.g. "crop...").
        prefix = device_type.lower()
        matching = [
            name for name, info in DEVICE_REGISTRY.items()
            if info.area == target_area and name.lower().startswith(prefix)
        ]
        if not matching:
            return {"error": f"No {device_type} devices in {area} area."}

        window_end = datetime.now(tz=timezone.utc)
        window_start = window_end - timedelta(hours=hours_back)

        try:
            collected = []
            for device in matching:
                series = self._client().get_timeseries(device, wanted_keys,
                                                       window_start, window_end)
                if not series.empty:
                    # Prefix columns with the device name to keep them unique.
                    collected.append(series.add_prefix(f"{device}_"))
            if not collected:
                return {"error": "No time-series data returned from ThingsBoard."}
            combined = pd.concat(collected, axis=1).sort_index()
            return summarise_dataframe(combined.resample("1h").mean(numeric_only=True))
        except Exception as exc:
            return {"error": f"Sensor history failed: {exc}"}
377
+
378
+
379
+ # ═══════════════════════════════════════════════════════════════════════
380
+ # 3. PhotosynthesisService (FvCB + ML + forecast)
381
+ # ═══════════════════════════════════════════════════════════════════════
382
+
383
class PhotosynthesisService(BaseService):
    """Photosynthesis predictions — mechanistic, ML, and day-ahead.

    Three prediction paths are exposed:
    - ``predict_fvcb``      — single-point Farquhar (FvCB) biochemistry.
    - ``predict_ml``        — ML ensemble trained lazily from IMS weather.
    - ``forecast_day_ahead``— 24 h FvCB projection over IMS weather data.
    Heavy sub-models (Farquhar, shadow geometry, canopy, ML) are built on
    first use and cached on the instance.
    """

    service_name = "photosynthesis"

    def __init__(self):
        # All sub-models start unset; each is constructed lazily below.
        self._farquhar = None
        self._ml_predictor = None
        self._shadow = None
        self._canopy = None

    # -- lazy loaders --

    def _get_farquhar(self):
        # Mechanistic FvCB leaf photosynthesis model.
        if self._farquhar is None:
            from src.farquhar_model import FarquharModel
            self._farquhar = FarquharModel()
        return self._farquhar

    def _get_shadow(self):
        # Solar-geometry / tracker shadow model.
        if self._shadow is None:
            from src.solar_geometry import ShadowModel
            self._shadow = ShadowModel()
        return self._shadow

    def _get_canopy(self):
        # Canopy model composed from the shadow and Farquhar sub-models.
        if self._canopy is None:
            from src.canopy_photosynthesis import CanopyPhotosynthesisModel
            self._canopy = CanopyPhotosynthesisModel(
                shadow_model=self._get_shadow(),
                farquhar_model=self._get_farquhar(),
            )
        return self._canopy

    # -- public API --

    def predict_fvcb(
        self, PAR: float, Tleaf: float, CO2: float, VPD: float, Tair: float,
    ) -> Dict[str, Any]:
        """Single-point Farquhar model prediction with limiting factor.

        Parameters
        ----------
        PAR : float
            Photosynthetically active radiation (umol m-2 s-1).
        Tleaf, Tair : float
            Leaf / air temperature (deg C).
        CO2 : float
            Ambient CO2 (ppm).
        VPD : float
            Vapour pressure deficit (kPa).
        """
        model = self._get_farquhar()
        A = model.calc_photosynthesis(PAR=PAR, Tleaf=Tleaf, CO2=CO2, VPD=VPD, Tair=Tair)

        # Recompute the two FvCB limitation terms so we can report which
        # one binds at these conditions.
        Tk = Tleaf + 273.15
        Vcmax = model.calc_Vcmax(Tk)
        Jmax = model.calc_Jmax(Tk)
        gamma_star = model.calc_gamma_star(Tk)
        Kc = model.calc_Kc(Tk)
        Ko = model.calc_Ko(Tk)
        # NOTE(review): _ci_from_ca is a private FarquharModel helper;
        # consider exposing a public accessor on the model instead.
        ci = model._ci_from_ca(CO2, VPD)
        J = model.calc_electron_transport(PAR, Jmax)
        # Ac = Rubisco-limited rate, Aj = RuBP-regeneration-limited rate.
        # 210.0 presumably is the O2 term used by the model — confirm units.
        Ac = Vcmax * (ci - gamma_star) / (ci + Kc * (1.0 + 210.0 / Ko))
        Aj = J * (ci - gamma_star) / (4.0 * ci + 8.0 * gamma_star)

        limiting = ("Rubisco-limited (high temperature is the bottleneck)"
                    if Ac < Aj else
                    "RuBP-limited (light is the bottleneck)")
        # Heuristic used throughout this class: above 30 C leaf temperature,
        # shading can relieve heat stress.
        shading_helps = Tleaf > 30.0

        return {
            "A_net": round(A, 3),
            "units": "umol CO2 m-2 s-1",
            "limiting_factor": limiting,
            "Tleaf": Tleaf,
            "shading_would_help": shading_helps,
            "model": "fvcb",
            "note": ("Shading may help reduce heat stress" if shading_helps
                     else "Shading would reduce photosynthesis (vine needs light)"),
        }

    def predict_ml(self, features: Optional[Dict[str, float]] = None) -> Dict[str, Any]:
        """ML ensemble prediction. Auto-fills features from latest IMS if not provided.

        Trains the model once on first call (lazy), then caches it.
        """
        try:
            predictor, feature_cols, best_name = self._ensure_ml_predictor()
        except Exception as exc:
            return {"error": f"ML predictor unavailable: {exc}"}

        try:
            if features:
                # Missing features default to 0.0 — callers should supply
                # the full feature set for best accuracy.
                row = {col: features.get(col, 0.0) for col in feature_cols}
            else:
                row = self._auto_fill_features(feature_cols)
                if row is None:
                    return {"error": "No IMS data available to auto-fill features."}

            import pandas as _pd
            # Column order must match training order.
            X = _pd.DataFrame([row])[feature_cols]
            model = predictor.models[best_name]
            pred = float(model.predict(X)[0])
            metrics = predictor.results.get(best_name, {})

            return {
                "A_net_predicted": round(pred, 3),
                "units": "umol CO2 m-2 s-1",
                "model": best_name,
                "model_mae": round(metrics.get("mae", 0), 3),
                "model_r2": round(metrics.get("r2", 0), 3),
                "features_used": {k: round(v, 2) for k, v in row.items()},
                "note": "Prediction from ML ensemble trained on IMS weather features.",
            }
        except Exception as exc:
            return {"error": f"ML prediction failed: {exc}"}

    def _ensure_ml_predictor(self):
        """Train the ML predictor once and cache it. Returns (predictor, feature_cols, best_name)."""
        # Cached triple from a previous call — training is expensive.
        if self._ml_predictor is not None:
            return self._ml_predictor

        from src.ims_client import IMSClient
        from src.farquhar_model import FarquharModel
        from src.preprocessor import Preprocessor
        from src.predictor import PhotosynthesisPredictor

        ims = IMSClient()
        ims_df = ims.load_cached()
        if ims_df.empty:
            raise RuntimeError("No IMS cache data — cannot train ML predictor.")

        # Compute Stage 1 labels (A) from sensor data
        from src.sensor_data_loader import SensorDataLoader
        loader = SensorDataLoader()
        sensor_df = loader.load()
        fvcb = FarquharModel()
        labels = fvcb.compute_all(sensor_df)
        labels.name = "A"

        # Ensure labels have a datetime index for merge
        if "time" in sensor_df.columns:
            ts = pd.to_datetime(sensor_df["time"], utc=True)
            labels.index = ts

        # Preprocess: merge, time features, split
        prep = Preprocessor()
        merged = prep.merge_ims_with_labels(ims_df, labels)
        if merged.empty:
            raise RuntimeError("Merge of IMS + labels produced empty DataFrame.")
        merged = prep.create_time_features(merged)
        X_train, y_train, X_test, y_test = prep.temporal_split(merged)
        if X_train.empty:
            raise RuntimeError("Not enough data to train ML predictor.")

        predictor = PhotosynthesisPredictor()
        predictor.train(X_train, y_train)
        if not X_test.empty:
            predictor.evaluate(X_test, y_test)

        # Pick the model with the lowest MAE; default to GradientBoosting
        # when no evaluation results exist (e.g. empty test split).
        best_name = "GradientBoosting"
        if predictor.results:
            best_name = min(predictor.results, key=lambda n: predictor.results[n].get("mae", 999))

        feature_cols = list(X_train.columns)
        self._ml_predictor = (predictor, feature_cols, best_name)
        return self._ml_predictor

    def _auto_fill_features(self, feature_cols: List[str]) -> Optional[Dict[str, float]]:
        """Fill feature vector from the latest IMS cache row + time features."""
        try:
            from src.ims_client import IMSClient
            from src.time_features import add_cyclical_time_features
            ims = IMSClient()
            df = ims.load_cached()
            if df.empty:
                return None
            last_row_df = df.tail(1).copy()
            last_row_df = add_cyclical_time_features(last_row_df, timestamp_col="timestamp_utc")
            ts = pd.to_datetime(last_row_df["timestamp_utc"].iloc[0], utc=True)
            last_row_df["month"] = ts.month
            last_row_df["day_of_year"] = ts.day_of_year
            # Any feature the latest row cannot provide defaults to 0.0.
            row = {}
            for col in feature_cols:
                if col in last_row_df.columns:
                    val = last_row_df[col].iloc[0]
                    row[col] = float(val) if pd.notna(val) else 0.0
                else:
                    row[col] = 0.0
            return row
        except Exception:
            # Best-effort: caller treats None as "could not auto-fill".
            return None

    def forecast_day_ahead(self, target_date: Optional[str] = None) -> Dict[str, Any]:
        """24h A profile using FvCB model over IMS weather data.

        For each daytime hour, computes A from IMS temperature/GHI/humidity
        using typical vine conditions. Falls back to FvCB-based projection
        when Chronos or ML forecast is unavailable.
        """
        try:
            from src.ims_client import IMSClient
            ims = IMSClient()
            df = ims.load_cached()
            if df.empty:
                return {"error": "No IMS data cached for PS forecast."}

            if "timestamp_utc" in df.columns:
                df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
                df = df.set_index("timestamp_utc")

            target = target_date or str(date.today())
            try:
                day_start = pd.Timestamp(target, tz="UTC")
                day_end = day_start + pd.Timedelta(days=1)
                day_df = df.loc[day_start:day_end]
            except Exception:
                day_df = pd.DataFrame()

            # If target date not in cache, use last available day
            if day_df.empty:
                day_df = df.tail(96)  # ~24h of 15-min data
                if day_df.empty:
                    return {"error": "Not enough IMS data for forecast."}
                target = str(day_df.index[-1].date())

            hourly = day_df.resample("1h").mean(numeric_only=True)
            model = self._get_farquhar()

            # Map IMS columns (try exact settings names first, then fuzzy match)
            def _find_col(df_cols, exact_names, fuzzy_terms, exclude_terms=()):
                for name in exact_names:
                    if name in df_cols:
                        return name
                for c in df_cols:
                    cl = c.lower()
                    if any(t in cl for t in fuzzy_terms) and not any(t in cl for t in exclude_terms):
                        return c
                return None

            # exclude "dew"/"soil" so e.g. dew-point doesn't match "temp".
            temp_col = _find_col(hourly.columns, ["air_temperature_c"], ["temp"], ["dew", "soil"])
            ghi_col = _find_col(hourly.columns, ["ghi_w_m2"], ["ghi", "rad", "irrad"])
            rh_col = _find_col(hourly.columns, ["rh_percent"], ["rh", "humid"])

            hourly_results = []
            for idx, row in hourly.iterrows():
                hour = idx.hour if hasattr(idx, "hour") else 0
                # Skip night hours (outside 06:00–19:00, index timezone).
                if hour < 6 or hour > 19:
                    continue

                # Defaults when a column is missing: 25 C air, dark, 40% RH.
                Tair = float(row[temp_col]) if temp_col and pd.notna(row.get(temp_col)) else 25.0
                Tleaf = Tair + 2.0  # leaf typically ~2C above air
                ghi = float(row[ghi_col]) if ghi_col and pd.notna(row.get(ghi_col)) else 0.0
                PAR = ghi * 2.0  # approximate PAR from GHI (umol/m2/s ~ 2x W/m2)
                rh = float(row[rh_col]) if rh_col and pd.notna(row.get(rh_col)) else 40.0

                # Estimate VPD from T and RH (Tetens saturation pressure, kPa).
                es = 0.6108 * np.exp(17.27 * Tair / (Tair + 237.3))
                VPD = max(es * (1 - rh / 100), 0.1)

                if PAR < 50:
                    # Effectively dark: no net assimilation modelled.
                    A = 0.0
                    limiting = "dark"
                else:
                    A = model.calc_photosynthesis(PAR=PAR, Tleaf=Tleaf, CO2=400.0, VPD=VPD, Tair=Tair)
                    limiting = "rubisco" if Tleaf > 30 else "rubp"

                hourly_results.append({
                    "hour": hour,
                    "A_predicted": round(A, 2),
                    "Tair": round(Tair, 1),
                    "PAR": round(PAR, 0),
                    "VPD": round(VPD, 2),
                    "limiting": limiting,
                    "shading_helps": Tleaf > 30.0,
                })

            if not hourly_results:
                return {"error": "No daytime hours available in forecast range."}

            peak = max(hourly_results, key=lambda r: r["A_predicted"])
            total_A = sum(r["A_predicted"] for r in hourly_results)
            stress_hours = sum(1 for r in hourly_results if r["limiting"] == "rubisco")

            return {
                "date": target,
                "method": "fvcb_projection",
                "hourly": hourly_results,
                "peak_A": peak["A_predicted"],
                "peak_hour": peak["hour"],
                "daily_total_A": round(total_A, 1),
                "rubisco_limited_hours": stress_hours,
                "note": "FvCB-based projection from IMS weather data. "
                        "PAR estimated as 2x GHI. Leaf temp estimated as Tair+2C.",
            }
        except Exception as exc:
            return {"error": f"PS forecast failed: {exc}"}

    def simulate_shading(
        self,
        angle_offset: float,
        hour: int,
        date_str: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Compare A at astronomical tracking vs offset angle.

        Parameters
        ----------
        angle_offset : float
            Degrees added to the astronomical tracker tilt.
        hour : int
            Local hour (Asia/Jerusalem) to simulate.
        date_str : str, optional
            ISO date; defaults to today.
        """
        shadow = self._get_shadow()
        canopy = self._get_canopy()

        dt_str = date_str or str(date.today())
        try:
            dt = pd.Timestamp(f"{dt_str} {hour:02d}:00:00", tz="Asia/Jerusalem")
        except Exception:
            # Unparseable date — fall back to today at the requested hour.
            dt = pd.Timestamp(f"{date.today()} {hour:02d}:00:00", tz="Asia/Jerusalem")

        solar_pos = shadow.get_solar_position(pd.DatetimeIndex([dt]))
        elev = float(solar_pos["solar_elevation"].iloc[0])
        azim = float(solar_pos["solar_azimuth"].iloc[0])

        # Below ~2 degrees elevation the shadow geometry is meaningless.
        if elev <= 2.0:
            return {"error": f"Sun below horizon at hour {hour} (elevation {elev:.1f}\u00b0)."}

        tracker = shadow.compute_tracker_tilt(azim, elev)
        astro_tilt = tracker["tracker_theta"]

        # Fixed reference conditions for the comparison (hot midday vine).
        PAR, Tleaf, CO2, VPD, Tair = 1800.0, 32.0, 400.0, 2.5, 33.0

        # Baseline: tracker at its astronomical tilt.
        mask_un = shadow.project_shadow(elev, azim, astro_tilt)
        res_un = canopy.compute_vine_A(
            par=PAR, Tleaf=Tleaf, CO2=CO2, VPD=VPD, Tair=Tair,
            shadow_mask=mask_un, solar_elevation=elev,
            solar_azimuth=azim, tracker_tilt=astro_tilt,
        )

        # Variant: tracker offset to cast extra shade on the vine row.
        shaded_tilt = astro_tilt + angle_offset
        mask_sh = shadow.project_shadow(elev, azim, shaded_tilt)
        res_sh = canopy.compute_vine_A(
            par=PAR, Tleaf=Tleaf, CO2=CO2, VPD=VPD, Tair=Tair,
            shadow_mask=mask_sh, solar_elevation=elev,
            solar_azimuth=azim, tracker_tilt=shaded_tilt,
        )

        A_un = res_un["A_vine"]
        A_sh = res_sh["A_vine"]
        # Percent change; guarded against division by zero at A_un == 0.
        change = ((A_sh - A_un) / A_un * 100) if A_un > 0 else 0

        return {
            "hour": hour, "date": dt_str, "angle_offset": angle_offset,
            "solar_elevation": round(elev, 1),
            "A_unshaded": round(A_un, 3), "A_shaded": round(A_sh, 3),
            "A_change_pct": round(change, 1),
            "sunlit_fraction_unshaded": round(res_un["sunlit_fraction"], 3),
            "sunlit_fraction_shaded": round(res_sh["sunlit_fraction"], 3),
            "tracker_tilt_astronomical": round(astro_tilt, 1),
            "tracker_tilt_shaded": round(shaded_tilt, 1),
        }

    def compare_angles(self, angles: Optional[List[int]] = None) -> Dict[str, Any]:
        """Compare A and energy across tilt angle offsets.

        Delegates to the tracker optimizer; ``angles=None`` uses its
        default angle set.
        """
        try:
            from src.tracker_optimizer import simulate_tilt_angles, load_sensor_data
            df = load_sensor_data()
            result_df = simulate_tilt_angles(df, angles=angles)
            records = result_df.to_dict(orient="records")
            # Round floats so the payload stays compact for the LLM.
            for r in records:
                for k, v in r.items():
                    if isinstance(v, (float, np.floating)):
                        r[k] = round(float(v), 2)
            return {"angles": records}
        except Exception as exc:
            return {"error": f"Angle comparison failed: {exc}"}

    def daily_schedule(
        self, stress_threshold: float = 2.0, shade_angle: int = 20,
    ) -> Dict[str, Any]:
        """Hourly shading schedule based on leaf-air temperature stress.

        Parameters
        ----------
        stress_threshold : float
            Leaf-minus-air temperature (deg C) above which to shade.
        shade_angle : int
            Tilt offset (degrees) applied during shading hours.
        """
        try:
            from src.tracker_optimizer import compute_daily_schedule, load_sensor_data
            df = load_sensor_data()
            # Use the most recent day available in the sensor data.
            last_date = df["date"].max()
            day_df = df[df["date"] == last_date].copy()
            if day_df.empty:
                return {"error": "No sensor data available for schedule."}
            result_df = compute_daily_schedule(
                day_df, stress_threshold=stress_threshold, shade_angle=shade_angle,
            )
            records = result_df.to_dict(orient="records")
            # Normalise cell types for JSON serialisation.
            for r in records:
                for k, v in list(r.items()):
                    if isinstance(v, (float, np.floating)):
                        r[k] = round(float(v), 2)
                    elif isinstance(v, (pd.Timestamp, datetime)):
                        r[k] = str(v)
            return {"date": str(last_date), "schedule": records}
        except Exception as exc:
            return {"error": f"Schedule failed: {exc}"}

    def get_photosynthesis_3d_scene(
        self,
        hour: Optional[int] = None,
        date_str: Optional[str] = None,
        height_px: int = 480,
    ) -> Dict[str, Any]:
        """Build 3D scene data and HTML for vine, tracker, sun and photosynthesis.

        Returns dict with scene_3d (data), scene_3d_html (full HTML string),
        A_vine, sunlit_fraction, and optional error.
        """
        try:
            from src.vine_3d_scene import build_scene_data, build_scene_html
        except Exception as exc:
            return {"error": f"3D scene module unavailable: {exc}"}

        try:
            # Local import shadows the module-level ``datetime`` class on
            # purpose; only ``now().hour`` is needed here.
            from datetime import datetime
            h = hour if hour is not None else datetime.now().hour
            scene_data = build_scene_data(hour=h, date_str=date_str)
            html = build_scene_html(scene_data, height_px=height_px)
            return {
                "scene_3d": scene_data,
                "scene_3d_html": html,
                "A_vine": scene_data["A_vine"],
                "sunlit_fraction": scene_data["sunlit_fraction"],
                "hour": scene_data["hour"],
                "date": scene_data["date"],
            }
        except Exception as exc:
            return {"error": f"3D scene build failed: {exc}"}
799
+
800
+
801
+ # ═══════════════════════════════════════════════════════════════════════
802
+ # 4. EnergyService (TB generation + analytical prediction)
803
+ # ═══════════════════════════════════════════════════════════════════════
804
+
805
+ class EnergyService(BaseService):
806
+ """Energy generation data from ThingsBoard Plant asset.
807
+
808
+ The 'Yeruham Vineyard' asset (type=Plant) provides:
809
+ - ``power``: instantaneous power in W
810
+ - ``production``: energy produced per 5-min interval in Wh
811
+
812
+ Daily kWh = sum(production) / 1000 over the day.
813
+ """
814
+
815
+ service_name = "energy"
816
+
817
    def __init__(self, tb_client: Any = None):
        # Optional injected ThingsBoard client (for tests); otherwise the
        # real client is created lazily in _client().
        self._tb = tb_client
819
+
820
    def _client(self):
        """Return the ThingsBoard client, creating it lazily on first use."""
        if self._tb is None:
            # NOTE(review): this imports src.data.thingsboard_client while
            # VineSensorService imports src.thingsboard_client — presumably
            # one path is a shim for the other; confirm both resolve to the
            # same client implementation.
            from src.data.thingsboard_client import ThingsBoardClient
            self._tb = ThingsBoardClient()
        return self._tb
825
+
826
+ # ------------------------------------------------------------------
827
+ # Public API
828
+ # ------------------------------------------------------------------
829
+
830
+ def get_current(self) -> Dict[str, Any]:
831
+ """Latest power reading from the Plant asset."""
832
+ try:
833
+ vals = self._client().get_asset_latest("Plant", ["power", "production"])
834
+ power_w = vals.get("power")
835
+ return {
836
+ "power_kw": round(power_w / 1000, 1) if power_w else None,
837
+ "source": "ThingsBoard Plant asset",
838
+ }
839
+ except Exception as exc:
840
+ return {"error": f"Energy current failed: {exc}"}
841
+
842
    def get_daily_production(self, target_date: Optional[str] = None) -> Dict[str, Any]:
        """Accumulated energy production for a single day (real TB data).

        Returns dict with daily_kwh, peak_hour, hourly_profile.

        Parameters
        ----------
        target_date : str, optional
            ISO date; defaults to today. NOTE(review): the day window is
            [midnight, midnight) in UTC, not Israel local time — confirm
            this matches how "a day" is reported elsewhere.
        """
        try:
            target = target_date or str(date.today())
            day_start = pd.Timestamp(target, tz="UTC")
            day_end = day_start + pd.Timedelta(days=1)

            # Hourly SUM buckets of the 5-min "production" counter.
            df = self._client().get_asset_timeseries(
                "Plant", ["production"],
                start=day_start.to_pydatetime(),
                end=day_end.to_pydatetime(),
                limit=500,
                interval_ms=3_600_000,  # 1 hour
                agg="SUM",
            )
            if df.empty or "production" not in df.columns:
                return {"date": target, "daily_kwh": None, "error": "No production data"}

            # production is in Wh per interval; hourly SUM = Wh per hour
            df["kwh"] = df["production"].fillna(0) / 1000
            total_kwh = df["kwh"].sum()

            # Convert UTC → Israel local time for display
            try:
                import zoneinfo
                tz_il = zoneinfo.ZoneInfo("Asia/Jerusalem")
            except Exception:
                # No tz database — fall back to labelling hours in UTC.
                tz_il = None

            # Build the per-hour profile and track the peak hour as we go.
            hourly_profile = []
            peak_hour = 12
            peak_kwh = 0.0
            for ts, row in df.iterrows():
                local_ts = ts.astimezone(tz_il) if tz_il else ts
                h = local_ts.hour if hasattr(local_ts, "hour") else 0
                kwh = row["kwh"]
                hourly_profile.append({"hour": h, "energy_kwh": round(kwh, 2)})
                if kwh > peak_kwh:
                    peak_kwh = kwh
                    peak_hour = h

            return {
                "date": target,
                "daily_kwh": round(total_kwh, 1),
                "peak_hour": peak_hour,
                "peak_hour_kwh": round(peak_kwh, 2),
                "hourly_profile": hourly_profile,
                "source": "ThingsBoard Plant asset",
            }
        except Exception as exc:
            # NOTE(review): on failure this echoes the raw target_date
            # argument (possibly None), not the resolved `target`.
            return {"date": target_date, "daily_kwh": None, "error": f"Energy fetch failed: {exc}"}
896
+
897
    def get_history(self, hours_back: int = 24) -> Dict[str, Any]:
        """Hourly power time-series from TB Plant asset.

        Parameters
        ----------
        hours_back : int
            Size of the look-back window ending now (UTC).
        """
        try:
            end = datetime.now(tz=timezone.utc)
            start = end - timedelta(hours=hours_back)
            df = self._client().get_asset_timeseries(
                "Plant", ["power", "production"],
                start=start, end=end,
                limit=500,
                interval_ms=3_600_000,  # 1-hour AVG buckets
                agg="AVG",
            )
            if df.empty:
                return {"error": f"No energy data in last {hours_back} hours."}
            # NOTE(review): assumes a "power" column exists whenever df is
            # non-empty; a missing column raises KeyError, which surfaces
            # through the generic error path below.
            df["power_kw"] = df["power"].fillna(0) / 1000
            return summarise_dataframe(df[["power_kw"]])
        except Exception as exc:
            return {"error": f"Energy history failed: {exc}"}
915
+
916
+ def predict(self, target_date: Optional[str] = None,
917
+ *, ims_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
918
+ """For future dates: analytical estimate. For past/today: real TB data."""
919
+ target = target_date or str(date.today())
920
+ target_d = date.fromisoformat(target)
921
+ today = date.today()
922
+
923
+ # Past or today → use real TB data
924
+ if target_d <= today:
925
+ return self.get_daily_production(target)
926
+
927
+ # Future → analytical estimate from IMS GHI
928
+ return self._predict_analytical(target, ims_df=ims_df)
929
+
930
+ def _predict_analytical(self, target_date: str,
931
+ *, ims_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
932
+ """Energy estimate for future dates.
933
+
934
+ Strategy (in priority order):
935
+ 1. ML predictor (XGBoost) with ThingsBoard Air1 weather persistence
936
+ 2. ML predictor with IMS weather persistence
937
+ 3. Analytical fallback (GHI × system capacity)
938
+ """
939
+ # --- Try ML predictor with on-site weather first ---
940
+ try:
941
+ result = self._predict_ml(target_date)
942
+ if result and result.get("daily_kwh") is not None:
943
+ return result
944
+ except Exception:
945
+ pass # fall through to IMS / analytical
946
+
947
+ # --- Fallback: analytical from IMS GHI ---
948
+ try:
949
+ if ims_df is not None:
950
+ df = ims_df
951
+ else:
952
+ from src.ims_client import IMSClient
953
+ df = IMSClient().load_cached()
954
+ if df.empty:
955
+ return {"date": target_date, "daily_kwh": None, "error": "No weather data"}
956
+
957
+ if "timestamp_utc" in df.columns:
958
+ df = df.copy()
959
+ df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
960
+ df = df.set_index("timestamp_utc")
961
+
962
+ # Try ML predictor with IMS data
963
+ try:
964
+ from src.energy_predictor import EnergyPredictor
965
+ ep = EnergyPredictor()
966
+ return ep.predict_day_from_weather_df(target_date, df.tail(96))
967
+ except Exception:
968
+ pass # fall through to raw analytical
969
+
970
+ # Raw analytical: GHI × capacity / STC
971
+ day_df = df.tail(96).copy()
972
+ if day_df.empty:
973
+ return {"date": target_date, "daily_kwh": None, "error": "Not enough IMS data"}
974
+
975
+ ghi_col = next(
976
+ (c for c in day_df.columns if "ghi" in c.lower() or "rad" in c.lower()), None)
977
+ if ghi_col is None:
978
+ return {"date": target_date, "daily_kwh": None, "error": "No GHI column"}
979
+
980
+ from config.settings import SYSTEM_CAPACITY_KW, STC_IRRADIANCE_W_M2
981
+ system_kw = SYSTEM_CAPACITY_KW
982
+ stc_ghi = STC_IRRADIANCE_W_M2
983
+ slot_hours = 0.25
984
+
985
+ total_kwh = 0.0
986
+ hourly_kwh: Dict[int, float] = {}
987
+ for idx, row in day_df.iterrows():
988
+ ghi = float(row[ghi_col]) if pd.notna(row.get(ghi_col)) else 0.0
989
+ if ghi <= 0:
990
+ continue
991
+ energy = system_kw * (ghi / stc_ghi) * slot_hours
992
+ total_kwh += energy
993
+ h = idx.hour if hasattr(idx, "hour") else 0
994
+ hourly_kwh[h] = hourly_kwh.get(h, 0) + energy
995
+
996
+ peak_hour = max(hourly_kwh, key=hourly_kwh.get) if hourly_kwh else 12
997
+ hourly_profile = [
998
+ {"hour": h, "energy_kwh": round(e, 2)}
999
+ for h, e in sorted(hourly_kwh.items())
1000
+ ]
1001
+ return {
1002
+ "date": target_date,
1003
+ "daily_kwh": round(total_kwh, 1),
1004
+ "peak_hour": peak_hour,
1005
+ "peak_hour_kwh": round(hourly_kwh.get(peak_hour, 0), 2),
1006
+ "hourly_profile": hourly_profile,
1007
+ "source": f"Analytical estimate (persistence forecast × {system_kw:.0f} kW system)",
1008
+ }
1009
+ except Exception as exc:
1010
+ return {"date": target_date, "daily_kwh": None, "error": f"Prediction failed: {exc}"}
1011
+
1012
+ def _predict_ml(self, target_date: str) -> Optional[Dict[str, Any]]:
1013
+ """ML energy prediction using latest ThingsBoard Air1 weather as persistence forecast."""
1014
+ from src.energy_predictor import EnergyPredictor
1015
+
1016
+ ep = EnergyPredictor()
1017
+
1018
+ # Fetch last 24h of on-site weather (Air1) as persistence forecast
1019
+ end = datetime.now(tz=timezone.utc)
1020
+ start = end - timedelta(hours=24)
1021
+
1022
+ df = self._client().get_timeseries(
1023
+ "Air1",
1024
+ keys=["GSR", "airTemperature", "windSpeed"],
1025
+ start=start, end=end,
1026
+ limit=500,
1027
+ interval_ms=3_600_000,
1028
+ agg="AVG",
1029
+ )
1030
+ if df.empty or len(df) < 8:
1031
+ return None
1032
+
1033
+ return ep.predict_day_from_weather_df(target_date, df)
1034
+
1035
+
1036
+ # ═══════════════════════════════════════════════════════════════════════
1037
+ # 5. AdvisoryService (Gemini day-ahead advisor)
1038
+ # ═══════════════════════════════════════════════════════════════════════
1039
+
1040
+ class AdvisoryService(BaseService):
1041
+ """Gemini-powered day-ahead stress advisory."""
1042
+
1043
+ service_name = "advisory"
1044
+
1045
+ def __init__(self, vine_sensor_svc: Optional[VineSensorService] = None, verbose: bool = False):
1046
+ self._vine_svc = vine_sensor_svc
1047
+ self._verbose = verbose
1048
+
1049
+ def run_advisory(self, target_date: Optional[str] = None) -> Dict[str, Any]:
1050
+ """Full DayAheadAdvisor report, enriched with vine snapshot if available."""
1051
+ try:
1052
+ from src.day_ahead_advisor import DayAheadAdvisor
1053
+ from src.ims_client import IMSClient
1054
+
1055
+ advisor = DayAheadAdvisor(verbose=self._verbose)
1056
+ weather_df = IMSClient().load_cached()
1057
+ if weather_df.empty:
1058
+ return {"error": "No IMS weather data cached. Cannot run advisory."}
1059
+
1060
+ vine_snapshot = None
1061
+ if self._vine_svc:
1062
+ snap_dict = self._vine_svc.get_snapshot()
1063
+ if "error" not in snap_dict:
1064
+ # Reconstruct a VineSnapshot-like object for to_advisor_text()
1065
+ try:
1066
+ from src.thingsboard_client import ThingsBoardClient
1067
+ tb = self._vine_svc._client()
1068
+ vine_snapshot = tb.get_vine_snapshot()
1069
+ except Exception:
1070
+ pass
1071
+
1072
+ report = advisor.advise(
1073
+ date=target_date or str(date.today()),
1074
+ weather_forecast=weather_df,
1075
+ phenological_stage="vegetative",
1076
+ vine_snapshot=vine_snapshot,
1077
+ )
1078
+ return DayAheadAdvisor.report_to_dict(report)
1079
+ except Exception as exc:
1080
+ return {"error": f"Advisory failed: {exc}"}
1081
+
1082
+
1083
+ # ═══════════════════════════════════════════════════════════════════════
1084
+ # 6. BiologyService (rule lookup — no external deps)
1085
+ # ═══════════════════════════════════════════════════════════════════════
1086
+
1087
+ class BiologyService(BaseService):
1088
+ """Biology rules lookup — pure in-memory, no API calls."""
1089
+
1090
+ service_name = "biology"
1091
+
1092
+ def __init__(self, rules: Optional[Dict[str, str]] = None):
1093
+ if rules is None:
1094
+ from src.vineyard_chatbot import BIOLOGY_RULES
1095
+ rules = BIOLOGY_RULES
1096
+ self._rules = rules
1097
+
1098
+ def explain_rule(self, rule_name: str) -> Dict[str, Any]:
1099
+ key = rule_name.lower().strip()
1100
+ if key in self._rules:
1101
+ return {"rule": key, "explanation": self._rules[key]}
1102
+ return {"error": f"Unknown rule '{key}'", "available_rules": list(self._rules.keys())}
1103
+
1104
+ def list_rules(self) -> Dict[str, Any]:
1105
+ return {"rules": list(self._rules.keys())}
1106
+
1107
+
1108
+ # ═══════════════════════════════════════════════════════════════════════
1109
+ # DataHub (service registry)
1110
+ # ═══════════════════════════════════════════════════════════════════════
1111
+
1112
+ class DataHub:
1113
+ """Lightweight registry of data-provider services.
1114
+
1115
+ Usage
1116
+ -----
1117
+ hub = DataHub.default()
1118
+ hub.weather.get_current()
1119
+ hub.vine_sensors.get_snapshot()
1120
+ hub.photosynthesis.predict_fvcb(PAR=1500, ...)
1121
+ hub.energy.get_current()
1122
+
1123
+ The chatbot receives a hub at init and delegates all data access
1124
+ through it — never importing data clients directly.
1125
+ """
1126
+
1127
+ def __init__(self) -> None:
1128
+ self._services: Dict[str, BaseService] = {}
1129
+
1130
+ # -- registration --
1131
+
1132
+ def register(self, service: BaseService) -> None:
1133
+ self._services[service.service_name] = service
1134
+
1135
+ def get(self, name: str) -> BaseService:
1136
+ if name not in self._services:
1137
+ raise KeyError(f"No service registered as '{name}'. "
1138
+ f"Available: {list(self._services)}")
1139
+ return self._services[name]
1140
+
1141
+ # -- typed accessors (convenience, avoids casts everywhere) --
1142
+
1143
+ @property
1144
+ def weather(self) -> WeatherService:
1145
+ return self._services["weather"] # type: ignore[return-value]
1146
+
1147
+ @property
1148
+ def vine_sensors(self) -> VineSensorService:
1149
+ return self._services["vine_sensors"] # type: ignore[return-value]
1150
+
1151
+ @property
1152
+ def photosynthesis(self) -> PhotosynthesisService:
1153
+ return self._services["photosynthesis"] # type: ignore[return-value]
1154
+
1155
+ @property
1156
+ def energy(self) -> EnergyService:
1157
+ return self._services["energy"] # type: ignore[return-value]
1158
+
1159
+ @property
1160
+ def advisory(self) -> AdvisoryService:
1161
+ return self._services["advisory"] # type: ignore[return-value]
1162
+
1163
+ @property
1164
+ def biology(self) -> BiologyService:
1165
+ return self._services["biology"] # type: ignore[return-value]
1166
+
1167
+ # -- factory --
1168
+
1169
+ @classmethod
1170
+ def default(cls, verbose: bool = False) -> "DataHub":
1171
+ """Create a hub with all default services (lazy clients)."""
1172
+ hub = cls()
1173
+ vine_svc = VineSensorService()
1174
+ hub.register(WeatherService())
1175
+ hub.register(vine_svc)
1176
+ hub.register(PhotosynthesisService())
1177
+ hub.register(EnergyService())
1178
+ hub.register(AdvisoryService(vine_sensor_svc=vine_svc, verbose=verbose))
1179
+ hub.register(BiologyService())
1180
+ return hub
src/data/data_schema.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SolarWine 2.0 — Data Schema
3
+ ============================
4
+ Canonical dataclasses for the four telemetry tables that flow through
5
+ the 15-minute control loop.
6
+
7
+ SensorRaw — one-slot snapshot of all on-site + IMS inputs
8
+ BiologicalState — photosynthesis model outputs + phenological state
9
+ TrackerKinematics — tracker position, commands, operational mode
10
+ SimulationLog — complete audit record for one 15-min slot
11
+
12
+ Storage
13
+ -------
14
+ CSV/Parquet backend via to_dict() / from_dict() helpers. Schema is forward-
15
+ compatible with a future TimescaleDB migration (all timestamps are UTC,
16
+ numeric fields are SI units).
17
+
18
+ Unit conventions
19
+ ----------------
20
+ Temperatures : °C
21
+ PAR : µmol m⁻² s⁻¹
22
+ DLI : mol m⁻² day⁻¹
23
+ Irradiance (GHI) : W m⁻²
24
+ VPD : kPa
25
+ CO₂ : ppm
26
+ Angles : degrees (tilt: + = east-facing, 0 = horizontal, - = west-facing)
27
+ Energy : kWh
28
+ Soil moisture : %
29
+ Wind speed : m s⁻¹
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ from dataclasses import asdict, dataclass, field
35
+ from datetime import datetime, timezone
36
+ from typing import Any, Dict, List, Optional
37
+
38
+ from src.utils import cwsi_from_delta_t
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # SensorRaw — single 15-min slot of all sensor inputs
43
+ # ---------------------------------------------------------------------------
44
+
45
+ @dataclass
46
+ class SensorRaw:
47
+ """
48
+ Canonical sensor snapshot for one 15-min control slot.
49
+
50
+ Populated from ThingsBoard (TB) via VineSnapshot for real-time control,
51
+ or from CSV/Parquet for historical replay. IMS fields are always from
52
+ the IMS station 43 (Sde Boker) cache.
53
+ """
54
+
55
+ ts: datetime # UTC timestamp of the slot start
56
+
57
+ # --- TB microclimate (treatment area Air2/3/4 average) ---
58
+ air_temp_c: Optional[float] = None
59
+ leaf_temp_c: Optional[float] = None
60
+ vpd_kpa: Optional[float] = None
61
+ co2_ppm: Optional[float] = None
62
+ air_leaf_delta_t: Optional[float] = None # proxy for CWSI
63
+ humidity_pct: Optional[float] = None
64
+ dew_temp_c: Optional[float] = None
65
+
66
+ # --- PAR / irradiance ---
67
+ par_umol: Optional[float] = None # above-canopy ambient PAR (Air devices)
68
+ fruiting_zone_par_umol: Optional[float] = None # mid-canopy PAR (Crop3/5/6/7 avg)
69
+ ghi_w_m2: Optional[float] = None # IMS global horizontal irradiance
70
+
71
+ # --- Daily light / spectral indices ---
72
+ dli_mol_m2: Optional[float] = None # daily light integral so far
73
+ ndvi: Optional[float] = None
74
+ pri: Optional[float] = None
75
+
76
+ # --- Wind & rain ---
77
+ wind_speed_ms: Optional[float] = None
78
+ wind_angle_deg: Optional[float] = None
79
+ rain_mm: Optional[float] = None
80
+ air_pressure_hpa: Optional[float] = None
81
+
82
+ # --- TB soil (treatment area Soil1/3/5/6 average) ---
83
+ soil_moisture_pct: Optional[float] = None
84
+ soil_temp_c: Optional[float] = None
85
+ soil_ec_ds_m: Optional[float] = None
86
+ soil_ph: Optional[float] = None
87
+
88
+ # --- TB reference area (Crop1/2/4 avg, open sky) ---
89
+ reference_crop_par_umol: Optional[float] = None
90
+ reference_crop_leaf_temp_c: Optional[float] = None
91
+ reference_soil_moisture_pct: Optional[float] = None
92
+
93
+ # --- Shading effectiveness ---
94
+ par_shading_ratio: Optional[float] = None # treatment / reference PAR (<1 = shaded)
95
+
96
+ # --- Derived stress index ---
97
+ cwsi: Optional[float] = None # explicit CWSI if available from TB
98
+
99
+ # --- Data provenance ---
100
+ source: str = "unknown" # "thingsboard" | "ims" | "csv" | "mixed"
101
+ quality_flags: List[str] = field(default_factory=list)
102
+ # e.g. ["soil5_temp_outlier_excluded", "air3_stale"]
103
+
104
+ # ------------------------------------------------------------------
105
+ # Factory: build from a VineSnapshot
106
+ # ------------------------------------------------------------------
107
+
108
+ @classmethod
109
+ def from_vine_snapshot(cls, snapshot: Any) -> "SensorRaw":
110
+ """
111
+ Construct SensorRaw from a ThingsBoardClient.VineSnapshot.
112
+
113
+ The snapshot already contains treatment-vs-reference aggregations
114
+ and bounded averages; this method simply re-maps them to the
115
+ canonical SensorRaw field names.
116
+ """
117
+ flags: List[str] = []
118
+ if hasattr(snapshot, "staleness_minutes") and snapshot.staleness_minutes > 20:
119
+ flags.append(f"stale_{snapshot.staleness_minutes:.0f}min")
120
+
121
+ # CWSI proxy from air-leaf temperature delta (see src.utils.cwsi_from_delta_t)
122
+ cwsi_proxy: Optional[float] = None
123
+ delta_t = getattr(snapshot, "treatment_air_leaf_delta_t", None)
124
+ if delta_t is not None:
125
+ cwsi_proxy = cwsi_from_delta_t(delta_t=delta_t)
126
+
127
+ return cls(
128
+ ts=getattr(snapshot, "snapshot_ts", datetime.now(tz=timezone.utc)),
129
+
130
+ # Microclimate
131
+ air_temp_c=getattr(snapshot, "treatment_air_temp_c", None),
132
+ leaf_temp_c=getattr(snapshot, "treatment_leaf_temp_c", None)
133
+ or getattr(snapshot, "treatment_crop_leaf_temp_c", None),
134
+ vpd_kpa=getattr(snapshot, "treatment_vpd_kpa", None),
135
+ co2_ppm=getattr(snapshot, "treatment_co2_ppm", None),
136
+ air_leaf_delta_t=delta_t,
137
+ humidity_pct=getattr(snapshot, "ambient_humidity_pct", None),
138
+
139
+ # PAR
140
+ par_umol=getattr(snapshot, "treatment_par_umol", None),
141
+ fruiting_zone_par_umol=getattr(snapshot, "treatment_crop_par_umol", None),
142
+ dli_mol_m2=getattr(snapshot, "treatment_crop_dli_mol_m2", None),
143
+ ndvi=getattr(snapshot, "treatment_crop_ndvi", None),
144
+ pri=getattr(snapshot, "treatment_pri", None),
145
+
146
+ # Wind / weather
147
+ wind_speed_ms=getattr(snapshot, "ambient_wind_speed_ms", None),
148
+ wind_angle_deg=getattr(snapshot, "ambient_wind_angle_deg", None),
149
+ rain_mm=getattr(snapshot, "ambient_rain_mm", None),
150
+
151
+ # Soil
152
+ soil_moisture_pct=getattr(snapshot, "treatment_soil_moisture_pct", None),
153
+ soil_temp_c=getattr(snapshot, "treatment_soil_temp_c", None),
154
+ soil_ec_ds_m=getattr(snapshot, "treatment_soil_ec_ds_m", None),
155
+ soil_ph=getattr(snapshot, "treatment_soil_ph", None),
156
+
157
+ # Reference
158
+ reference_crop_par_umol=getattr(snapshot, "reference_crop_par_umol", None),
159
+ reference_crop_leaf_temp_c=getattr(snapshot, "reference_crop_leaf_temp_c", None),
160
+ reference_soil_moisture_pct=getattr(snapshot, "reference_soil_moisture_pct", None),
161
+
162
+ # Shading ratio
163
+ par_shading_ratio=getattr(snapshot, "par_shading_ratio", None),
164
+
165
+ cwsi=cwsi_proxy,
166
+ source="thingsboard",
167
+ quality_flags=flags,
168
+ )
169
+
170
+ # ------------------------------------------------------------------
171
+ # Serialization
172
+ # ------------------------------------------------------------------
173
+
174
+ def to_dict(self) -> Dict[str, Any]:
175
+ d = asdict(self)
176
+ d["ts"] = self.ts.isoformat() if self.ts else None
177
+ return d
178
+
179
+ @classmethod
180
+ def from_dict(cls, d: Dict[str, Any]) -> "SensorRaw":
181
+ d = d.copy()
182
+ if isinstance(d.get("ts"), str):
183
+ d["ts"] = datetime.fromisoformat(d["ts"])
184
+ return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})
185
+
186
+
187
+ # ---------------------------------------------------------------------------
188
+ # BiologicalState — photosynthesis model outputs + phenology
189
+ # ---------------------------------------------------------------------------
190
+
191
+ @dataclass
192
+ class BiologicalState:
193
+ """
194
+ Computed vine physiological state for one control slot.
195
+
196
+ Produced by the FarquharModel (or ML ensemble via RoutingAgent) and
197
+ the phenology tracker. Drives the InterventionGate and TradeoffEngine.
198
+ """
199
+
200
+ ts: datetime
201
+
202
+ # --- Photosynthesis model outputs ---
203
+ a_net_umol: Optional[float] = None # net carbon assimilation (µmol CO₂ m⁻² s⁻¹)
204
+ limiting_state: Optional[str] = None # "rubp" | "rubisco" | "tpu" | "transition"
205
+ shading_helps: Optional[bool] = None # True only when Rubisco-limited AND heat is bottleneck
206
+
207
+ # --- Model provenance ---
208
+ model_used: str = "unknown" # "fvcb" | "fvcb_semillon" | "ml" | "ml_ensemble"
209
+ model_confidence: Optional[float] = None # 0–1 (1 = high confidence in routing choice)
210
+
211
+ # --- Raw inputs echoed for auditing ---
212
+ par_input: Optional[float] = None
213
+ tleaf_input: Optional[float] = None
214
+ vpd_input: Optional[float] = None
215
+ co2_input: Optional[float] = None
216
+
217
+ # --- Phenological state ---
218
+ phenological_stage: str = "vegetative" # vegetative | flowering | veraison | harvest
219
+ gdd_cumulative: Optional[float] = None # growing degree days since budburst
220
+ crop_value_weight: float = 1.0 # seasonal multiplier (1.5× at veraison, 0.5× post-harvest)
221
+
222
+ # --- Stress levels ---
223
+ heat_stress_level: str = "none" # none | low | moderate | high | extreme
224
+ water_stress_level: str = "none"
225
+ sunburn_risk: bool = False # True when Tleaf > BERRY_SUNBURN_TEMP_C
226
+
227
+ # --- Fruiting-zone specific ---
228
+ fruiting_zone_a_net: Optional[float] = None # A at mid-canopy zone (zone index 1)
229
+ fruiting_zone_par: Optional[float] = None # PAR at mid-canopy
230
+ top_canopy_a_net: Optional[float] = None # A at top-canopy zone (zone index 2)
231
+
232
+ # ------------------------------------------------------------------
233
+
234
+ def to_dict(self) -> Dict[str, Any]:
235
+ d = asdict(self)
236
+ d["ts"] = self.ts.isoformat() if self.ts else None
237
+ return d
238
+
239
+ @classmethod
240
+ def from_dict(cls, d: Dict[str, Any]) -> "BiologicalState":
241
+ d = d.copy()
242
+ if isinstance(d.get("ts"), str):
243
+ d["ts"] = datetime.fromisoformat(d["ts"])
244
+ return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})
245
+
246
+
247
+ # ---------------------------------------------------------------------------
248
+ # TrackerKinematics — tracker position and operational mode
249
+ # ---------------------------------------------------------------------------
250
+
251
+ @dataclass
252
+ class TrackerKinematics:
253
+ """
254
+ Single-axis tracker state for one control slot.
255
+
256
+ astronomical_tilt_deg is always the sun-following position (full-energy).
257
+ shade_offset_deg is the deliberate deviation for vine protection.
258
+ effective_tilt_deg = astronomical_tilt_deg + shade_offset_deg.
259
+
260
+ Angle convention: 0° = horizontal, positive = tilted toward east,
261
+ negative = tilted toward west (consistent with pvlib single-axis sign convention).
262
+ """
263
+
264
+ ts: datetime
265
+
266
+ # --- Astronomical tracking (default / full-energy position) ---
267
+ astronomical_tilt_deg: float = 0.0
268
+ solar_azimuth_deg: Optional[float] = None
269
+ solar_elevation_deg: Optional[float] = None
270
+
271
+ # --- Shading offset (deliberate protection deviation) ---
272
+ shade_offset_deg: float = 0.0 # 0 = no protection, positive values = shade intervention
273
+ effective_tilt_deg: float = 0.0 # astronomical + shade_offset
274
+
275
+ # --- Previous slot (for hysteresis) ---
276
+ previous_tilt_deg: Optional[float] = None
277
+ tilt_change_deg: float = 0.0 # effective_tilt - previous_tilt
278
+ motion_triggered: bool = False # True if |change| > ANGLE_TOLERANCE_DEG
279
+
280
+ # --- Operational mode ---
281
+ operational_mode: str = "tracking" # tracking | wind_stow | heat_shield | harvest_park
282
+ mode_override_reason: Optional[str] = None
283
+
284
+ # --- Panel surface temperatures ---
285
+ panel_temp_treatment_c: Optional[float] = None # Thermocouples1 avg
286
+ panel_temp_reference_c: Optional[float] = None # Thermocouples2 avg
287
+
288
+ # ------------------------------------------------------------------
289
+
290
+ def to_dict(self) -> Dict[str, Any]:
291
+ d = asdict(self)
292
+ d["ts"] = self.ts.isoformat() if self.ts else None
293
+ return d
294
+
295
+ @classmethod
296
+ def from_dict(cls, d: Dict[str, Any]) -> "TrackerKinematics":
297
+ d = d.copy()
298
+ if isinstance(d.get("ts"), str):
299
+ d["ts"] = datetime.fromisoformat(d["ts"])
300
+ return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})
301
+
302
+
303
+ # ---------------------------------------------------------------------------
304
+ # SimulationLog — complete audit record for one 15-min slot
305
+ # ---------------------------------------------------------------------------
306
+
307
+ @dataclass
308
+ class SimulationLog:
309
+ """
310
+ Full audit record for one 15-minute control loop execution.
311
+
312
+ Written to `data/simulation_log.parquet` (or CSV) after every slot.
313
+ Used for replay, validation, ROI reporting, and Phase 7 integration tests.
314
+ """
315
+
316
+ ts: datetime
317
+ slot_index: int # 0–95 for a 24-hour day (96 × 15-min slots)
318
+ date_str: str = "" # YYYY-MM-DD local date for partitioning
319
+
320
+ # --- Nested state objects ---
321
+ sensor: Optional[SensorRaw] = None
322
+ bio: Optional[BiologicalState] = None
323
+ kinematics: Optional[TrackerKinematics] = None
324
+
325
+ # --- InterventionGate outcome ---
326
+ intervention_gate_passed: bool = False
327
+ gate_rejection_reason: Optional[str] = None
328
+ # Rejection categories: "no_shade_window:morning" | "no_shade_window:may" |
329
+ # "overcast" | "below_temp_threshold" | "below_cwsi_threshold" | "budget_exhausted"
330
+
331
+ # --- TradeoffEngine outcome ---
332
+ candidate_offsets_tested: List[float] = field(default_factory=list)
333
+ chosen_offset_deg: float = 0.0
334
+ minimum_dose_rationale: Optional[str] = None
335
+ # e.g. "offset 5° sufficient: fruiting PAR reduced below 400 µmol/m²/s"
336
+
337
+ # --- Safety rails ---
338
+ fvcb_a: Optional[float] = None
339
+ ml_a: Optional[float] = None
340
+ model_divergence_pct: Optional[float] = None # |fvcb_a - ml_a| / max * 100
341
+ safety_fallback_triggered: bool = False
342
+ routing_decision: Optional[str] = None # "fvcb" | "ml" — which model was used
343
+
344
+ # --- Energy budget accounting ---
345
+ energy_fraction_this_slot: float = 0.0 # fraction of max generation sacrificed
346
+ budget_remaining_daily_kwh: Optional[float] = None
347
+ budget_remaining_weekly_kwh: Optional[float] = None
348
+ budget_remaining_monthly_kwh: Optional[float] = None
349
+
350
+ # --- Feedback (filled in the following slot) ---
351
+ a_net_actual: Optional[float] = None # measured A in next slot (for validation)
352
+ a_net_improvement_pct: Optional[float] = None # vs unshaded counterfactual
353
+
354
+ # --- Explainability tags ---
355
+ decision_tags: List[str] = field(default_factory=list)
356
+ # e.g. ["rubisco_limited", "dose:5deg", "veraison_1.5x", "budget_ok:32%_remaining"]
357
+
358
+ # ------------------------------------------------------------------
359
+ # Serialization
360
+ # ------------------------------------------------------------------
361
+
362
+ def to_dict(self) -> Dict[str, Any]:
363
+ """Deep-serialize to a plain dict (JSON-serializable)."""
364
+ d: Dict[str, Any] = {
365
+ "ts": self.ts.isoformat() if self.ts else None,
366
+ "slot_index": self.slot_index,
367
+ "date_str": self.date_str,
368
+ "sensor": self.sensor.to_dict() if self.sensor else None,
369
+ "bio": self.bio.to_dict() if self.bio else None,
370
+ "kinematics": self.kinematics.to_dict() if self.kinematics else None,
371
+ "intervention_gate_passed": self.intervention_gate_passed,
372
+ "gate_rejection_reason": self.gate_rejection_reason,
373
+ "candidate_offsets_tested": self.candidate_offsets_tested,
374
+ "chosen_offset_deg": self.chosen_offset_deg,
375
+ "minimum_dose_rationale": self.minimum_dose_rationale,
376
+ "fvcb_a": self.fvcb_a,
377
+ "ml_a": self.ml_a,
378
+ "model_divergence_pct": self.model_divergence_pct,
379
+ "safety_fallback_triggered": self.safety_fallback_triggered,
380
+ "routing_decision": self.routing_decision,
381
+ "energy_fraction_this_slot": self.energy_fraction_this_slot,
382
+ "budget_remaining_daily_kwh": self.budget_remaining_daily_kwh,
383
+ "budget_remaining_weekly_kwh": self.budget_remaining_weekly_kwh,
384
+ "budget_remaining_monthly_kwh": self.budget_remaining_monthly_kwh,
385
+ "a_net_actual": self.a_net_actual,
386
+ "a_net_improvement_pct": self.a_net_improvement_pct,
387
+ "decision_tags": self.decision_tags,
388
+ }
389
+ return d
390
+
391
+ def to_flat_row(self) -> Dict[str, Any]:
392
+ """
393
+ Flatten all nested objects into a single dict row suitable for
394
+ appending to a Parquet or CSV log file.
395
+
396
+ Nested field names are prefixed: sensor__*, bio__*, kinematics__*.
397
+ """
398
+ row: Dict[str, Any] = {
399
+ "ts": self.ts.isoformat() if self.ts else None,
400
+ "slot_index": self.slot_index,
401
+ "date_str": self.date_str,
402
+ "gate_passed": self.intervention_gate_passed,
403
+ "gate_reason": self.gate_rejection_reason,
404
+ "chosen_offset_deg": self.chosen_offset_deg,
405
+ "fvcb_a": self.fvcb_a,
406
+ "ml_a": self.ml_a,
407
+ "divergence_pct": self.model_divergence_pct,
408
+ "fallback": self.safety_fallback_triggered,
409
+ "routing": self.routing_decision,
410
+ "energy_fraction": self.energy_fraction_this_slot,
411
+ "budget_daily_kwh": self.budget_remaining_daily_kwh,
412
+ "budget_monthly_kwh": self.budget_remaining_monthly_kwh,
413
+ "a_net_actual": self.a_net_actual,
414
+ "a_net_improvement_pct": self.a_net_improvement_pct,
415
+ "tags": "|".join(self.decision_tags),
416
+ }
417
+ if self.sensor:
418
+ for k, v in self.sensor.to_dict().items():
419
+ if k not in ("ts", "quality_flags", "source"):
420
+ row[f"sensor__{k}"] = v
421
+ if self.bio:
422
+ for k, v in self.bio.to_dict().items():
423
+ if k != "ts":
424
+ row[f"bio__{k}"] = v
425
+ if self.kinematics:
426
+ for k, v in self.kinematics.to_dict().items():
427
+ if k != "ts":
428
+ row[f"kin__{k}"] = v
429
+ return row
430
+
431
+
432
+ # ---------------------------------------------------------------------------
433
+ # Public convenience re-exports from VineSnapshot
434
+ # ---------------------------------------------------------------------------
435
+
436
+ def sensor_raw_from_vine_snapshot(snapshot: Any) -> SensorRaw:
437
+ """Module-level alias for SensorRaw.from_vine_snapshot()."""
438
+ return SensorRaw.from_vine_snapshot(snapshot)
439
+
440
+
441
+ # ---------------------------------------------------------------------------
442
+ # Quick self-test
443
+ # ---------------------------------------------------------------------------
444
+
445
+ if __name__ == "__main__":
446
+ import json
447
+ from datetime import timezone
448
+
449
+ now = datetime.now(tz=timezone.utc)
450
+
451
+ sensor = SensorRaw(
452
+ ts=now,
453
+ air_temp_c=33.5,
454
+ leaf_temp_c=35.1,
455
+ vpd_kpa=2.9,
456
+ co2_ppm=410.0,
457
+ fruiting_zone_par_umol=820.0,
458
+ soil_moisture_pct=31.2,
459
+ reference_crop_par_umol=1150.0,
460
+ par_shading_ratio=0.71,
461
+ source="thingsboard",
462
+ )
463
+ bio = BiologicalState(
464
+ ts=now,
465
+ a_net_umol=14.3,
466
+ limiting_state="rubisco",
467
+ shading_helps=True,
468
+ model_used="fvcb_semillon",
469
+ phenological_stage="veraison",
470
+ crop_value_weight=1.5,
471
+ heat_stress_level="moderate",
472
+ sunburn_risk=True,
473
+ )
474
+ kin = TrackerKinematics(
475
+ ts=now,
476
+ astronomical_tilt_deg=42.0,
477
+ shade_offset_deg=5.0,
478
+ effective_tilt_deg=47.0,
479
+ previous_tilt_deg=42.0,
480
+ tilt_change_deg=5.0,
481
+ motion_triggered=True,
482
+ operational_mode="tracking",
483
+ panel_temp_treatment_c=58.3,
484
+ )
485
+ log = SimulationLog(
486
+ ts=now,
487
+ slot_index=52,
488
+ date_str="2025-07-15",
489
+ sensor=sensor,
490
+ bio=bio,
491
+ kinematics=kin,
492
+ intervention_gate_passed=True,
493
+ candidate_offsets_tested=[3.0, 5.0],
494
+ chosen_offset_deg=5.0,
495
+ minimum_dose_rationale="5° sufficient to reduce fruiting-zone PAR below 400",
496
+ fvcb_a=14.3,
497
+ ml_a=14.8,
498
+ model_divergence_pct=3.4,
499
+ routing_decision="fvcb_semillon",
500
+ energy_fraction_this_slot=0.042,
501
+ budget_remaining_daily_kwh=8.1,
502
+ decision_tags=["rubisco_limited", "dose:5deg", "veraison_1.5x", "budget_ok"],
503
+ )
504
+
505
+ print("SensorRaw:")
506
+ print(json.dumps(sensor.to_dict(), indent=2, default=str))
507
+ print("\nBiologicalState:")
508
+ print(json.dumps(bio.to_dict(), indent=2, default=str))
509
+ print("\nTrackerKinematics:")
510
+ print(json.dumps(kin.to_dict(), indent=2, default=str))
511
+ print("\nSimulationLog flat row keys:")
512
+ row = log.to_flat_row()
513
+ print(f" {len(row)} columns")
514
+ print(" First 10:", list(row.keys())[:10])
515
+ print("\nSensorRaw round-trip:")
516
+ s2 = SensorRaw.from_dict(sensor.to_dict())
517
+ assert s2.air_temp_c == sensor.air_temp_c
518
+ assert isinstance(s2.ts, datetime)
519
+ print(" OK")
src/data/ims_client.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ IMSClient: fetch and cache IMS weather data from station 43 (Sde Boker).
3
+ Resamples 10min data to 15min for alignment with sensor data.
4
+ """
5
+
6
+ import os
7
+ import time
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ import pandas as pd
12
+ import requests
13
+
14
+ try:
15
+ from dotenv import load_dotenv
16
+ load_dotenv()
17
+ except ImportError:
18
+ pass
19
+
20
+
21
+ def _parse_ims_date(d: str) -> str:
22
+ """Convert YYYY-MM-DD to IMS format YYYY/MM/DD."""
23
+ return d.replace("-", "/")
24
+
25
+
26
class IMSClient:
    """Fetch IMS API data for a station and cache to Data/ims/.

    Defaults (station id, cache dir, channel map, base URL) come from
    ``config.settings``; the API token is read from the IMS_API_TOKEN
    environment variable unless passed explicitly.
    """

    def __init__(
        self,
        token: Optional[str] = None,
        station_id: Optional[int] = None,
        cache_dir: Optional[Path] = None,
        channel_map: Optional[dict[int, str]] = None,
    ):
        # Lazy import: keeps this module importable before config is on sys.path.
        from config import settings

        self.token = (token or os.environ.get("IMS_API_TOKEN", "")).strip()
        if not self.token:
            raise ValueError(
                "IMS API token is required. Set IMS_API_TOKEN in .env, "
                "in Streamlit Secrets, or pass token= to IMSClient."
            )
        self.station_id = station_id or settings.IMS_STATION_ID
        self.cache_dir = cache_dir or settings.IMS_CACHE_DIR
        self.channel_map = channel_map or settings.IMS_CHANNEL_MAP.copy()
        self._base = f"{settings.IMS_BASE_URL}/{self.station_id}/data"
        self._stations_url = settings.IMS_BASE_URL

    def get_station_metadata(self, station_id: Optional[int] = None) -> dict:
        """
        Fetch station metadata from IMS API (name, location, monitors/channels).

        Returns dict with 'stationId', 'name', 'monitors' (list of
        {channelId, name, units, ...}). Raises requests.HTTPError on a
        non-2xx response.
        """
        sid = station_id or self.station_id
        url = f"{self._stations_url}/{sid}"
        headers = {"Authorization": f"ApiToken {self.token}"}
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        return r.json()

    def list_channels(self, station_id: Optional[int] = None) -> list[dict]:
        """Return list of channel descriptors for the station (channelId, name, units, active)."""
        meta = self.get_station_metadata(station_id)
        monitors = meta.get("monitors", meta.get("channelGroups", []))
        # Normalize: IMS responses use varying key names ('id' vs 'channelId', etc.)
        out = []
        for m in monitors:
            if isinstance(m, dict):
                out.append({
                    "channelId": m.get("channelId", m.get("id")),
                    "name": m.get("name", m.get("channelName", "")),
                    "units": m.get("units", ""),
                    "active": m.get("active", True),
                })
        return out

    def fetch_channel(
        self,
        channel_id: int,
        from_date: str,
        to_date: str,
    ) -> pd.DataFrame:
        """
        Fetch one channel for date range. Dates as YYYY-MM-DD.

        Returns a DataFrame indexed by timestamp_utc with one value column
        (named from the channel map), or an empty DataFrame for empty or
        unparseable responses.
        """
        from_f = _parse_ims_date(from_date)
        to_f = _parse_ims_date(to_date)
        url = f"{self._base}/{channel_id}?from={from_f}&to={to_f}"
        headers = {"Authorization": f"ApiToken {self.token}"}
        r = requests.get(url, headers=headers, timeout=120)
        r.raise_for_status()
        if not r.text or not r.text.strip():
            return pd.DataFrame()
        try:
            raw = r.json()
        except Exception:
            # Non-JSON body (e.g. an HTML error page): treat as no data.
            return pd.DataFrame()
        data = raw.get("data", raw) if isinstance(raw, dict) else raw
        if not isinstance(data, list):
            data = []
        col_name = self.channel_map.get(channel_id, f"channel_{channel_id}")
        rows = []
        for item in data:
            dt = item.get("datetime")
            # IMS returns Israel time (Asia/Jerusalem); parse and convert to UTC
            if isinstance(dt, str):
                ts = pd.to_datetime(dt)
                if ts.tzinfo is None:
                    ts = ts.tz_localize("Asia/Jerusalem").tz_convert("UTC")
                else:
                    ts = ts.tz_convert("UTC")
            else:
                continue
            ch_list = item.get("channels", [])
            val = None
            for ch in ch_list:
                # status == 1 marks a valid reading for this channel.
                if ch.get("id") == channel_id and ch.get("status") == 1:
                    val = ch.get("value")
                    break
            rows.append({"timestamp_utc": ts, col_name: val})
        df = pd.DataFrame(rows)
        if not df.empty:
            df = df.dropna(subset=[col_name])
            df = df.set_index("timestamp_utc").sort_index()
        return df

    def fetch_all_channels(
        self,
        from_date: str,
        to_date: str,
        delay_seconds: float = 0.5,
    ) -> pd.DataFrame:
        """
        Fetch all configured channels and merge on timestamp_utc.

        A short delay is inserted between consecutive API requests — even
        when a channel returns no data — so empty responses do not cause
        back-to-back requests that defeat the rate-limit throttle.
        """
        out = None
        channel_ids = list(self.channel_map)
        last = len(channel_ids) - 1
        for i, ch_id in enumerate(channel_ids):
            df = self.fetch_channel(ch_id, from_date, to_date)
            if i < last:
                time.sleep(delay_seconds)  # throttle between requests
            if df.empty:
                continue
            out = df if out is None else out.join(df, how="outer")
        if out is None:
            return pd.DataFrame()
        return out.reset_index()

    def resample_to_15min(self, df: pd.DataFrame) -> pd.DataFrame:
        """Resample 10min IMS data to 15min (mean). Expects timestamp_utc column."""
        if df.empty or "timestamp_utc" not in df.columns:
            return df
        d = df.set_index("timestamp_utc")
        # Drop slots where every channel is NaN (e.g. station downtime).
        d = d.resample("15min").mean().dropna(how="all")
        return d.reset_index()

    def load_cached(self, cache_path: Optional[Path] = None) -> pd.DataFrame:
        """Load merged IMS data from cache file if it exists; empty DataFrame otherwise."""
        path = cache_path or (self.cache_dir / "ims_merged_15min.csv")
        if not path.exists():
            return pd.DataFrame()
        df = pd.read_csv(path)
        if "timestamp_utc" in df.columns:
            df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
        return df

    def fetch_and_cache(
        self,
        from_date: str,
        to_date: str,
        cache_path: Optional[Path] = None,
        chunk_days: Optional[int] = 60,
    ) -> pd.DataFrame:
        """
        Fetch all channels for the date range, resample to 15min, save to cache.

        If chunk_days is set, split the range into chunks to avoid API empty
        responses. Returns the resampled DataFrame (empty if nothing fetched).
        """
        path = cache_path or (self.cache_dir / "ims_merged_15min.csv")
        path.parent.mkdir(parents=True, exist_ok=True)

        from datetime import datetime, timedelta

        start = datetime.strptime(from_date, "%Y-%m-%d").date()
        end = datetime.strptime(to_date, "%Y-%m-%d").date()
        if start > end:
            start, end = end, start

        if chunk_days is None or (end - start).days <= chunk_days:
            df = self.fetch_all_channels(from_date, to_date)
        else:
            chunks = []
            d = start
            while d < end:
                chunk_end = min(d + timedelta(days=chunk_days), end)
                from_s = d.strftime("%Y-%m-%d")
                to_s = chunk_end.strftime("%Y-%m-%d")
                try:
                    df_chunk = self.fetch_all_channels(from_s, to_s)
                    if not df_chunk.empty:
                        chunks.append(df_chunk)
                except Exception:
                    pass  # best-effort: skip failed chunk, continue with the rest
                d = chunk_end
            df = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
            # Chunk boundaries share the boundary day; de-duplicate on timestamp.
            if not df.empty and "timestamp_utc" in df.columns:
                df = df.drop_duplicates(subset=["timestamp_utc"]).sort_values("timestamp_utc")

        if df.empty:
            return df
        df = self.resample_to_15min(df)
        df.to_csv(path, index=False)
        return df
src/data/redis_cache.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Thin Redis wrapper for cross-process caching (Upstash Redis REST API).
3
+
4
+ Falls back gracefully to ``None`` returns when Redis is unavailable,
5
+ so callers can use in-memory TTLCache as a fallback.
6
+
7
+ Usage::
8
+
9
+ from src.data.redis_cache import get_redis
10
+
11
+ redis = get_redis() # None if no UPSTASH_REDIS_URL
12
+ if redis:
13
+ redis.set_json("weather:current", data, ttl=1800)
14
+ cached = redis.get_json("weather:current")
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
import json
import logging
import os
import threading
from typing import Any, Optional
from urllib.parse import quote
24
+
25
log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Singleton (thread-safe)
# ---------------------------------------------------------------------------

_instance: Optional["RedisCache"] = None
_lock = threading.Lock()


def get_redis() -> Optional["RedisCache"]:
    """Return the process-wide RedisCache singleton, or *None* if not configured.

    Lazily constructs the cache on first use, guarded by a lock so concurrent
    callers cannot race the initialisation (classic double-checked locking).
    Returns None — without caching the failure — when the env vars are missing
    or the connection attempt raises.
    """
    global _instance

    if _instance is not None:  # fast path: already initialised, no lock needed
        return _instance

    url = os.environ.get("UPSTASH_REDIS_URL")
    token = os.environ.get("UPSTASH_REDIS_TOKEN")
    if not (url and token):
        log.debug("Redis not configured (UPSTASH_REDIS_URL / UPSTASH_REDIS_TOKEN missing)")
        return None

    with _lock:
        if _instance is None:  # re-check: another thread may have won the race
            try:
                _instance = RedisCache(url=url, token=token)
                log.info("Redis connected: %s", url.split("@")[-1] if "@" in url else url[:40])
            except Exception as exc:
                log.error("Redis init failed: %s", exc)
                return None
        return _instance
60
+
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # RedisCache (Upstash REST)
64
+ # ---------------------------------------------------------------------------
65
+
66
class RedisCache:
    """Minimal Redis cache using the Upstash REST API (no native driver needed).

    Keys are URL-quoted before being placed in the REST path, so keys that
    contain '/', '?', '#', or spaces address the intended Redis key instead
    of mangling the request URL.
    """

    def __init__(self, url: str, token: str):
        """Store connection details and verify connectivity with a PING.

        Raises (via raise_for_status) if the endpoint rejects the token or
        is unreachable — get_redis() catches this and falls back to None.
        """
        self._url = url.rstrip("/")
        self._headers = {"Authorization": f"Bearer {token}"}
        # Lazy import — requests is already a project dependency
        import requests as _req
        self._req = _req
        # Connectivity check
        resp = self._req.get(f"{self._url}/ping", headers=self._headers, timeout=5)
        resp.raise_for_status()

    # -- JSON helpers -------------------------------------------------------

    def get_json(self, key: str) -> Optional[Any]:
        """Retrieve and JSON-decode a key. Returns None on miss or error."""
        try:
            resp = self._req.get(
                f"{self._url}/get/{quote(key, safe='')}",
                headers=self._headers,
                timeout=5,
            )
            resp.raise_for_status()
            result = resp.json().get("result")
            if result is None:
                return None
            return json.loads(result)
        except Exception as exc:
            log.debug("Redis GET %s failed: %s", key, exc)
            return None

    def set_json(self, key: str, value: Any, ttl: int = 300) -> bool:
        """JSON-encode and store *value* with a TTL in seconds.

        The key travels in the POST body (pipeline format), so no URL
        quoting is required here.
        """
        try:
            payload = json.dumps(value, default=str)
            # Upstash REST API: POST pipeline format
            resp = self._req.post(
                f"{self._url}/pipeline",
                headers={**self._headers, "Content-Type": "application/json"},
                json=[["SET", key, payload, "EX", str(ttl)]],
                timeout=5,
            )
            resp.raise_for_status()
            return True
        except Exception as exc:
            log.debug("Redis SET %s failed: %s", key, exc)
            return False

    def delete(self, key: str) -> bool:
        """Delete a key. Returns True on success, False on any error."""
        try:
            resp = self._req.get(
                f"{self._url}/del/{quote(key, safe='')}",
                headers=self._headers,
                timeout=5,
            )
            resp.raise_for_status()
            return True
        except Exception as exc:
            log.debug("Redis DEL %s failed: %s", key, exc)
            return False

    def exists(self, key: str) -> bool:
        """Check if a key exists (False on any error)."""
        try:
            resp = self._req.get(
                f"{self._url}/exists/{quote(key, safe='')}",
                headers=self._headers,
                timeout=5,
            )
            resp.raise_for_status()
            return resp.json().get("result", 0) == 1
        except Exception:
            return False

    def ping(self) -> bool:
        """Health check: True iff the REST endpoint answers /ping with HTTP 200."""
        try:
            resp = self._req.get(
                f"{self._url}/ping",
                headers=self._headers,
                timeout=5,
            )
            return resp.status_code == 200
        except Exception:
            return False
src/data/sensor_data_loader.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SensorDataLoader: load and filter sensors_wide.csv for Stage 1 (Farquhar model).
3
+ Uses only on-site sensor data from the sensor data directory.
4
+ """
5
+
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ import pandas as pd
10
+
11
+
12
# Stage 1 columns (Farquhar + CWSI) per context/2_plan.md
STAGE1_COLUMNS = [
    "Air1_PAR_ref",
    "Air1_leafTemperature_ref",
    "Air1_airTemperature_ref",
    "Air1_CO2_ref",
    "Air1_VPD_ref",
    "Air1_airHumidity_ref",
]
# Optional spectral indices (Crop sensors); include if present
STAGE1_OPTIONAL = ["Air1_NDVI_ref", "Air1_PRI_ref", "Air1_rNDVI_ref", "Air1_RENDVI_ref"]

# Default timestamp column name in wide CSV
DEFAULT_TIMESTAMP_COL = "time"


class SensorDataLoader:
    """Load sensors_wide.csv and provide Stage 1 columns and daytime filter."""

    # Multiplicative correction for Air1_CO2_ref — the raw sensor reads ~30% high.
    # Class-level so a deployment with a recalibrated sensor can override it.
    CO2_CORRECTION_FACTOR = 0.7

    def __init__(
        self,
        data_path: Optional[Path] = None,
        metadata_path: Optional[Path] = None,
    ):
        # Lazy import keeps the module importable before config is on sys.path.
        from config import settings

        _default = settings.SENSORS_WIDE_PATH
        # Fall back to the bundled sample file when the full dataset is absent.
        if not _default.exists() and settings.SENSORS_WIDE_SAMPLE_PATH.exists():
            _default = settings.SENSORS_WIDE_SAMPLE_PATH
        self.data_path = data_path or _default
        self.metadata_path = metadata_path or settings.SENSORS_WIDE_METADATA_PATH

    def get_stage1_columns(self) -> list[str]:
        """Return list of column names required for Stage 1 (Farquhar + CWSI)."""
        return list(STAGE1_COLUMNS)

    def load(
        self,
        columns: Optional[list[str]] = None,
        timestamp_col: Optional[str] = None,
    ) -> pd.DataFrame:
        """
        Load sensors_wide.csv. If columns is None, load all Stage 1 columns
        plus timestamp. Rows come back sorted by timestamp (UTC) and
        Air1_CO2_ref is corrected by CO2_CORRECTION_FACTOR.

        Raises:
            ValueError: if any requested column (or the timestamp column)
                is missing from the CSV.
        """
        ts_col = timestamp_col or DEFAULT_TIMESTAMP_COL
        use_cols = columns if columns is not None else self.get_stage1_columns()
        use_cols = [c for c in use_cols if c != ts_col]
        if ts_col not in use_cols:
            use_cols = [ts_col] + use_cols

        # The usecols callable silently tolerates missing names, so validate
        # explicitly below and fail loudly instead of returning partial data.
        df = pd.read_csv(self.data_path, usecols=lambda c: c in use_cols)
        missing = [c for c in use_cols if c not in df.columns]
        if missing:
            raise ValueError(
                f"Sensor data missing required columns: {missing}. "
                f"Available: {list(df.columns)[:20]}{'...' if len(df.columns) > 20 else ''}"
            )
        if ts_col in df.columns:
            df[ts_col] = pd.to_datetime(df[ts_col], utc=True)
            df = df.sort_values(ts_col).reset_index(drop=True)
        # Correct Air1_CO2_ref — raw sensor reads ≈ 30% too high
        if "Air1_CO2_ref" in df.columns:
            df["Air1_CO2_ref"] = df["Air1_CO2_ref"] * self.CO2_CORRECTION_FACTOR
        return df

    def filter_daytime(
        self,
        df: pd.DataFrame,
        par_threshold: float = 50.0,
        par_column: str = "Air1_PAR_ref",
    ) -> pd.DataFrame:
        """Keep only rows where PAR > par_threshold (daytime, umol m-2 s-1).

        Returns df unchanged when the PAR column is absent.
        """
        if par_column not in df.columns:
            return df
        return df.loc[df[par_column] > par_threshold].copy()
src/data/thingsboard_client.py ADDED
@@ -0,0 +1,1058 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ThingsBoardClient: live telemetry client for the Seymour vineyard at
3
+ web.seymouragri.com.
4
+
5
+ Device layout
6
+ -------------
7
+ TREATMENT area (rows 501–502, under solar panels):
8
+ Air2, Air3, Air4 — microclimate sensors under the panels
9
+ Crop3, Crop5, Crop6, Crop7 — fruiting-zone crop sensors (per panel position)
10
+ Soil1, Soil3, Soil5, Soil6 — root-zone soil probes
11
+ Irrigation1 — irrigation flow/volume/quality logger
12
+ Thermocouples-1 — panel surface temperature (4 positions)
13
+
14
+ REFERENCE area (rows 503–504, open sky, no panels):
15
+ Crop1, Crop2, Crop4 — fruiting-zone crop sensors (no shading)
16
+ Soil2, Soil4, Soil7, Soil9 — root-zone soil probes
17
+ Thermocouples-2 — structural/ambient thermocouple reference
18
+
19
+ AMBIENT (site-level outdoor baseline):
20
+ Air1 — outdoor climate station (above canopy, no panel)
21
+
22
+ Credentials (env vars or .env):
23
+ THINGSBOARD_HOST — default https://web.seymouragri.com
24
+ THINGSBOARD_USERNAME — tenant login email
25
+ THINGSBOARD_PASSWORD — tenant login password
26
+ THINGSBOARD_TOKEN — pre-generated JWT (takes priority over user/pass)
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import math
32
+ import os
33
+ import time
34
+ from concurrent.futures import ThreadPoolExecutor, as_completed
35
+ from dataclasses import dataclass, field
36
+ from datetime import datetime, timezone
37
+ from enum import Enum
38
+ from typing import Any, Dict, List, Optional, Tuple
39
+
40
+ import pandas as pd
41
+ import requests
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Enumerations
46
+ # ---------------------------------------------------------------------------
47
+
48
class VineArea(str, Enum):
    """Experimental area a device belongs to.

    Inherits from ``str`` so members compare equal to — and serialize as —
    their plain string values.
    """

    TREATMENT = "treatment"  # under solar panels (rows 501-502)
    REFERENCE = "reference"  # open sky, no panels (rows 503-504)
    AMBIENT = "ambient"  # site-level outdoor baseline (Air1 station)
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Device registry
56
+ # ---------------------------------------------------------------------------
57
+
58
@dataclass(frozen=True)
class DeviceInfo:
    """Static metadata for one ThingsBoard device (immutable registry entry)."""

    uuid: str  # ThingsBoard entity UUID
    device_id: int  # numeric device id (tracker entries carry 0 in the registry)
    area: VineArea  # treatment / reference / ambient
    row: Optional[int]  # vineyard row number; None for site-level devices
    label: str  # human-readable description
+
66
+
67
#: Full device registry mapping short name → DeviceInfo.
#: UUIDs are from devices.csv in the Research/PV_Vine_Tradeoff repository.
DEVICE_REGISTRY: Dict[str, DeviceInfo] = {
    # Air* — microclimate stations (ambient baseline + under-panel)
    "Air1": DeviceInfo(
        uuid="373041f0-089a-11ef-9126-b746c27d34bd", device_id=4,
        area=VineArea.AMBIENT, row=None, label="Outdoor Climate (ambient baseline)",
    ),
    "Air2": DeviceInfo(
        uuid="37bf89a0-089a-11ef-9126-b746c27d34bd", device_id=5,
        area=VineArea.TREATMENT, row=501, label="Indoor Climate Row 501 (under panels)",
    ),
    "Air3": DeviceInfo(
        uuid="3860aba0-089a-11ef-9126-b746c27d34bd", device_id=6,
        area=VineArea.TREATMENT, row=502, label="Indoor Climate Row 502 (under panels)",
    ),
    "Air4": DeviceInfo(
        uuid="04452660-7114-11ef-9360-f1ed9d9dc643", device_id=7,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 North (under panels)",
    ),
    # Crop* — fruiting-zone crop sensors
    "Crop1": DeviceInfo(
        uuid="39224df0-089a-11ef-9126-b746c27d34bd", device_id=8,
        area=VineArea.REFERENCE, row=503, label="Reference crop Row 503",
    ),
    "Crop2": DeviceInfo(
        uuid="aa0d9970-7113-11ef-9360-f1ed9d9dc643", device_id=9,
        area=VineArea.REFERENCE, row=503, label="Control crop Row 503",
    ),
    "Crop3": DeviceInfo(
        uuid="859b3ce0-29dd-11f0-96bc-55874793181d", device_id=10,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – West Bottom",
    ),
    "Crop4": DeviceInfo(
        uuid="889765e0-29dd-11f0-96bc-55874793181d", device_id=11,
        area=VineArea.REFERENCE, row=502, label="Control crop Row 502 (reference vine)",
    ),
    "Crop5": DeviceInfo(
        uuid="8b092930-29dd-11f0-96bc-55874793181d", device_id=12,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – East Upper",
    ),
    "Crop6": DeviceInfo(
        uuid="8cce31c0-29dd-11f0-96bc-55874793181d", device_id=13,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – East Bottom",
    ),
    "Crop7": DeviceInfo(
        uuid="8e7440a0-29dd-11f0-96bc-55874793181d", device_id=14,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – West Upper",
    ),
    # Soil* — root-zone soil probes
    "Soil1": DeviceInfo(
        uuid="3586b0a0-089a-11ef-9126-b746c27d34bd", device_id=16,
        area=VineArea.TREATMENT, row=502, label="Soil Row 502 (treatment)",
    ),
    "Soil2": DeviceInfo(
        uuid="35cda4b0-089a-11ef-9126-b746c27d34bd", device_id=17,
        area=VineArea.REFERENCE, row=503, label="Soil Row 503 (reference)",
    ),
    "Soil3": DeviceInfo(
        uuid="3634caf0-089a-11ef-9126-b746c27d34bd", device_id=18,
        area=VineArea.TREATMENT, row=501, label="Soil Row 501 (treatment)",
    ),
    "Soil4": DeviceInfo(
        uuid="36a4cad0-089a-11ef-9126-b746c27d34bd", device_id=19,
        area=VineArea.REFERENCE, row=504, label="Soil Row 504 Control",
    ),
    "Soil5": DeviceInfo(
        uuid="77d55280-70e7-11ef-9360-f1ed9d9dc643", device_id=20,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 South",
    ),
    "Soil6": DeviceInfo(
        uuid="7e4e4630-70e7-11ef-9360-f1ed9d9dc643", device_id=21,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 North",
    ),
    "Soil7": DeviceInfo(
        uuid="842e5540-70e7-11ef-9360-f1ed9d9dc643", device_id=22,
        area=VineArea.REFERENCE, row=504, label="Control 504 South",
    ),
    "Soil9": DeviceInfo(
        uuid="91e44ff0-70e7-11ef-9360-f1ed9d9dc643", device_id=23,
        area=VineArea.REFERENCE, row=504, label="Control 504 South (2nd probe)",
    ),
    # Irrigation / thermocouples
    "Irrigation1": DeviceInfo(
        uuid="3a066c60-089a-11ef-9126-b746c27d34bd", device_id=15,
        area=VineArea.TREATMENT, row=502, label="Irrigation Row 502",
    ),
    "Thermocouples1": DeviceInfo(
        uuid="72ce88f0-c548-11ef-8bc2-fdab9f3349b7", device_id=2,
        area=VineArea.TREATMENT, row=502, label="Panel surface temps Treatment 502",
    ),
    "Thermocouples2": DeviceInfo(
        uuid="03e40ba0-cc0e-11ef-a2e9-55874793181d", device_id=3,
        area=VineArea.REFERENCE, row=None, label="Panel/structure surface temps Reference",
    ),
    # Tracker controllers (panel angle + mode)
    # NOTE(review): tracker entries carry device_id=0 — presumably no numeric
    # id is assigned for trackers; confirm against devices.csv.
    "Tracker501": DeviceInfo(
        uuid="aac06e50-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=501, label="Tracker row 501",
    ),
    "Tracker502": DeviceInfo(
        uuid="b99bd630-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=502, label="Tracker row 502",
    ),
    "Tracker503": DeviceInfo(
        uuid="caffe4c0-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=503, label="Tracker row 503",
    ),
    "Tracker509": DeviceInfo(
        uuid="bacf7c50-fcdc-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=509, label="Tracker row 509",
    ),
}
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # Asset registry (non-device entities — e.g. the plant-level energy asset)
179
+ # ---------------------------------------------------------------------------
180
+
181
@dataclass(frozen=True)
class AssetInfo:
    """Static metadata for one ThingsBoard asset (a non-device entity)."""

    uuid: str  # ThingsBoard asset UUID
    label: str  # human-readable description

ASSET_REGISTRY: Dict[str, AssetInfo] = {
    "Plant": AssetInfo(
        uuid="dc94ddb0-dbe6-11f0-9352-a53ca0b6a212",
        label="Yeruham Vineyard — plant-level energy",
    ),
}

# Telemetry keys for the plant-level energy asset ("Plant").
ENERGY_KEYS: List[str] = ["power", "production"]
# Telemetry keys for Tracker* devices (panel angle + mode — see registry comment).
TRACKER_KEYS: List[str] = ["angle", "manualMode", "setAngle", "setMode"]
195
+
196
# ---------------------------------------------------------------------------
# Telemetry key sets per device type
# ---------------------------------------------------------------------------

# Air* stations: full microclimate set (temperature, humidity, light, wind, ...).
AIR_KEYS: List[str] = [
    "airTemperature", "leafTemperature", "VPD", "CO2", "PAR", "DLI",
    "airHumidity", "windSpeed", "windAngle", "rain", "airPressure",
    "dewTemperature", "NDVI", "PRI", "airLeafDeltaT",
]

# Crop* sensors: fruiting-zone light and canopy indices.
CROP_KEYS: List[str] = [
    "PAR", "leafTemperature", "NDVI", "PRI", "DLI", "PARAvg1H", "PARAvg24H",
]

# Soil* probes: moisture/temperature (two channels each) plus bulk EC and pH.
SOIL_KEYS: List[str] = [
    "soilMoisture", "soilMoisture2",
    "soilTemperature", "soilTemperature2",
    "soilBulkEC", "soilpH",
]

# Irrigation1 logger: flow/volume/quality readings.
IRRIGATION_KEYS: List[str] = [
    "irrigationVolume", "irrigationMinutes", "irrigationFlowRate",
    "irrigationEC", "irrigationPH", "waterTemperature",
    "irrigationCycleVolume", "irrigationCycleMinutes",
]

# Thermocouples*: four surface-temperature positions per device.
THERMOCOUPLE_KEYS: List[str] = [
    "thermocoupleTemperature_1", "thermocoupleTemperature_2",
    "thermocoupleTemperature_3", "thermocoupleTemperature_4",
]
226
+
227
+
228
+ # ---------------------------------------------------------------------------
229
+ # VineSnapshot dataclass
230
+ # ---------------------------------------------------------------------------
231
+
232
+ @dataclass
233
+ class VineSnapshot:
234
+ """
235
+ Aggregated real-time vine state from all ThingsBoard sensors.
236
+
237
+ Fields are grouped by area:
238
+ - ambient : Air1 (outdoor climate, site-level baseline)
239
+ - treatment : under solar panels (rows 501–502)
240
+ - reference : open sky / no panels (rows 503–504)
241
+
242
+ None means the sensor did not return a value.
243
+ """
244
+
245
+ snapshot_ts: datetime
246
+ staleness_minutes: float
247
+
248
+ # --- Ambient (Air1, outdoor baseline) ---
249
+ ambient_temp_c: Optional[float] = None
250
+ ambient_humidity_pct: Optional[float] = None
251
+ ambient_wind_speed_ms: Optional[float] = None
252
+ ambient_wind_angle_deg: Optional[float] = None
253
+ ambient_rain_mm: Optional[float] = None
254
+
255
+ # --- Treatment microclimate (avg of Air2 / Air3 / Air4) ---
256
+ treatment_air_temp_c: Optional[float] = None
257
+ treatment_leaf_temp_c: Optional[float] = None
258
+ treatment_vpd_kpa: Optional[float] = None
259
+ treatment_co2_ppm: Optional[float] = None
260
+ treatment_par_umol: Optional[float] = None
261
+ treatment_dli_mol_m2: Optional[float] = None
262
+ treatment_ndvi: Optional[float] = None
263
+ treatment_pri: Optional[float] = None
264
+ treatment_air_leaf_delta_t: Optional[float] = None
265
+
266
+ # --- Treatment crop (avg of Crop3 / Crop5 / Crop6 / Crop7) ---
267
+ treatment_crop_par_umol: Optional[float] = None
268
+ treatment_crop_leaf_temp_c: Optional[float] = None
269
+ treatment_crop_ndvi: Optional[float] = None
270
+ treatment_crop_dli_mol_m2: Optional[float] = None
271
+ treatment_crop_par_avg1h: Optional[float] = None
272
+ # Per-panel-position readings {position_label: {par, leaf_temp, ndvi}}
273
+ treatment_crop_by_position: Dict[str, Dict[str, Optional[float]]] = field(default_factory=dict)
274
+
275
+ # --- Reference crop (avg of Crop1 / Crop2 / Crop4) ---
276
+ reference_crop_par_umol: Optional[float] = None
277
+ reference_crop_leaf_temp_c: Optional[float] = None
278
+ reference_crop_ndvi: Optional[float] = None
279
+ reference_crop_dli_mol_m2: Optional[float] = None
280
+ reference_crop_by_position: Dict[str, Dict[str, Optional[float]]] = field(default_factory=dict)
281
+
282
+ # --- PAR shading ratio: treatment_crop_par / reference_crop_par ---
283
+ par_shading_ratio: Optional[float] = None # <1 = panels are shading
284
+
285
+ # --- Treatment soil (avg of Soil1 / Soil3 / Soil5 / Soil6) ---
286
+ treatment_soil_moisture_pct: Optional[float] = None
287
+ treatment_soil_temp_c: Optional[float] = None
288
+ treatment_soil_ec_ds_m: Optional[float] = None
289
+ treatment_soil_ph: Optional[float] = None
290
+
291
+ # --- Reference soil (avg of Soil2 / Soil4 / Soil7 / Soil9) ---
292
+ reference_soil_moisture_pct: Optional[float] = None
293
+ reference_soil_temp_c: Optional[float] = None
294
+
295
+ # --- Irrigation (Irrigation1, row 502 treatment) ---
296
+ irrigation_last_volume_l: Optional[float] = None
297
+ irrigation_last_minutes: Optional[float] = None
298
+ irrigation_ec: Optional[float] = None
299
+ irrigation_ph: Optional[float] = None
300
+ water_temp_c: Optional[float] = None
301
+
302
+ # --- Panel surface temperatures ---
303
+ treatment_panel_temp_c: Optional[float] = None # avg Thermocouples1 positions 1-4
304
+ reference_panel_temp_c: Optional[float] = None # avg Thermocouples2 positions 1-4
305
+
306
    def to_advisor_text(self) -> str:
        """Format snapshot for inclusion in an AI advisory prompt.

        Builds a plain-text report with one section per area (treatment,
        reference, ambient, irrigation). Fields that are None are simply
        omitted, so the report never shows placeholder values.

        Returns
        -------
        str
            Multi-line report joined with newlines.
        """
        # Staleness display caps at two hours; beyond that show ">N".
        # NOTE(review): if staleness_minutes is NaN (no telemetry at all)
        # this prints ">nan" — confirm whether an "unknown" label is wanted.
        age = f"{self.staleness_minutes:.0f}" if self.staleness_minutes < 120 else ">{:.0f}".format(self.staleness_minutes)
        lines = [f"VINE STATE (ThingsBoard sensors, ~{age} min ago):"]

        lines.append(" TREATMENT area (rows 501-502, under solar panels):")
        if self.treatment_air_temp_c is not None:
            lines.append(f" Air temperature: {self.treatment_air_temp_c:.1f} C")
        if self.treatment_leaf_temp_c is not None:
            lines.append(f" Leaf temperature: {self.treatment_leaf_temp_c:.1f} C")
        if self.treatment_air_leaf_delta_t is not None:
            lines.append(f" Air-leaf delta-T: {self.treatment_air_leaf_delta_t:+.1f} C (proxy for heat stress)")
        if self.treatment_vpd_kpa is not None:
            lines.append(f" VPD: {self.treatment_vpd_kpa:.2f} kPa")
        if self.treatment_co2_ppm is not None:
            lines.append(f" CO2: {self.treatment_co2_ppm:.0f} ppm")
        if self.treatment_crop_par_umol is not None:
            lines.append(f" Fruiting-zone PAR: {self.treatment_crop_par_umol:.0f} umol/m2/s (avg of Crop3/5/6/7)")
        if self.treatment_crop_dli_mol_m2 is not None:
            lines.append(f" DLI today so far: {self.treatment_crop_dli_mol_m2:.1f} mol/m2/day")
        if self.treatment_crop_ndvi is not None:
            lines.append(f" Canopy NDVI: {self.treatment_crop_ndvi:.3f}")
        if self.treatment_soil_moisture_pct is not None:
            lines.append(f" Soil moisture: {self.treatment_soil_moisture_pct:.1f}% (avg Soil1/3/5/6)")
        if self.treatment_soil_temp_c is not None:
            lines.append(f" Soil temperature: {self.treatment_soil_temp_c:.1f} C")
        if self.treatment_panel_temp_c is not None:
            lines.append(f" Panel surface temp: {self.treatment_panel_temp_c:.1f} C")

        # Per-sensor detail only when the mapping was populated by the client.
        if self.treatment_crop_by_position:
            lines.append(" Per-position PAR (Crop sensors):")
            for pos, vals in self.treatment_crop_by_position.items():
                par = vals.get("par")
                lt = vals.get("leaf_temp")
                par_str = f"{par:.0f} umol/m2/s" if par is not None else "N/A"
                lt_str = f" | leaf {lt:.1f} C" if lt is not None else ""
                lines.append(f" {pos}: PAR {par_str}{lt_str}")

        lines.append("")
        lines.append(" REFERENCE area (rows 503-504, open sky, no panels):")
        if self.reference_crop_par_umol is not None:
            lines.append(f" Fruiting-zone PAR: {self.reference_crop_par_umol:.0f} umol/m2/s (avg of Crop1/2/4)")
        if self.reference_crop_leaf_temp_c is not None:
            lines.append(f" Leaf temperature: {self.reference_crop_leaf_temp_c:.1f} C")
        if self.reference_crop_ndvi is not None:
            lines.append(f" Canopy NDVI: {self.reference_crop_ndvi:.3f}")
        if self.reference_soil_moisture_pct is not None:
            lines.append(f" Soil moisture: {self.reference_soil_moisture_pct:.1f}% (avg Soil2/4/7/9)")
        if self.reference_crop_by_position:
            lines.append(" Per-position PAR (Crop sensors):")
            for pos, vals in self.reference_crop_by_position.items():
                par = vals.get("par")
                par_str = f"{par:.0f} umol/m2/s" if par is not None else "N/A"
                lines.append(f" {pos}: PAR {par_str}")

        if self.par_shading_ratio is not None:
            # Ratio < 1 means the panels removed light; express as % reduction.
            reduction_pct = (1 - self.par_shading_ratio) * 100
            lines.append("")
            lines.append(f" PAR shading ratio (treatment/reference): {self.par_shading_ratio:.2f}"
                         f" ({reduction_pct:.0f}% reduction by panels)")

        if self.ambient_temp_c is not None:
            lines.append("")
            lines.append(" AMBIENT (outdoor baseline, Air1):")
            lines.append(f" Air temperature: {self.ambient_temp_c:.1f} C")
            if self.ambient_wind_speed_ms is not None:
                lines.append(f" Wind speed: {self.ambient_wind_speed_ms:.1f} m/s")
            # Rain is only reported when non-zero to keep the prompt short.
            if self.ambient_rain_mm is not None and self.ambient_rain_mm > 0:
                lines.append(f" Rain: {self.ambient_rain_mm:.1f} mm")

        # Emit the irrigation section only when at least one reading exists.
        any_irrigation = any(v is not None for v in [
            self.irrigation_last_volume_l, self.irrigation_last_minutes,
            self.irrigation_ec, self.irrigation_ph,
        ])
        if any_irrigation:
            lines.append("")
            lines.append(" IRRIGATION (Irrigation1, row 502):")
            if self.irrigation_last_volume_l is not None:
                lines.append(f" Last cycle volume: {self.irrigation_last_volume_l:.0f} L")
            if self.irrigation_last_minutes is not None:
                lines.append(f" Duration: {self.irrigation_last_minutes:.0f} min")
            if self.irrigation_ec is not None:
                lines.append(f" EC: {self.irrigation_ec:.2f} dS/m")
            if self.irrigation_ph is not None:
                lines.append(f" pH: {self.irrigation_ph:.1f}")
            if self.water_temp_c is not None:
                lines.append(f" Water temperature: {self.water_temp_c:.1f} C")

        return "\n".join(lines)
395
+
396
    def to_dict(self) -> Dict[str, Any]:
        """Return a flat dict suitable for JSON serialization (e.g., chatbot tool result).

        Scalar sensor fields are rounded to 3 decimals; None is preserved so
        consumers can distinguish "no data" from a literal 0. The two
        per-position mappings are passed through unrounded.
        """
        out: Dict[str, Any] = {
            "snapshot_ts": self.snapshot_ts.isoformat(),
            "staleness_minutes": round(self.staleness_minutes, 1),
        }
        # All flat scalar fields, grouped ambient → treatment → reference →
        # irrigation → panels (same order as the dataclass declaration).
        for attr in (
            "ambient_temp_c", "ambient_humidity_pct", "ambient_wind_speed_ms",
            "ambient_wind_angle_deg", "ambient_rain_mm",
            "treatment_air_temp_c", "treatment_leaf_temp_c", "treatment_vpd_kpa",
            "treatment_co2_ppm", "treatment_par_umol", "treatment_dli_mol_m2",
            "treatment_ndvi", "treatment_pri", "treatment_air_leaf_delta_t",
            "treatment_crop_par_umol", "treatment_crop_leaf_temp_c",
            "treatment_crop_ndvi", "treatment_crop_dli_mol_m2", "treatment_crop_par_avg1h",
            "reference_crop_par_umol", "reference_crop_leaf_temp_c",
            "reference_crop_ndvi", "reference_crop_dli_mol_m2",
            "par_shading_ratio",
            "treatment_soil_moisture_pct", "treatment_soil_temp_c",
            "treatment_soil_ec_ds_m", "treatment_soil_ph",
            "reference_soil_moisture_pct", "reference_soil_temp_c",
            "irrigation_last_volume_l", "irrigation_last_minutes",
            "irrigation_ec", "irrigation_ph", "water_temp_c",
            "treatment_panel_temp_c", "reference_panel_temp_c",
        ):
            val = getattr(self, attr)
            out[attr] = round(val, 3) if val is not None else None
        out["treatment_crop_by_position"] = self.treatment_crop_by_position
        out["reference_crop_by_position"] = self.reference_crop_by_position
        return out
425
+
426
+
427
+ # ---------------------------------------------------------------------------
428
+ # Configuration
429
+ # ---------------------------------------------------------------------------
430
+
431
@dataclass
class ThingsBoardConfig:
    """ThingsBoard connection settings. Data retrieval always uses prod (Seymour).

    NOTE(review): defaults are evaluated once at import time; environment
    variables set afterwards (e.g. by a late ``load_dotenv``) are not picked
    up by later instantiations — confirm this is intended.
    """
    # Prod only — test (eu.thingsboard.cloud) is for deploying apps, not data
    host: str = os.environ.get("THINGSBOARD_HOST", "https://web.seymouragri.com/")
    # Login credentials; TB_* accepted as shorter aliases.
    username: Optional[str] = (
        os.environ.get("THINGSBOARD_USERNAME") or os.environ.get("TB_USERNAME")
    )
    password: Optional[str] = (
        os.environ.get("THINGSBOARD_PASSWORD") or os.environ.get("TB_PASSWORD")
    )
    # Pre-generated JWT; when set it is used as-is (see _ensure_jwt).
    token: Optional[str] = os.environ.get("THINGSBOARD_TOKEN")
443
+
444
+
445
+ # ---------------------------------------------------------------------------
446
+ # Client
447
+ # ---------------------------------------------------------------------------
448
+
449
class ThingsBoardClient:
    """
    Minimal ThingsBoard client for the Seymour vineyard.

    Authentication
    --------------
    Provide THINGSBOARD_TOKEN for a pre-generated JWT, or
    THINGSBOARD_USERNAME + THINGSBOARD_PASSWORD for login-based auth.
    Tokens are cached and refreshed automatically before they expire.

    Usage
    -----
    client = ThingsBoardClient()
    snapshot = client.get_vine_snapshot()
    print(snapshot.to_advisor_text())
    """

    # Seconds a login JWT is trusted before re-authenticating.
    _TOKEN_TTL_SECONDS = 8_000  # ThingsBoard default is 9000 s; be conservative
467
+
468
+ def __init__(self, config: Optional[ThingsBoardConfig] = None) -> None:
469
+ self.config = config or ThingsBoardConfig()
470
+ self._session = requests.Session()
471
+ self._session.headers.update({"Content-Type": "application/json"})
472
+ self._jwt: Optional[str] = None
473
+ self._jwt_expires_at: float = 0.0
474
+
475
+ # ------------------------------------------------------------------
476
+ # Authentication
477
+ # ------------------------------------------------------------------
478
+
479
+ def _ensure_jwt(self) -> str:
480
+ """Return a valid JWT, obtaining or refreshing as needed."""
481
+ if self.config.token:
482
+ if "X-Authorization" not in self._session.headers:
483
+ self._session.headers["X-Authorization"] = f"Bearer {self.config.token}"
484
+ return self.config.token
485
+
486
+ if self._jwt and time.monotonic() < self._jwt_expires_at:
487
+ return self._jwt
488
+
489
+ if not self.config.username or not self.config.password:
490
+ raise RuntimeError(
491
+ "ThingsBoard authentication requires THINGSBOARD_TOKEN "
492
+ "or both THINGSBOARD_USERNAME and THINGSBOARD_PASSWORD."
493
+ )
494
+
495
+ url = f"{self.config.host.rstrip('/')}/api/auth/login"
496
+ resp = self._session.post(
497
+ url,
498
+ json={"username": self.config.username, "password": self.config.password},
499
+ timeout=10,
500
+ )
501
+ resp.raise_for_status()
502
+ token = resp.json()["token"]
503
+ self._jwt = token
504
+ self._jwt_expires_at = time.monotonic() + self._TOKEN_TTL_SECONDS
505
+ self._session.headers["X-Authorization"] = f"Bearer {token}"
506
+ return token
507
+
508
+ # ------------------------------------------------------------------
509
+ # Low-level API calls
510
+ # ------------------------------------------------------------------
511
+
512
+ # ------------------------------------------------------------------
513
+ # Shared low-level helpers (DEVICE and ASSET use the same REST API,
514
+ # differing only in the entity-type path segment).
515
+ # ------------------------------------------------------------------
516
+
517
+ def _fetch_latest_raw(
518
+ self,
519
+ entity_type: str,
520
+ uuid: str,
521
+ keys: List[str],
522
+ ) -> Tuple[Dict[str, Optional[float]], Optional[datetime]]:
523
+ """Fetch most-recent telemetry for any entity type (DEVICE or ASSET)."""
524
+ self._ensure_jwt()
525
+ url = (
526
+ f"{self.config.host.rstrip('/')}/api/plugins/telemetry/{entity_type}"
527
+ f"/{uuid}/values/timeseries"
528
+ )
529
+ resp = self._session.get(url, params={"keys": ",".join(keys)}, timeout=15)
530
+ resp.raise_for_status()
531
+ raw: Dict[str, List[Dict]] = resp.json()
532
+
533
+ values: Dict[str, Optional[float]] = {}
534
+ newest_ts_ms: Optional[int] = None
535
+ for key in keys:
536
+ entries = raw.get(key, [])
537
+ if entries:
538
+ values[key] = _safe_float(entries[0]["value"])
539
+ ts_ms = entries[0].get("ts")
540
+ if ts_ms and (newest_ts_ms is None or ts_ms > newest_ts_ms):
541
+ newest_ts_ms = ts_ms
542
+ else:
543
+ values[key] = None
544
+
545
+ newest_ts = (
546
+ datetime.fromtimestamp(newest_ts_ms / 1000, tz=timezone.utc)
547
+ if newest_ts_ms else None
548
+ )
549
+ return values, newest_ts
550
+
551
+ def _fetch_timeseries_raw(
552
+ self,
553
+ entity_type: str,
554
+ uuid: str,
555
+ keys: List[str],
556
+ start: datetime,
557
+ end: datetime,
558
+ limit: int = 1000,
559
+ interval_ms: int = 900_000,
560
+ agg: str = "NONE",
561
+ ) -> pd.DataFrame:
562
+ """Fetch time-series telemetry for any entity type (DEVICE or ASSET)."""
563
+ self._ensure_jwt()
564
+ start_ms = int(start.timestamp() * 1000)
565
+ end_ms = int(end.timestamp() * 1000)
566
+ url = (
567
+ f"{self.config.host.rstrip('/')}/api/plugins/telemetry/{entity_type}"
568
+ f"/{uuid}/values/timeseries"
569
+ )
570
+ params: Dict[str, Any] = {
571
+ "keys": ",".join(keys),
572
+ "startTs": start_ms,
573
+ "endTs": end_ms,
574
+ "limit": limit,
575
+ "agg": agg,
576
+ }
577
+ if agg != "NONE":
578
+ params["interval"] = interval_ms
579
+
580
+ resp = self._session.get(url, params=params, timeout=30)
581
+ resp.raise_for_status()
582
+ raw: Dict[str, List[Dict]] = resp.json()
583
+
584
+ frames: Dict[str, pd.Series] = {}
585
+ for key, entries in raw.items():
586
+ if key in keys and entries:
587
+ ts = pd.to_datetime([e["ts"] for e in entries], unit="ms", utc=True)
588
+ vals = [_safe_float(e["value"]) for e in entries]
589
+ frames[key] = pd.Series(vals, index=ts)
590
+
591
+ if not frames:
592
+ return pd.DataFrame()
593
+ return pd.DataFrame(frames).sort_index()
594
+
595
+ # ------------------------------------------------------------------
596
+ # Device API (public)
597
+ # ------------------------------------------------------------------
598
+
599
+ def _fetch_latest(
600
+ self,
601
+ device_name: str,
602
+ keys: List[str],
603
+ ) -> Tuple[Dict[str, Optional[float]], Optional[datetime]]:
604
+ """Fetch most-recent values for a named device."""
605
+ info = DEVICE_REGISTRY[device_name]
606
+ return self._fetch_latest_raw("DEVICE", info.uuid, keys)
607
+
608
+ def get_latest_telemetry(
609
+ self,
610
+ device_name: str,
611
+ keys: List[str],
612
+ ) -> Dict[str, Optional[float]]:
613
+ """Return the most recent value for each key. Missing keys return None."""
614
+ if device_name not in DEVICE_REGISTRY:
615
+ raise KeyError(
616
+ f"Unknown device: {device_name!r}. "
617
+ f"Valid names: {sorted(DEVICE_REGISTRY)}"
618
+ )
619
+ values, _ = self._fetch_latest(device_name, keys)
620
+ return values
621
+
622
+ def get_timeseries(
623
+ self,
624
+ device_name: str,
625
+ keys: List[str],
626
+ start: datetime,
627
+ end: datetime,
628
+ limit: int = 1000,
629
+ interval_ms: int = 900_000, # 15 minutes
630
+ agg: str = "NONE",
631
+ ) -> pd.DataFrame:
632
+ """Fetch time-series telemetry for a named device."""
633
+ if device_name not in DEVICE_REGISTRY:
634
+ raise KeyError(f"Unknown device: {device_name!r}")
635
+ info = DEVICE_REGISTRY[device_name]
636
+ return self._fetch_timeseries_raw(
637
+ "DEVICE", info.uuid, keys, start, end, limit, interval_ms, agg,
638
+ )
639
+
640
+ # ------------------------------------------------------------------
641
+ # Asset API (public)
642
+ # ------------------------------------------------------------------
643
+
644
+ def get_asset_timeseries(
645
+ self,
646
+ asset_name: str,
647
+ keys: List[str],
648
+ start: datetime,
649
+ end: datetime,
650
+ limit: int = 1000,
651
+ interval_ms: int = 3_600_000, # 1 hour
652
+ agg: str = "SUM",
653
+ ) -> pd.DataFrame:
654
+ """Fetch time-series from a ThingsBoard ASSET (e.g. Plant energy)."""
655
+ if asset_name not in ASSET_REGISTRY:
656
+ raise KeyError(f"Unknown asset: {asset_name!r}. Valid: {sorted(ASSET_REGISTRY)}")
657
+ info = ASSET_REGISTRY[asset_name]
658
+ return self._fetch_timeseries_raw(
659
+ "ASSET", info.uuid, keys, start, end, limit, interval_ms, agg,
660
+ )
661
+
662
+ def get_asset_latest(
663
+ self,
664
+ asset_name: str,
665
+ keys: List[str],
666
+ ) -> Dict[str, Optional[float]]:
667
+ """Fetch latest telemetry from a ThingsBoard ASSET."""
668
+ if asset_name not in ASSET_REGISTRY:
669
+ raise KeyError(f"Unknown asset: {asset_name!r}")
670
+ info = ASSET_REGISTRY[asset_name]
671
+ values, _ = self._fetch_latest_raw("ASSET", info.uuid, keys)
672
+ return values
673
+
674
+ # ------------------------------------------------------------------
675
+ # Device commands (RPC + attribute writes)
676
+ # ------------------------------------------------------------------
677
+
678
+ def send_rpc_command(
679
+ self,
680
+ device_name: str,
681
+ method: str,
682
+ params: Any = None,
683
+ timeout: float = 10.0,
684
+ ) -> Dict[str, Any]:
685
+ """Send a two-way RPC command to a device.
686
+
687
+ Uses POST /api/plugins/rpc/twoway/{deviceId}.
688
+ Falls back to one-way if two-way returns 404.
689
+ """
690
+ if device_name not in DEVICE_REGISTRY:
691
+ raise KeyError(f"Unknown device: {device_name!r}")
692
+ info = DEVICE_REGISTRY[device_name]
693
+ self._ensure_jwt()
694
+
695
+ payload = {"method": method, "params": params if params is not None else {}}
696
+
697
+ # Try two-way RPC first
698
+ url = (
699
+ f"{self.config.host.rstrip('/')}/api/plugins/rpc/twoway"
700
+ f"/{info.uuid}"
701
+ )
702
+ resp = self._session.post(url, json=payload, timeout=timeout)
703
+ if resp.status_code in (404, 405):
704
+ # Fallback to one-way RPC
705
+ url = (
706
+ f"{self.config.host.rstrip('/')}/api/plugins/rpc/oneway"
707
+ f"/{info.uuid}"
708
+ )
709
+ resp = self._session.post(url, json=payload, timeout=timeout)
710
+ resp.raise_for_status()
711
+ try:
712
+ return resp.json()
713
+ except Exception:
714
+ return {"status": "ok", "status_code": resp.status_code}
715
+
716
+ def set_device_attributes(
717
+ self,
718
+ device_name: str,
719
+ attributes: Dict[str, Any],
720
+ scope: str = "SHARED_SCOPE",
721
+ ) -> None:
722
+ """Write server-side attributes to a device.
723
+
724
+ Uses POST /api/plugins/telemetry/DEVICE/{id}/attributes/{scope}.
725
+ This is an alternative to RPC for setting tracker targets.
726
+ """
727
+ if device_name not in DEVICE_REGISTRY:
728
+ raise KeyError(f"Unknown device: {device_name!r}")
729
+ info = DEVICE_REGISTRY[device_name]
730
+ self._ensure_jwt()
731
+
732
+ url = (
733
+ f"{self.config.host.rstrip('/')}/api/plugins/telemetry/DEVICE"
734
+ f"/{info.uuid}/attributes/{scope}"
735
+ )
736
+ resp = self._session.post(url, json=attributes, timeout=10)
737
+ resp.raise_for_status()
738
+
739
+ # ------------------------------------------------------------------
740
+ # High-level vine snapshot
741
+ # ------------------------------------------------------------------
742
+
743
    # Dashboard-only: 4 devices for farmer view (temp, soil, irrigation)
    _DASHBOARD_FETCH_PLAN: Dict[str, List[str]] = {
        "Air1": AIR_KEYS,  # ambient weather
        "Air2": AIR_KEYS,  # treatment air
        "Soil1": SOIL_KEYS,  # treatment soil
        "Irrigation1": IRRIGATION_KEYS,
    }

    # Light mode: 6 devices (adds crop PAR for chatbot/detailed view)
    _LIGHT_FETCH_PLAN: Dict[str, List[str]] = {
        "Air1": AIR_KEYS,  # ambient
        "Air2": AIR_KEYS,  # treatment air (one representative)
        "Crop1": CROP_KEYS,  # reference crop
        "Crop3": CROP_KEYS,  # treatment crop
        "Soil1": SOIL_KEYS,  # treatment soil
        "Irrigation1": IRRIGATION_KEYS,
    }

    # Full mode: every sensor used by get_vine_snapshot's aggregation
    # (air, crop, soil, irrigation, panel thermocouples).
    _FULL_FETCH_PLAN: Dict[str, List[str]] = {
        "Air1": AIR_KEYS,
        "Air2": AIR_KEYS,
        "Air3": AIR_KEYS,
        "Air4": AIR_KEYS,
        "Crop1": CROP_KEYS,
        "Crop2": CROP_KEYS,
        "Crop3": CROP_KEYS,
        "Crop4": CROP_KEYS,
        "Crop5": CROP_KEYS,
        "Crop6": CROP_KEYS,
        "Crop7": CROP_KEYS,
        "Soil1": SOIL_KEYS,
        "Soil2": SOIL_KEYS,
        "Soil3": SOIL_KEYS,
        "Soil4": SOIL_KEYS,
        "Soil5": SOIL_KEYS,
        "Soil6": SOIL_KEYS,
        "Soil7": SOIL_KEYS,
        "Soil9": SOIL_KEYS,
        "Irrigation1": IRRIGATION_KEYS,
        "Thermocouples1": THERMOCOUPLE_KEYS,
        "Thermocouples2": THERMOCOUPLE_KEYS,
    }
785
+
786
+ def get_vine_snapshot(self, light: bool = False,
787
+ mode: Optional[str] = None) -> VineSnapshot:
788
+ """
789
+ Fetch latest telemetry from all relevant devices and return an
790
+ aggregated VineSnapshot distinguishing treatment vs reference areas.
791
+
792
+ Uses a thread pool to parallelise HTTP requests.
793
+ Individual device failures are silently skipped (returns None fields).
794
+
795
+ Parameters
796
+ ----------
797
+ light : bool
798
+ If True, fetch only ~6 key devices instead of all 21.
799
+ mode : str, optional
800
+ "dashboard" = 4 devices only (air + soil + irrigation).
801
+ Overrides `light` when set.
802
+ """
803
+ if mode == "dashboard":
804
+ fetch_plan = self._DASHBOARD_FETCH_PLAN
805
+ elif light:
806
+ fetch_plan = self._LIGHT_FETCH_PLAN
807
+ else:
808
+ fetch_plan = self._FULL_FETCH_PLAN
809
+
810
+ # Ensure auth token before spawning threads (avoid race on login)
811
+ self._ensure_jwt()
812
+
813
+ raw_results: Dict[str, Dict[str, Optional[float]]] = {}
814
+ newest_ts_overall: Optional[datetime] = None
815
+
816
+ with ThreadPoolExecutor(max_workers=8) as pool:
817
+ future_map = {
818
+ pool.submit(self._fetch_latest, name, keys): name
819
+ for name, keys in fetch_plan.items()
820
+ }
821
+ for future in as_completed(future_map, timeout=25):
822
+ name = future_map[future]
823
+ try:
824
+ values, ts = future.result()
825
+ raw_results[name] = values
826
+ if ts and (newest_ts_overall is None or ts > newest_ts_overall):
827
+ newest_ts_overall = ts
828
+ except Exception:
829
+ raw_results[name] = {}
830
+
831
+ now = datetime.now(tz=timezone.utc)
832
+ staleness = (
833
+ (now - newest_ts_overall).total_seconds() / 60
834
+ if newest_ts_overall else float("nan")
835
+ )
836
+
837
+ # ---------- Ambient (Air1) ----------
838
+ air1 = raw_results.get("Air1", {})
839
+
840
+ # ---------- Treatment microclimate (Air2/3/4) ----------
841
+ treatment_air = [raw_results.get(d, {}) for d in ("Air2", "Air3", "Air4")]
842
+
843
+ # ---------- Treatment crop by position ----------
844
+ position_labels = {
845
+ "Crop3": "502-west-bottom",
846
+ "Crop5": "502-east-upper",
847
+ "Crop6": "502-east-bottom",
848
+ "Crop7": "502-west-upper",
849
+ }
850
+ treatment_crop_devs = {
851
+ label: raw_results.get(dev, {})
852
+ for dev, label in position_labels.items()
853
+ }
854
+ treatment_crop_by_pos: Dict[str, Dict[str, Optional[float]]] = {
855
+ label: {
856
+ "par": v.get("PAR"),
857
+ "leaf_temp": v.get("leafTemperature"),
858
+ "ndvi": v.get("NDVI"),
859
+ "dli": v.get("DLI"),
860
+ }
861
+ for label, v in treatment_crop_devs.items()
862
+ }
863
+
864
+ # ---------- Reference crop by position ----------
865
+ ref_position_labels = {
866
+ "Crop1": "503-ref",
867
+ "Crop2": "503-control",
868
+ "Crop4": "502-control",
869
+ }
870
+ reference_crop_devs = {
871
+ label: raw_results.get(dev, {})
872
+ for dev, label in ref_position_labels.items()
873
+ }
874
+ reference_crop_by_pos: Dict[str, Dict[str, Optional[float]]] = {
875
+ label: {
876
+ "par": v.get("PAR"),
877
+ "leaf_temp": v.get("leafTemperature"),
878
+ "ndvi": v.get("NDVI"),
879
+ "dli": v.get("DLI"),
880
+ }
881
+ for label, v in reference_crop_devs.items()
882
+ }
883
+
884
+ # ---------- Soil averages ----------
885
+ treatment_soil_devs = [raw_results.get(d, {}) for d in ("Soil1", "Soil3", "Soil5", "Soil6")]
886
+ reference_soil_devs = [raw_results.get(d, {}) for d in ("Soil2", "Soil4", "Soil7", "Soil9")]
887
+
888
+ def _avg_soil_moisture(devs: List[Dict]) -> Optional[float]:
889
+ all_vals = []
890
+ for d in devs:
891
+ for k in ("soilMoisture", "soilMoisture2"):
892
+ if d.get(k) is not None:
893
+ all_vals.append(d[k])
894
+ lo, hi = _BOUNDS["soil_moisture"]
895
+ return _bounded_avg(lo, hi, *all_vals) if all_vals else None
896
+
897
+ def _avg_soil_temp(devs: List[Dict]) -> Optional[float]:
898
+ all_vals = []
899
+ for d in devs:
900
+ for k in ("soilTemperature", "soilTemperature2"):
901
+ if d.get(k) is not None:
902
+ all_vals.append(d[k])
903
+ lo, hi = _BOUNDS["soil_temp"]
904
+ return _bounded_avg(lo, hi, *all_vals) if all_vals else None
905
+
906
+ # ---------- Panel temps ----------
907
+ tc1 = raw_results.get("Thermocouples1", {})
908
+ tc2 = raw_results.get("Thermocouples2", {})
909
+
910
+ irr = raw_results.get("Irrigation1", {})
911
+
912
+ # ---------- PAR shading ratio (bounded to reject sensor faults) ----------
913
+ t_par = _bounded_avg(*_BOUNDS["par"], *[v.get("PAR") for v in treatment_crop_devs.values()])
914
+ r_par = _bounded_avg(*_BOUNDS["par"], *[v.get("PAR") for v in reference_crop_devs.values()])
915
+ par_ratio: Optional[float] = None
916
+ if t_par is not None and r_par is not None and r_par > 0:
917
+ par_ratio = t_par / r_par
918
+
919
+ snapshot = VineSnapshot(
920
+ snapshot_ts=now,
921
+ staleness_minutes=staleness,
922
+
923
+ # Ambient — apply bounds to catch single-device faults too
924
+ ambient_temp_c=_bounded_avg(*_BOUNDS["air_temp"], air1.get("airTemperature")),
925
+ ambient_humidity_pct=_bounded_avg(0, 100, air1.get("airHumidity")),
926
+ ambient_wind_speed_ms=_bounded_avg(0, 60, air1.get("windSpeed")),
927
+ ambient_wind_angle_deg=_bounded_avg(0, 360, air1.get("windAngle")),
928
+ ambient_rain_mm=_bounded_avg(0, 500, air1.get("rain")),
929
+
930
+ # Treatment climate — bounded to reject sensor faults
931
+ treatment_air_temp_c=_bounded_avg(*_BOUNDS["air_temp"], *[d.get("airTemperature") for d in treatment_air]),
932
+ treatment_leaf_temp_c=_bounded_avg(*_BOUNDS["leaf_temp"], *[d.get("leafTemperature") for d in treatment_air]),
933
+ treatment_vpd_kpa=_bounded_avg(*_BOUNDS["vpd"], *[d.get("VPD") for d in treatment_air]),
934
+ treatment_co2_ppm=_bounded_avg(*_BOUNDS["co2"], *[d.get("CO2") for d in treatment_air]),
935
+ treatment_par_umol=_bounded_avg(*_BOUNDS["par"], *[d.get("PAR") for d in treatment_air]),
936
+ treatment_dli_mol_m2=_bounded_avg(*_BOUNDS["dli"], *[d.get("DLI") for d in treatment_air]),
937
+ treatment_ndvi=_bounded_avg(*_BOUNDS["ndvi"], *[d.get("NDVI") for d in treatment_air]),
938
+ treatment_pri=_bounded_avg(*_BOUNDS["pri"], *[d.get("PRI") for d in treatment_air]),
939
+ treatment_air_leaf_delta_t=_bounded_avg(-20, 20, *[d.get("airLeafDeltaT") for d in treatment_air]),
940
+
941
+ # Treatment crop
942
+ treatment_crop_par_umol=t_par,
943
+ treatment_crop_leaf_temp_c=_bounded_avg(
944
+ *_BOUNDS["leaf_temp"], *[v.get("leafTemperature") for v in treatment_crop_devs.values()]
945
+ ),
946
+ treatment_crop_ndvi=_bounded_avg(
947
+ *_BOUNDS["ndvi"], *[v.get("NDVI") for v in treatment_crop_devs.values()]
948
+ ),
949
+ treatment_crop_dli_mol_m2=_bounded_avg(
950
+ *_BOUNDS["dli"], *[v.get("DLI") for v in treatment_crop_devs.values()]
951
+ ),
952
+ treatment_crop_par_avg1h=_bounded_avg(
953
+ *_BOUNDS["par"], *[v.get("PARAvg1H") for v in treatment_crop_devs.values()]
954
+ ),
955
+ treatment_crop_by_position=treatment_crop_by_pos,
956
+
957
+ # Reference crop
958
+ reference_crop_par_umol=r_par,
959
+ reference_crop_leaf_temp_c=_bounded_avg(
960
+ *_BOUNDS["leaf_temp"], *[v.get("leafTemperature") for v in reference_crop_devs.values()]
961
+ ),
962
+ reference_crop_ndvi=_bounded_avg(
963
+ *_BOUNDS["ndvi"], *[v.get("NDVI") for v in reference_crop_devs.values()]
964
+ ),
965
+ reference_crop_dli_mol_m2=_bounded_avg(
966
+ *_BOUNDS["dli"], *[v.get("DLI") for v in reference_crop_devs.values()]
967
+ ),
968
+ reference_crop_by_position=reference_crop_by_pos,
969
+
970
+ par_shading_ratio=par_ratio,
971
+
972
+ # Treatment soil
973
+ treatment_soil_moisture_pct=_avg_soil_moisture(treatment_soil_devs),
974
+ treatment_soil_temp_c=_avg_soil_temp(treatment_soil_devs),
975
+ treatment_soil_ec_ds_m=_safe_avg(*[d.get("soilBulkEC") for d in treatment_soil_devs]),
976
+ treatment_soil_ph=_safe_avg(*[d.get("soilpH") for d in treatment_soil_devs]),
977
+
978
+ # Reference soil
979
+ reference_soil_moisture_pct=_avg_soil_moisture(reference_soil_devs),
980
+ reference_soil_temp_c=_avg_soil_temp(reference_soil_devs),
981
+
982
+ # Irrigation
983
+ irrigation_last_volume_l=irr.get("irrigationCycleVolume") or irr.get("irrigationVolume"),
984
+ irrigation_last_minutes=irr.get("irrigationCycleMinutes") or irr.get("irrigationMinutes"),
985
+ irrigation_ec=irr.get("irrigationEC"),
986
+ irrigation_ph=irr.get("irrigationPH"),
987
+ water_temp_c=irr.get("waterTemperature"),
988
+
989
+ # Panel temps
990
+ treatment_panel_temp_c=_bounded_avg(
991
+ *_BOUNDS["panel_temp"], *[tc1.get(k) for k in THERMOCOUPLE_KEYS]
992
+ ),
993
+ reference_panel_temp_c=_bounded_avg(
994
+ *_BOUNDS["panel_temp"], *[tc2.get(k) for k in THERMOCOUPLE_KEYS]
995
+ ),
996
+ )
997
+ return snapshot
998
+
999
+
1000
+ # ---------------------------------------------------------------------------
1001
+ # Helpers (module-level so threads can share without self)
1002
+ # ---------------------------------------------------------------------------
1003
+
1004
def _safe_float(val: Any) -> Optional[float]:
    """Convert a TB telemetry value string/number to float, or None on failure."""
    if val is None:
        return None
    try:
        converted = float(val)
    except (TypeError, ValueError):
        return None
    # Reject NaN/inf — they propagate silently through averages otherwise.
    return converted if math.isfinite(converted) else None
1013
+
1014
+
1015
def _safe_avg(*vals: Any) -> Optional[float]:
    """Return the mean of non-None, finite values, or None if none available."""
    usable = [
        v for v in vals
        if isinstance(v, (int, float)) and math.isfinite(v)
    ]
    if not usable:
        return None
    return sum(usable) / len(usable)
1020
+
1021
+
1022
def _bounded_avg(lo: float, hi: float, *vals: Any) -> Optional[float]:
    """Return the mean of values within [lo, hi], rejecting sensor faults outside that range."""
    in_range = [
        v for v in vals
        if isinstance(v, (int, float)) and math.isfinite(v) and lo <= v <= hi
    ]
    return sum(in_range) / len(in_range) if in_range else None
1027
+
1028
+
1029
# Physical plausibility bounds for Negev site.
# Used by _bounded_avg to discard faulty sensor readings before averaging.
_BOUNDS = {
    "air_temp": (-5.0, 55.0),       # °C — extreme Negev range
    "leaf_temp": (-5.0, 60.0),      # °C — leaves can exceed air under direct sun
    "soil_temp": (-2.0, 45.0),      # °C — soil in Negev
    "soil_moisture": (0.0, 100.0),  # %
    "par": (0.0, 3000.0),           # µmol m⁻² s⁻¹
    "vpd": (0.0, 10.0),             # kPa
    "co2": (300.0, 2000.0),         # ppm
    "ndvi": (-1.0, 1.0),
    "pri": (-1.0, 1.0),
    "dli": (0.0, 80.0),             # mol m⁻² day⁻¹
    "panel_temp": (-10.0, 100.0),   # °C — panel surface
}
1043
+
1044
+
1045
+ # ---------------------------------------------------------------------------
1046
+ # CLI smoke test
1047
+ # ---------------------------------------------------------------------------
1048
+
1049
if __name__ == "__main__":
    # Manual smoke test: requires live ThingsBoard credentials in the env.
    tb_client = ThingsBoardClient()
    print("Fetching vine snapshot from ThingsBoard...")
    try:
        snapshot = tb_client.get_vine_snapshot()
        print(snapshot.to_advisor_text())
        print(f"\nSnapshot age: {snapshot.staleness_minutes:.1f} min")
    except Exception as exc:
        print(f"Error: {exc}")
        print("Make sure THINGSBOARD_USERNAME/PASSWORD or THINGSBOARD_TOKEN are set in your .env")
src/data_providers.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.data.data_providers."""
2
+ from src.data.data_providers import * # noqa: F401, F403
src/data_schema.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.data.data_schema."""
2
+ from src.data.data_schema import * # noqa: F401, F403
src/day_ahead_advisor.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.advisor.day_ahead_advisor."""
2
+ from src.advisor.day_ahead_advisor import * # noqa: F401, F403
src/day_ahead_planner.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DayAheadPlanner: dynamic-programming trajectory optimizer for agrivoltaic control.
3
+
4
+ Given a day-ahead weather forecast (temperature, GHI) and the current energy
5
+ budget, finds the optimal tilt-offset trajectory for the next day that
6
+ maximises a combined utility of crop protection and energy generation.
7
+
8
+ Algorithm
9
+ ---------
10
+ For each 15-min slot t from sunrise to sunset:
11
+ 1. Predict vine state: Tleaf ≈ Tair (proxy), GHI from forecast, CWSI from
12
+ temperature heuristic, shading_helps from FvCB Rubisco transition.
13
+ 2. Run InterventionGate — if blocked, slot must stay at θ_astro (offset=0).
14
+ 3. For each candidate offset θ ∈ CANDIDATE_OFFSETS:
15
+ U_t(θ) = Price_energy · E_t(θ) + Price_crop · A_t(θ) − MovementCost(θ, θ_{t-1})
16
+ where E_t is energy generated and A_t is agronomic value (weighted by
17
+ phenological stage and zone).
18
+ 4. DP recurrence: V_t(θ) = U_t(θ) + max_{θ'} V_{t-1}(θ')
19
+ with cumulative energy sacrifice ≤ daily budget constraint.
20
+
21
+ The result is a DayAheadPlan: a list of SlotPlan objects, one per 15-min slot,
22
+ each containing the chosen offset, expected energy cost, and explainability tags.
23
+
24
+ References
25
+ ----------
26
+ - config/settings.py §Day-Ahead DP Planner
27
+ - context/2_plan.md §3.3
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import math
33
+ from dataclasses import dataclass, field
34
+ from datetime import date, datetime, timedelta
35
+ from typing import List, Optional
36
+
37
+ import numpy as np
38
+ import pandas as pd
39
+
40
+ from config.settings import (
41
+ CANDIDATE_OFFSETS,
42
+ DP_BASE_CROP_VALUE,
43
+ DP_FLAT_ENERGY_PRICE_ILS_KWH,
44
+ DP_MOVEMENT_COST,
45
+ DP_SLOT_DURATION_MIN,
46
+ NO_SHADE_BEFORE_HOUR,
47
+ SEMILLON_TRANSITION_TEMP_C,
48
+ SHADE_ELIGIBLE_CWSI_ABOVE,
49
+ SHADE_ELIGIBLE_GHI_ABOVE,
50
+ SHADE_ELIGIBLE_TLEAF_ABOVE,
51
+ STAGE_CROP_MULTIPLIER,
52
+ ZONE_CROP_WEIGHTS,
53
+ )
54
+
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Data containers
58
+ # ---------------------------------------------------------------------------
59
+
60
@dataclass
class SlotPlan:
    """Planned tilt offset for a single 15-min slot.

    One instance per daylight slot in a :class:`DayAheadPlan`. The offset is
    expressed relative to pure astronomical tracking, so ``offset_deg == 0``
    means the tracker follows the sun exactly (maximum energy, no shading).
    """

    time: str  # slot start, "HH:MM" UTC
    offset_deg: float  # degrees off astronomical tracking (0 = full tracking)
    energy_cost_kwh: float  # estimated energy sacrifice for this slot (kWh)
    gate_passed: bool  # whether InterventionGate allowed intervention
    tags: List[str] = field(default_factory=list)  # explainability tags (e.g. "full_tracking")
69
+
70
+
71
@dataclass
class DayAheadPlan:
    """Complete day-ahead tilt trajectory plan.

    Aggregates one :class:`SlotPlan` per daylight 15-min slot together with
    budget bookkeeping for the whole day.
    """

    target_date: str  # ISO date string
    slots: List[SlotPlan]  # one per daylight 15-min slot
    total_energy_cost_kwh: float  # sum of all slot costs
    daily_budget_kwh: float  # available daily budget
    budget_utilisation_pct: float  # total_cost / budget × 100
    stage_id: str  # phenological stage used
    n_intervention_slots: int  # slots where offset > 0

    def to_dict(self) -> dict:
        """Serialise the plan (including every slot) to a JSON-friendly dict."""
        slot_entries = []
        for slot in self.slots:
            slot_entries.append({
                "time": slot.time,
                "offset_deg": slot.offset_deg,
                "energy_cost_kwh": round(slot.energy_cost_kwh, 6),
                "gate_passed": slot.gate_passed,
                "tags": slot.tags,
            })
        return {
            "target_date": self.target_date,
            "stage_id": self.stage_id,
            "daily_budget_kwh": round(self.daily_budget_kwh, 4),
            "total_energy_cost_kwh": round(self.total_energy_cost_kwh, 4),
            "budget_utilisation_pct": round(self.budget_utilisation_pct, 1),
            "n_intervention_slots": self.n_intervention_slots,
            "slots": slot_entries,
        }
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # DayAheadPlanner
106
+ # ---------------------------------------------------------------------------
107
+
108
class DayAheadPlanner:
    """DP-based day-ahead trajectory optimizer.

    Parameters
    ----------
    shadow_model : object, optional
        ShadowModel instance for solar position and tracker geometry.
        Constructed lazily on first use if not supplied.
    baseline_predictor : BaselinePredictor, optional
        Hybrid FvCB+ML predictor for per-slot photosynthesis baseline.
        If provided, ``plan_day()`` uses predicted A for crop value instead
        of the temperature-only heuristic.
    energy_price : float
        Energy price (ILS/kWh) for the utility function.
    crop_value : float
        Base crop value (ILS per µmol CO₂ m⁻² s⁻¹ per slot).
    movement_cost : float
        Penalty per degree of tilt change between consecutive slots (ILS-equivalent).
    """

    def __init__(
        self,
        shadow_model=None,
        baseline_predictor=None,
        energy_price: float = DP_FLAT_ENERGY_PRICE_ILS_KWH,
        crop_value: float = DP_BASE_CROP_VALUE,
        movement_cost: float = DP_MOVEMENT_COST,
    ):
        self._shadow_model = shadow_model
        self._baseline_predictor = baseline_predictor
        self.energy_price = energy_price
        self.crop_value = crop_value
        self.movement_cost = movement_cost

    @property
    def shadow_model(self):
        """ShadowModel, constructed lazily so import cost is paid only when needed."""
        if self._shadow_model is None:
            from src.shading.solar_geometry import ShadowModel
            self._shadow_model = ShadowModel()
        return self._shadow_model

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------

    def plan_day(
        self,
        target_date: date,
        forecast_temps: List[float],
        forecast_ghi: List[float],
        daily_budget_kwh: float,
        stage_id: Optional[str] = None,
    ) -> DayAheadPlan:
        """Generate an optimal tilt trajectory for the given day.

        Parameters
        ----------
        target_date : date
            The day to plan for.
        forecast_temps : list of float
            Forecast air temperature (°C) for each 15-min slot (96 values).
            Only daylight slots are used; nighttime values are ignored.
        forecast_ghi : list of float
            Forecast GHI (W/m²) for each 15-min slot (96 values).
        daily_budget_kwh : float
            Available energy sacrifice budget for the day (kWh). A value
            of zero (or negative) means no shading interventions at all.
        stage_id : str, optional
            Phenological stage identifier. If None, estimated from date.

        Returns
        -------
        DayAheadPlan
        """
        if stage_id is None:
            from src.models.phenology import estimate_stage_for_date
            stage_id = estimate_stage_for_date(target_date).id

        # Crop value multiplier for this phenological stage
        crop_multiplier = self._get_crop_multiplier(stage_id)

        # Compute baseline A predictions if predictor is available; fall back
        # to the temperature heuristic (inside _slot_utility) on any failure.
        baseline_a: Optional[List[float]] = None
        if self._baseline_predictor is not None:
            try:
                baseline_a = self._baseline_predictor.predict_day(
                    forecast_temps, forecast_ghi,
                )
            except Exception as exc:
                import logging
                logging.getLogger(__name__).warning(
                    "Baseline predictor failed, using temperature heuristic: %s", exc,
                )

        # Build slot timeline (sunrise to sunset only)
        slots_info = self._build_slot_info(
            target_date, forecast_temps, forecast_ghi, crop_multiplier,
            baseline_a=baseline_a,
        )

        if not slots_info:
            # No daylight slots with usable irradiance — empty plan.
            return DayAheadPlan(
                target_date=str(target_date),
                slots=[],
                total_energy_cost_kwh=0.0,
                daily_budget_kwh=daily_budget_kwh,
                budget_utilisation_pct=0.0,
                stage_id=stage_id,
                n_intervention_slots=0,
            )

        # Run DP optimization (offset 0 is always a candidate so the planner
        # can fall back to full tracking in any slot).
        offsets = [0] + [o for o in CANDIDATE_OFFSETS if o > 0]
        planned_slots = self._dp_optimize(
            slots_info, offsets, daily_budget_kwh,
        )

        total_cost = sum(s.energy_cost_kwh for s in planned_slots)
        n_interventions = sum(1 for s in planned_slots if s.offset_deg > 0)
        utilisation = (total_cost / daily_budget_kwh * 100) if daily_budget_kwh > 0 else 0.0

        return DayAheadPlan(
            target_date=str(target_date),
            slots=planned_slots,
            total_energy_cost_kwh=total_cost,
            daily_budget_kwh=daily_budget_kwh,
            budget_utilisation_pct=utilisation,
            stage_id=stage_id,
            n_intervention_slots=n_interventions,
        )

    # ------------------------------------------------------------------
    # Slot info builder
    # ------------------------------------------------------------------

    def _build_slot_info(
        self,
        target_date: date,
        forecast_temps: List[float],
        forecast_ghi: List[float],
        crop_multiplier: float,
        baseline_a: Optional[List[float]] = None,
    ) -> List[dict]:
        """Build per-slot metadata for daylight hours.

        Returns list of dicts with keys: time_str, hour, temp_c, ghi,
        solar_elevation, solar_azimuth, astro_tilt, gate_passed,
        gate_reason, energy_astro_kwh, crop_multiplier, and optionally
        baseline_a.
        """
        day_start = pd.Timestamp(target_date, tz="UTC")
        times = pd.date_range(day_start, periods=96, freq="15min")

        # Solar positions for the whole day (one vectorised call)
        solar_pos = self.shadow_model.get_solar_position(times)

        slots = []
        for i, ts in enumerate(times):
            hour = ts.hour + ts.minute / 60.0
            elev = float(solar_pos.iloc[i]["solar_elevation"])

            # Skip nighttime (sun below ~2° elevation)
            if elev <= 2:
                continue

            # Missing forecast values fall back to benign defaults.
            temp_c = forecast_temps[i] if i < len(forecast_temps) else 25.0
            ghi = forecast_ghi[i] if i < len(forecast_ghi) else 0.0

            # Skip slots with no meaningful irradiance
            if ghi < 50:
                continue

            azim = float(solar_pos.iloc[i]["solar_azimuth"])
            tracker = self.shadow_model.compute_tracker_tilt(azim, elev)
            astro_tilt = float(tracker["tracker_theta"])

            # Gate check (simplified — uses forecast data as proxy)
            gate_passed, gate_reason = self._check_gate(
                temp_c, ghi, hour,
            )

            # Energy at astronomical tracking (kWh per kWp for this 0.25 h slot)
            aoi = float(tracker["aoi"])
            energy_astro = max(0.0, math.cos(math.radians(aoi))) * 0.25

            slot_dict = {
                "time_str": ts.strftime("%H:%M"),
                "hour": hour,
                "temp_c": temp_c,
                "ghi": ghi,
                "solar_elevation": elev,
                "solar_azimuth": azim,
                "astro_tilt": astro_tilt,
                "gate_passed": gate_passed,
                "gate_reason": gate_reason,
                "energy_astro_kwh": energy_astro,
                "crop_multiplier": crop_multiplier,
            }
            # Attach baseline A if available (from BaselinePredictor); indexed
            # by full-day slot index, so the predictor must return 96 values.
            if baseline_a is not None and i < len(baseline_a):
                slot_dict["baseline_a"] = baseline_a[i]
            slots.append(slot_dict)

        return slots

    def _check_gate(
        self,
        temp_c: float,
        ghi: float,
        hour: float,
    ) -> tuple[bool, str]:
        """Simplified gate check using forecast data.

        Uses the same thresholds as InterventionGate but without sensor data.
        CWSI is estimated from temperature (proxy).

        Returns ``(passed, reason)`` where *reason* explains a block.
        """
        # No shade before configured hour
        if hour < NO_SHADE_BEFORE_HOUR:
            return False, f"before_{NO_SHADE_BEFORE_HOUR}:00"

        # Temperature below Rubisco transition
        if temp_c < SHADE_ELIGIBLE_TLEAF_ABOVE:
            return False, f"temp_{temp_c:.0f}C_below_threshold"

        # GHI below meaningful radiation
        if ghi < SHADE_ELIGIBLE_GHI_ABOVE:
            return False, f"ghi_{ghi:.0f}_below_threshold"

        # CWSI proxy from temperature: linear ramp 30°C→0.0 up to 40°C→1.0
        cwsi_proxy = max(0.0, min(1.0, (temp_c - 30.0) / 10.0))
        if cwsi_proxy < SHADE_ELIGIBLE_CWSI_ABOVE:
            return False, f"cwsi_proxy_{cwsi_proxy:.2f}_below_threshold"

        # FvCB shading_helps: above transition temp + high GHI = Rubisco-limited
        shading_helps = temp_c >= SEMILLON_TRANSITION_TEMP_C and ghi >= 400
        if not shading_helps:
            return False, "fvcb_shading_not_helpful"

        return True, "gate_passed"

    # ------------------------------------------------------------------
    # DP optimizer
    # ------------------------------------------------------------------

    def _dp_optimize(
        self,
        slots_info: List[dict],
        offsets: List[float],
        daily_budget_kwh: float,
    ) -> List[SlotPlan]:
        """Dynamic programming over slots × offsets with budget constraint.

        State: (slot_index, offset_index, budget_steps_consumed)
        Constraint: cumulative energy cost ≤ daily_budget_kwh
        Objective: maximise total utility (energy revenue + crop protection − movement cost)
        """
        # Guard: a non-positive budget means no sacrifice is allowed at all.
        # (Previously a fallback step size of 0.001 kWh combined with 100
        # budget steps let up to 0.1 kWh of interventions through even when
        # the daily budget was zero.)
        if daily_budget_kwh <= 0:
            return self._full_tracking_plan(slots_info)

        n_slots = len(slots_info)
        n_offsets = len(offsets)

        # Discretise budget into steps for tractable DP
        budget_steps = 100
        budget_per_step = daily_budget_kwh / budget_steps

        INF = float("-inf")

        # Pre-compute per-slot utilities for each offset
        slot_utilities = []  # [slot][offset] → (utility, energy_cost)
        for si in slots_info:
            utils_for_slot = []
            for offset in offsets:
                u, cost = self._slot_utility(si, offset)
                utils_for_slot.append((u, cost))
            slot_utilities.append(utils_for_slot)

        # Forward DP
        # V[t][o][b] = max total utility achievable from slots 0..t
        # ending at offset o with b budget steps consumed
        V = np.full((n_slots, n_offsets, budget_steps + 1), INF)
        choice = np.full((n_slots, n_offsets, budget_steps + 1), -1, dtype=int)

        # Initialize slot 0
        for oi, offset in enumerate(offsets):
            if not slots_info[0]["gate_passed"] and offset > 0:
                continue  # gate blocked
            u, cost = slot_utilities[0][oi]
            b_used = int(math.ceil(cost / budget_per_step)) if cost > 0 else 0
            if b_used <= budget_steps:
                V[0, oi, b_used] = u

        # Fill forward
        for t in range(1, n_slots):
            gate_passed = slots_info[t]["gate_passed"]
            for oi, offset in enumerate(offsets):
                if not gate_passed and offset > 0:
                    continue  # gate blocked — only offset=0 allowed

                u_t, cost_t = slot_utilities[t][oi]
                b_cost = int(math.ceil(cost_t / budget_per_step)) if cost_t > 0 else 0

                for prev_oi, prev_offset in enumerate(offsets):
                    # Movement cost between consecutive offsets
                    move_penalty = self.movement_cost * abs(offset - prev_offset)

                    for b_prev in range(budget_steps + 1):
                        if V[t - 1, prev_oi, b_prev] == INF:
                            continue
                        b_total = b_prev + b_cost
                        if b_total > budget_steps:
                            continue  # budget exceeded

                        val = V[t - 1, prev_oi, b_prev] + u_t - move_penalty
                        if val > V[t, oi, b_total]:
                            V[t, oi, b_total] = val
                            choice[t, oi, b_total] = prev_oi

        # Backtrack: find best final state
        best_val = INF
        best_oi = 0
        best_b = 0
        for oi in range(n_offsets):
            for b in range(budget_steps + 1):
                if V[n_slots - 1, oi, b] > best_val:
                    best_val = V[n_slots - 1, oi, b]
                    best_oi = oi
                    best_b = b

        # Trace back the path (budget index is recovered slot by slot)
        path = [0] * n_slots
        path[n_slots - 1] = best_oi
        current_b = best_b
        for t in range(n_slots - 1, 0, -1):
            prev_oi = choice[t, path[t], current_b]
            if prev_oi < 0:
                prev_oi = 0  # unreachable state — fall back to astronomical
            # Recover budget consumed at slot t
            _, cost_t = slot_utilities[t][path[t]]
            b_cost = int(math.ceil(cost_t / budget_per_step)) if cost_t > 0 else 0
            current_b = max(0, current_b - b_cost)
            path[t - 1] = prev_oi

        # Build SlotPlan list
        planned: List[SlotPlan] = []
        for t, si in enumerate(slots_info):
            oi = path[t]
            offset = offsets[oi]
            _, cost = slot_utilities[t][oi]

            tags = []
            if not si["gate_passed"]:
                tags.append(f"gate_blocked:{si['gate_reason']}")
            elif offset > 0:
                tags.append(f"intervention:{offset}deg")
            else:
                tags.append("full_tracking")

            planned.append(SlotPlan(
                time=si["time_str"],
                offset_deg=offset,
                energy_cost_kwh=round(cost, 6),
                gate_passed=si["gate_passed"],
                tags=tags,
            ))

        return planned

    def _full_tracking_plan(self, slots_info: List[dict]) -> List[SlotPlan]:
        """Zero-budget fallback: full astronomical tracking in every slot.

        Offset 0 costs nothing, so every slot gets offset 0 / cost 0 with the
        same tags the DP path would produce for a pure-tracking trajectory.
        """
        planned: List[SlotPlan] = []
        for si in slots_info:
            if not si["gate_passed"]:
                tags = [f"gate_blocked:{si['gate_reason']}"]
            else:
                tags = ["full_tracking"]
            planned.append(SlotPlan(
                time=si["time_str"],
                offset_deg=0,
                energy_cost_kwh=0.0,
                gate_passed=si["gate_passed"],
                tags=tags,
            ))
        return planned

    def _slot_utility(self, si: dict, offset_deg: float) -> tuple[float, float]:
        """Compute utility and energy cost for a slot at a given offset.

        Utility = energy_revenue + crop_protection_value
        Energy cost = energy_astro − energy_at_offset (kWh)

        Returns (utility, energy_cost_kwh).
        """
        energy_astro = si["energy_astro_kwh"]

        # Energy sacrifice fraction from tilting off the sun: 1 − cos(offset)
        sacrifice_frac = 1.0 - math.cos(math.radians(offset_deg))
        energy_at_offset = energy_astro * (1.0 - sacrifice_frac)
        energy_cost = energy_astro - energy_at_offset  # kWh sacrificed

        # Energy revenue (ILS)
        energy_revenue = energy_at_offset * self.energy_price

        # Crop protection value: non-zero only when gate passes and offset > 0
        crop_value = 0.0
        if si["gate_passed"] and offset_deg > 0:
            # Higher offset → more shade → more crop protection (diminishing returns)
            shade_benefit = math.sqrt(offset_deg / 20.0)

            if "baseline_a" in si and si["baseline_a"] > 0:
                # Use actual photosynthesis prediction for stress severity.
                # Higher A under full sun means more to protect; the benefit of
                # shading scales with how much photosynthesis is at risk.
                baseline_a = si["baseline_a"]
                # Normalize: A ~ 10-20 µmol typical → severity 1.0-2.0
                stress_severity = baseline_a / 10.0
            else:
                # Fallback: temperature heuristic
                stress_severity = max(0.0, si["temp_c"] - SEMILLON_TRANSITION_TEMP_C) / 10.0

            crop_value = (
                self.crop_value
                * si["crop_multiplier"]
                * stress_severity
                * shade_benefit
            )

        utility = energy_revenue + crop_value
        return utility, energy_cost

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _get_crop_multiplier(stage_id: str) -> float:
        """Map phenological stage ID to crop value multiplier."""
        # Map stage IDs to the STAGE_CROP_MULTIPLIER keys; unknown stages
        # default to the fruit_set weighting.
        stage_map = {
            "budburst_vegetative": "pre_flowering",
            "flowering_fruit_set": "fruit_set",
            "berry_growth": "fruit_set",
            "veraison_ripening": "veraison",
            "post_harvest_reserves": "post_harvest",
            "winter_dormancy": "post_harvest",
        }
        mapped = stage_map.get(stage_id, "fruit_set")
        return STAGE_CROP_MULTIPLIER.get(mapped, 1.0)
536
+
537
+
538
+ # ---------------------------------------------------------------------------
539
+ # CLI smoke test
540
+ # ---------------------------------------------------------------------------
541
+
542
if __name__ == "__main__":
    # CLI smoke test: build a planner against the real ShadowModel and plan
    # one synthetic hot summer day, printing the resulting trajectory.
    from src.shading.solar_geometry import ShadowModel

    shadow = ShadowModel()
    planner = DayAheadPlanner(shadow_model=shadow)

    # Simulate a hot July day in Sde Boker
    test_date = date(2025, 7, 15)

    # Generate synthetic forecast: sinusoidal temperature peaking at 38°C at 14:00 UTC
    # (96 values, one per 15-min slot)
    temps = []
    ghis = []
    for slot in range(96):
        hour = slot * 0.25
        # Temperature: 25°C at night, peaks at 38°C around 11:00 UTC (14:00 local)
        t = 25.0 + 13.0 * max(0, math.sin(math.pi * (hour - 5) / 14)) if 5 <= hour <= 19 else 25.0
        temps.append(t)
        # GHI: 0 at night, peaks at 950 W/m² at solar noon (~9:40 UTC)
        g = max(0, 950 * math.sin(math.pi * (hour - 4) / 12)) if 4 <= hour <= 16 else 0.0
        ghis.append(g)

    plan = planner.plan_day(
        target_date=test_date,
        forecast_temps=temps,
        forecast_ghi=ghis,
        daily_budget_kwh=2.0,  # typical daily budget from EnergyBudgetPlanner
    )

    # Summary header, then one row per daylight slot
    print(f"Day-Ahead Plan for {plan.target_date}")
    print(f"  Stage: {plan.stage_id}")
    print(f"  Budget: {plan.daily_budget_kwh:.2f} kWh")
    print(f"  Total cost: {plan.total_energy_cost_kwh:.4f} kWh ({plan.budget_utilisation_pct:.1f}%)")
    print(f"  Intervention slots: {plan.n_intervention_slots}/{len(plan.slots)}")
    print()
    print(f"  {'Time':>5} {'Offset':>7} {'Cost':>10} {'Gate':>6} Tags")
    print(f"  {'-' * 60}")
    for s in plan.slots:
        status = "PASS" if s.gate_passed else "BLOCK"
        print(f"  {s.time:>5} {s.offset_deg:>5.0f}° {s.energy_cost_kwh:>10.6f} {status:>6} {', '.join(s.tags)}")
src/energy_budget.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ EnergyBudgetPlanner: hierarchical energy sacrifice budget for agrivoltaic control.
3
+
4
+ Budget hierarchy:
5
+ Annual → Monthly → Weekly → Daily → 15-min Slot
6
+
7
+ The system defaults to full astronomical tracking (max energy). Shading
8
+ interventions draw from a tight budget (default 5% of annual generation).
9
+ Budget is pre-allocated down the hierarchy so that hot days/hours get more,
10
+ and the system never overspends.
11
+
12
+ References:
13
+ - config/settings.py for all thresholds and weights
14
+ - context/2_plan.md §3.1 for design rationale
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from datetime import date, timedelta
20
+ from typing import Optional
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from config.settings import (
26
+ ANNUAL_RESERVE_PCT,
27
+ DAILY_MARGIN_PCT,
28
+ MAX_ENERGY_REDUCTION_PCT,
29
+ MONTHLY_BUDGET_WEIGHTS,
30
+ NO_SHADE_BEFORE_HOUR,
31
+ WEEKLY_RESERVE_PCT,
32
+ )
33
+
34
+
35
class EnergyBudgetPlanner:
    """Hierarchical energy sacrifice budget for agrivoltaic shading control.

    Budgets flow Annual → Monthly → Weekly → Daily → 15-min slot. All plan
    objects returned by the ``compute_*`` methods are plain dicts that the
    ``spend_slot`` / ``emergency_draw`` helpers mutate in place, so callers
    own the state and may persist it between ticks.

    Parameters
    ----------
    max_energy_reduction_pct : float
        Maximum fraction of annual PV generation the vines can "spend" on
        shading (default from config: 5%).
    shadow_model : object, optional
        ShadowModel instance used to estimate slot-level energy potential.
        If None, annual plan uses a simplified analytical estimate.
    """

    def __init__(
        self,
        max_energy_reduction_pct: float = MAX_ENERGY_REDUCTION_PCT,
        shadow_model=None,
    ):
        self.max_pct = max_energy_reduction_pct
        self.shadow = shadow_model

    # ------------------------------------------------------------------
    # Annual plan
    # ------------------------------------------------------------------

    def compute_annual_plan(self, year: int) -> dict:
        """Compute seasonal energy potential and allocate monthly budgets.

        Iterates every 15-min slot from May 1 to Sep 30, computing energy
        under astronomical tracking. Then distributes the sacrifice budget
        across months using MONTHLY_BUDGET_WEIGHTS.

        Returns dict with:
            year, total_potential_kWh, total_budget_kWh, annual_reserve_kWh,
            monthly_budgets (dict[int, float]), budget_spent_kWh
        """
        season_start = pd.Timestamp(f"{year}-05-01", tz="UTC")
        season_end = pd.Timestamp(f"{year}-09-30 23:45", tz="UTC")
        times = pd.date_range(season_start, season_end, freq="15min")

        # Prefer the geometric model when available; otherwise the cheap
        # analytical estimate below (good enough for planning).
        if self.shadow is not None:
            energy_per_slot = self._energy_from_shadow_model(times)
        else:
            energy_per_slot = self._energy_analytical(times)

        total_potential = float(np.sum(energy_per_slot))
        total_budget = total_potential * self.max_pct / 100.0
        annual_reserve = total_budget * ANNUAL_RESERVE_PCT / 100.0
        distributable = total_budget - annual_reserve

        # NOTE(review): assumes MONTHLY_BUDGET_WEIGHTS values sum to 1.0 —
        # confirm in config/settings.py.
        monthly_budgets = {
            month: distributable * weight
            for month, weight in MONTHLY_BUDGET_WEIGHTS.items()
        }

        return {
            "year": year,
            "total_potential_kWh": round(total_potential, 2),
            "total_budget_kWh": round(total_budget, 2),
            "annual_reserve_kWh": round(annual_reserve, 2),
            "monthly_budgets": {m: round(v, 4) for m, v in monthly_budgets.items()},
            "budget_spent_kWh": 0.0,
        }

    def _energy_from_shadow_model(self, times: pd.DatetimeIndex) -> np.ndarray:
        """Estimate per-slot energy (kWh per kWp) using the ShadowModel's solar position."""
        solar_pos = self.shadow.get_solar_position(times)
        energy = []
        for _, sp in solar_pos.iterrows():
            # Sun below horizon → no generation
            if sp["solar_elevation"] <= 0:
                energy.append(0.0)
                continue
            tracker = self.shadow.compute_tracker_tilt(
                sp["solar_azimuth"], sp["solar_elevation"]
            )
            # cos(AOI) × 0.25h slot duration → kWh per kWp
            e = max(0.0, np.cos(np.radians(tracker["aoi"]))) * 0.25
            energy.append(e)
        return np.array(energy)

    @staticmethod
    def _energy_analytical(times: pd.DatetimeIndex) -> np.ndarray:
        """Simplified analytical estimate when no ShadowModel is available.

        Vectorized: computes all ~15k slots in one numpy pass.
        Uses a sinusoidal day profile peaking at solar noon. Good enough
        for budget planning; not used for real-time control.
        """
        from config.settings import SITE_LATITUDE

        hour_utc = times.hour + times.minute / 60.0
        # 34.8 is presumably the site longitude (°E) — TODO confirm against config
        solar_noon_utc = 12.0 - 34.8 / 15.0  # ≈ 9.68 UTC
        hour_angle = (hour_utc - solar_noon_utc) * 15.0  # degrees

        lat_rad = np.radians(SITE_LATITUDE)
        doy = times.dayofyear
        # Standard solar declination approximation (Cooper's equation)
        decl_rad = np.radians(23.45 * np.sin(np.radians(360.0 / 365.0 * (doy - 81))))
        ha_rad = np.radians(hour_angle)

        # sin(solar elevation) from latitude, declination, hour angle
        sin_elev = (
            np.sin(lat_rad) * np.sin(decl_rad)
            + np.cos(lat_rad) * np.cos(decl_rad) * np.cos(ha_rad)
        )
        # Astronomical tracking → AOI ≈ 0 → cos(AOI) ≈ 1
        # Scale by clearness (~0.75 for Sde Boker) and slot duration (0.25h)
        return np.where(sin_elev > 0, sin_elev * 0.75 * 0.25, 0.0)

    # ------------------------------------------------------------------
    # Weekly plan
    # ------------------------------------------------------------------

    def compute_weekly_plan(
        self,
        week_start: pd.Timestamp | date,
        monthly_remaining: float,
        forecast_tmax: Optional[list[float]] = None,
        rollover: float = 0.0,
    ) -> dict:
        """Distribute weekly budget to days, weighted by (Tmax - 30)².

        Days with forecast Tmax < 30°C get zero allocation (no stress
        expected). Hot days get quadratically more budget.

        Parameters
        ----------
        week_start : date-like
            First day of the week.
        monthly_remaining : float
            Remaining monthly budget (kWh).
        forecast_tmax : list of 7 floats, optional
            Forecast daily maximum temperature for each day of the week.
            If None, budget is split evenly.
        rollover : float
            Unspent budget rolled over from the previous week.

        Returns dict with:
            weekly_total_kWh, weekly_reserve_kWh, daily_budgets_kWh (list[7])
        """
        if not isinstance(week_start, pd.Timestamp):
            week_start = pd.Timestamp(week_start)

        month = week_start.month
        # Estimate weeks remaining in the month.
        # NOTE(review): the month == 9 branch is redundant — the general
        # branch computes the same Sep 30 end date; only month == 12 needs
        # the special case (month + 1 would overflow).
        if month == 12:
            month_end = pd.Timestamp(f"{week_start.year}-12-31")
        elif month == 9:
            month_end = pd.Timestamp(f"{week_start.year}-09-30")
        else:
            month_end = pd.Timestamp(
                f"{week_start.year}-{month + 1:02d}-01"
            ) - timedelta(days=1)
        days_left = max(1, (month_end - week_start).days)
        weeks_left = max(1, days_left // 7)

        weekly_raw = monthly_remaining / weeks_left + rollover
        weekly_reserve = weekly_raw * WEEKLY_RESERVE_PCT / 100.0
        distributable = weekly_raw - weekly_reserve

        if forecast_tmax is not None and len(forecast_tmax) == 7:
            # Quadratic heat weighting: only days above 30°C draw budget
            weights = [max(0.0, t - 30.0) ** 2 for t in forecast_tmax]
            total_w = sum(weights)
            if total_w > 0:
                daily = [distributable * w / total_w for w in weights]
            else:
                daily = [0.0] * 7  # all days < 30°C → no budget needed
        else:
            daily = [distributable / 7.0] * 7

        return {
            "weekly_total_kWh": round(weekly_raw, 4),
            "weekly_reserve_kWh": round(weekly_reserve, 4),
            "daily_budgets_kWh": [round(d, 4) for d in daily],
        }

    # ------------------------------------------------------------------
    # Daily plan
    # ------------------------------------------------------------------

    def compute_daily_plan(
        self,
        day: date | pd.Timestamp,
        daily_budget: float,
        rollover: float = 0.0,
    ) -> dict:
        """Distribute daily budget to 15-min slots.

        Zero before NO_SHADE_BEFORE_HOUR (10:00). Peak allocation at
        11:00–14:00 (60% of planned budget).

        Returns dict with:
            date, daily_total_kWh, daily_margin_kWh, daily_margin_remaining_kWh,
            slot_budgets (dict[str, float]), cumulative_spent
        """
        daily_raw = daily_budget + rollover
        daily_margin = daily_raw * DAILY_MARGIN_PCT / 100.0
        planned = daily_raw - daily_margin

        # Time blocks with their share of the planned budget.
        # The non-zero weights must sum to 1.0.
        # NOTE(review): if NO_SHADE_BEFORE_HOUR were ever ≥ 13, blocks would
        # overlap/empty and the 0.60 peak share could be silently dropped —
        # confirm config keeps it well below 13.
        transition_end = max(NO_SHADE_BEFORE_HOUR + 1, 11)
        blocks = [
            ((5, NO_SHADE_BEFORE_HOUR), 0.00),  # morning — no shade
            ((NO_SHADE_BEFORE_HOUR, transition_end), 0.05),  # transition
            ((transition_end, 14), 0.60),  # peak stress window
            ((14, 16), 0.30),  # sustained heat
            ((16, 20), 0.05),  # rare late stress
        ]

        slot_budgets: dict[str, float] = {}
        for (h_start, h_end), weight in blocks:
            block_budget = planned * weight
            n_slots = (h_end - h_start) * 4  # 4 slots per hour
            per_slot = block_budget / n_slots if n_slots > 0 else 0.0
            for h in range(h_start, h_end):
                for m in (0, 15, 30, 45):
                    slot_budgets[f"{h:02d}:{m:02d}"] = round(per_slot, 6)

        return {
            "date": str(day),
            "daily_total_kWh": round(daily_raw, 4),
            "daily_margin_kWh": round(daily_margin, 4),
            "daily_margin_remaining_kWh": round(daily_margin, 4),
            "slot_budgets": slot_budgets,
            "cumulative_spent": 0.0,
        }

    # ------------------------------------------------------------------
    # Slot-level execution helpers
    # ------------------------------------------------------------------

    def spend_slot(self, daily_plan: dict, slot_key: str, amount: float) -> float:
        """Deduct energy from a slot's budget. Returns amount actually spent.

        Mutates *daily_plan* in place. If the slot budget is insufficient,
        draws the shortfall from the daily margin; the amount returned may
        therefore be less than requested when both are exhausted.
        """
        # Unknown slot keys yield 0 available (and get created as spent below)
        available = daily_plan["slot_budgets"].get(slot_key, 0.0)
        if amount <= available:
            daily_plan["slot_budgets"][slot_key] -= amount
            daily_plan["cumulative_spent"] += amount
            return amount

        # Slot budget exhausted — try daily margin
        shortfall = amount - available
        margin = daily_plan["daily_margin_remaining_kWh"]
        from_margin = min(shortfall, margin)
        total_spent = available + from_margin

        daily_plan["slot_budgets"][slot_key] = 0.0
        daily_plan["daily_margin_remaining_kWh"] -= from_margin
        daily_plan["cumulative_spent"] += total_spent
        return round(total_spent, 6)

    def emergency_draw(self, annual_plan: dict, amount: float) -> float:
        """Draw from annual reserve for extreme heat events.

        Mutates *annual_plan* in place. Returns the amount actually drawn
        (may be less than requested if the reserve is depleted).
        """
        available = annual_plan["annual_reserve_kWh"]
        drawn = min(amount, available)
        annual_plan["annual_reserve_kWh"] = round(available - drawn, 4)
        annual_plan["budget_spent_kWh"] = round(
            annual_plan["budget_spent_kWh"] + drawn, 4
        )
        return round(drawn, 4)

    # ------------------------------------------------------------------
    # Rollover helper
    # ------------------------------------------------------------------

    def compute_daily_rollover(self, daily_plan: dict) -> float:
        """Compute unspent budget at end of day (available for next day)."""
        # Leftovers = untouched slot allocations + unused safety margin
        unspent_slots = sum(daily_plan["slot_budgets"].values())
        unspent_margin = daily_plan["daily_margin_remaining_kWh"]
        return round(unspent_slots + unspent_margin, 4)