Eli Safra committed on
Commit
938949f
·
1 Parent(s): a04833b

Deploy SolarWine API (FastAPI + Docker, port 7860)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +27 -0
  2. README.md +22 -7
  3. backend/__init__.py +0 -0
  4. backend/api/__init__.py +0 -0
  5. backend/api/auth.py +120 -0
  6. backend/api/deps.py +21 -0
  7. backend/api/main.py +120 -0
  8. backend/api/routes/__init__.py +0 -0
  9. backend/api/routes/biology.py +38 -0
  10. backend/api/routes/chatbot.py +79 -0
  11. backend/api/routes/control.py +77 -0
  12. backend/api/routes/energy.py +45 -0
  13. backend/api/routes/health.py +49 -0
  14. backend/api/routes/login.py +58 -0
  15. backend/api/routes/photosynthesis.py +30 -0
  16. backend/api/routes/sensors.py +47 -0
  17. backend/api/routes/weather.py +48 -0
  18. backend/requirements.txt +7 -0
  19. backend/workers/__init__.py +0 -0
  20. backend/workers/control_tick.py +124 -0
  21. backend/workers/daily_planner.py +72 -0
  22. config/settings.py +204 -0
  23. requirements.txt +18 -0
  24. src/__init__.py +52 -0
  25. src/advisor/__init__.py +1 -0
  26. src/advisor/day_ahead_advisor.py +632 -0
  27. src/advisor/safety_rails.py +179 -0
  28. src/baseline_predictor.py +248 -0
  29. src/canopy_photosynthesis.py +2 -0
  30. src/chatbot/__init__.py +1 -0
  31. src/chatbot/feedback.py +104 -0
  32. src/chatbot/guardrails.py +363 -0
  33. src/chatbot/llm_data_engineer.py +559 -0
  34. src/chatbot/routing_agent.py +233 -0
  35. src/chatbot/vineyard_chatbot.py +939 -0
  36. src/chronos_forecaster.py +2 -0
  37. src/command_arbiter.py +327 -0
  38. src/control_loop.py +779 -0
  39. src/data/__init__.py +1 -0
  40. src/data/data_providers.py +1180 -0
  41. src/data/data_schema.py +519 -0
  42. src/data/ims_client.py +215 -0
  43. src/data/redis_cache.py +152 -0
  44. src/data/sensor_data_loader.py +87 -0
  45. src/data/thingsboard_client.py +1058 -0
  46. src/data_providers.py +2 -0
  47. src/data_schema.py +2 -0
  48. src/day_ahead_advisor.py +2 -0
  49. src/day_ahead_planner.py +580 -0
  50. src/energy_budget.py +309 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

WORKDIR /app

# Unbuffered stdout/stderr so logs reach the container runtime immediately;
# no .pyc files keeps the image (and the non-root layer) slim.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH=/app

# Install dependencies first (layer caching)
COPY requirements.txt .
COPY backend/requirements.txt backend/
RUN pip install --no-cache-dir -r requirements.txt -r backend/requirements.txt

# Non-root user for security
RUN groupadd -r solarwine && useradd -r -g solarwine solarwine

# Copy application code (no Data/ — mount or fetch at runtime)
COPY src/ src/
COPY config/ config/
COPY backend/ backend/

# Switch to non-root
USER solarwine

# HuggingFace Spaces requires port 7860
EXPOSE 7860
# --start-period avoids flapping while uvicorn is still booting; the probe
# carries its own urlopen timeout so it can never hang past the check window.
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/api/health', timeout=4)" || exit 1
CMD ["uvicorn", "backend.api.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,26 @@
1
  ---
2
- title: Api
3
- emoji: 👁
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: docker
7
- pinned: false
8
- license: mit
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: SolarWine API
3
+ emoji: 🌿
4
+ colorFrom: green
5
+ colorTo: yellow
6
  sdk: docker
7
+ app_port: 7860
8
+ private: true
9
  ---
10
 
11
+ # SolarWine API
12
+
13
+ FastAPI backend for the SolarWine agrivoltaic vineyard control system.
14
+
15
+ ## Endpoints
16
+
17
+ - `GET /api/health` — health check
18
+ - `GET /api/weather/current` — current weather (IMS station 43)
19
+ - `GET /api/sensors/snapshot` — vine sensor readings (ThingsBoard)
20
+ - `GET /api/energy/current` — current power output
21
+ - `GET /api/photosynthesis/current` — photosynthesis rate (FvCB/ML)
22
+ - `GET /api/control/status` — last control loop tick
23
+ - `POST /api/chatbot/message` — AI vineyard advisor
24
+ - `GET /api/biology/rules` — biology rules
25
+
26
+ Interactive docs at `/docs`.
backend/__init__.py ADDED
File without changes
backend/api/__init__.py ADDED
File without changes
backend/api/auth.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ JWT authentication for the SolarWine API.
3
+
4
+ Initially optional — endpoints work without auth.
5
+ Enable by setting JWT_SECRET in environment.
6
+
7
+ Usage in routes::
8
+
9
+ from backend.api.auth import require_auth
10
+
11
+ @router.get("/protected")
12
+ async def protected(user: dict = Depends(require_auth)):
13
+ return {"user": user}
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import time
20
+ import logging
21
+ from typing import Optional
22
+
23
+ from fastapi import Depends, HTTPException, status
24
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
25
+
26
+ log = logging.getLogger(__name__)
27
+
28
+ _security = HTTPBearer(auto_error=False)
29
+
30
+ JWT_SECRET = os.environ.get("JWT_SECRET", "")
31
+ JWT_ALGORITHM = "HS256"
32
+ JWT_EXPIRY_HOURS = 24
33
+
34
+ if not JWT_SECRET:
35
+ log.warning("JWT_SECRET not set — authentication is DISABLED (all requests get guest/admin access)")
36
+
37
+
38
def _get_jwt():
    """Import PyJWT lazily; return the module, or None when not installed."""
    try:
        import jwt as _pyjwt
    except ImportError:
        log.warning("PyJWT not installed — auth disabled")
        return None
    return _pyjwt
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Token creation
50
+ # ---------------------------------------------------------------------------
51
+
52
def create_token(username: str, role: str = "user") -> Optional[str]:
    """Build a signed HS256 JWT for *username*, or None when auth is off.

    Returns None if PyJWT is unavailable or JWT_SECRET is unset, matching
    the "auth disabled" behaviour of the rest of this module.
    """
    lib = _get_jwt()
    if not lib or not JWT_SECRET:
        return None
    claims = {
        "sub": username,
        "role": role,
        "iat": int(time.time()),
        "exp": int(time.time()) + JWT_EXPIRY_HOURS * 3600,
    }
    return lib.encode(claims, JWT_SECRET, algorithm=JWT_ALGORITHM)
64
+
65
+
66
+ # ---------------------------------------------------------------------------
67
+ # Token validation (FastAPI dependency)
68
+ # ---------------------------------------------------------------------------
69
+
70
async def require_auth(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> dict:
    """Validate the Bearer JWT and return its decoded payload.

    Behaviour matrix:
    - JWT_SECRET unset: auth disabled, every caller gets guest/admin.
    - No Authorization header: 401 with a WWW-Authenticate challenge.
    - PyJWT not installed: degrade to guest/admin (mirrors the disabled case).
    - Expired or invalid token: 401.
    """
    # Auth disabled — allow all
    if not JWT_SECRET:
        return {"sub": "guest", "role": "admin"}

    if not credentials:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing authorization header",
            headers={"WWW-Authenticate": "Bearer"},
        )

    jwt = _get_jwt()
    if not jwt:
        return {"sub": "guest", "role": "admin"}

    try:
        payload = jwt.decode(
            credentials.credentials,
            JWT_SECRET,
            algorithms=[JWT_ALGORITHM],
        )
    except jwt.ExpiredSignatureError:
        # `from None` (flake8 B904) suppresses the library traceback so JWT
        # internals never leak into the HTTP error context.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token expired",
        ) from None
    except jwt.InvalidTokenError:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token",
        ) from None
    return payload
109
+
110
+
111
async def optional_auth(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> dict:
    """Best-effort auth: decode the token when possible, else a guest user."""
    guest = {"sub": "guest", "role": "guest"}
    if not JWT_SECRET or not credentials:
        return guest
    try:
        return await require_auth(credentials)
    except HTTPException:
        return guest
backend/api/deps.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Shared FastAPI dependencies — DataHub singleton + Redis.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from functools import lru_cache
8
+
9
+ from src.data.data_providers import DataHub
10
+ from src.data.redis_cache import get_redis
11
+
12
+
13
# lru_cache(maxsize=1) on a zero-argument function acts as a lazy,
# process-wide singleton: construction is deferred to the first request.
@lru_cache(maxsize=1)
def get_datahub() -> DataHub:
    """Return a singleton DataHub (all services with Redis-backed caches)."""
    return DataHub.default()
17
+
18
+
19
def get_redis_client():
    """Return the Redis client (or None).

    Thin pass-through to src.data.redis_cache.get_redis so routes depend on
    this module instead of importing the cache layer directly.
    """
    return get_redis()
backend/api/main.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI application — SolarWine API Gateway.
3
+
4
+ Deployed on HuggingFace Spaces (Docker SDK, port 7860).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import os
11
+ import time
12
+ from contextlib import asynccontextmanager
13
+
14
+ from fastapi import FastAPI, Request, Response
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from slowapi import Limiter, _rate_limit_exceeded_handler
17
+ from slowapi.errors import RateLimitExceeded
18
+ from slowapi.util import get_remote_address
19
+
20
+ from backend.api.routes import health, weather, sensors, energy, photosynthesis, control, chatbot, biology, login
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Structured logging
24
+ # ---------------------------------------------------------------------------
25
+
26
+ logging.basicConfig(
27
+ level=logging.INFO,
28
+ format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
29
+ )
30
+ log = logging.getLogger("solarwine.api")
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Sentry (optional — set SENTRY_DSN env var to enable)
34
+ # ---------------------------------------------------------------------------
35
+
36
+ _sentry_dsn = os.environ.get("SENTRY_DSN", "")
37
+ if _sentry_dsn:
38
+ try:
39
+ import sentry_sdk
40
+ from sentry_sdk.integrations.fastapi import FastApiIntegration
41
+ from sentry_sdk.integrations.starlette import StarletteIntegration
42
+ sentry_sdk.init(
43
+ dsn=_sentry_dsn,
44
+ integrations=[StarletteIntegration(), FastApiIntegration()],
45
+ traces_sample_rate=0.1,
46
+ environment=os.environ.get("SENTRY_ENV", "production"),
47
+ )
48
+ log.info("Sentry enabled (env=%s)", os.environ.get("SENTRY_ENV", "production"))
49
+ except ImportError:
50
+ log.warning("SENTRY_DSN set but sentry-sdk not installed — skipping")
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Lifespan — one-time startup / shutdown
54
+ # ---------------------------------------------------------------------------
55
+
56
+ _start_time: float = 0.0
57
+
58
+
59
+ @asynccontextmanager
60
+ async def lifespan(app: FastAPI):
61
+ global _start_time
62
+ _start_time = time.time()
63
+ log.info("SolarWine API starting (port 7860)")
64
+ yield
65
+ log.info("SolarWine API shutting down (uptime=%.0fs)", get_uptime())
66
+
67
+
68
+ def get_uptime() -> float:
69
+ return time.time() - _start_time
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # App
74
+ # ---------------------------------------------------------------------------
75
+
76
+ limiter = Limiter(key_func=get_remote_address, default_limits=["60/minute"])
77
+
78
+ app = FastAPI(
79
+ title="SolarWine API",
80
+ version="0.1.0",
81
+ description="Agrivoltaic vineyard control system API",
82
+ lifespan=lifespan,
83
+ )
84
+ app.state.limiter = limiter
85
+ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
86
+
87
+
88
+ # -- Request logging --------------------------------------------------------
89
+
90
+ @app.middleware("http")
91
+ async def log_requests(request: Request, call_next):
92
+ start = time.time()
93
+ response = await call_next(request)
94
+ duration = (time.time() - start) * 1000
95
+ if request.url.path != "/api/health": # skip noisy health checks
96
+ log.info("%s %s %d %.0fms", request.method, request.url.path, response.status_code, duration)
97
+ return response
98
+
99
+ # -- CORS -------------------------------------------------------------------
100
+
101
+ allowed_origins = os.environ.get("ALLOWED_ORIGINS", "http://localhost:3000,http://localhost:5173").split(",")
102
+ app.add_middleware(
103
+ CORSMiddleware,
104
+ allow_origins=[o.strip() for o in allowed_origins],
105
+ allow_credentials=True,
106
+ allow_methods=["GET", "POST", "OPTIONS"],
107
+ allow_headers=["Content-Type", "Authorization"],
108
+ )
109
+
110
+ # -- Routes -----------------------------------------------------------------
111
+
112
+ app.include_router(health.router, prefix="/api", tags=["health"])
113
+ app.include_router(weather.router, prefix="/api/weather", tags=["weather"])
114
+ app.include_router(sensors.router, prefix="/api/sensors", tags=["sensors"])
115
+ app.include_router(energy.router, prefix="/api/energy", tags=["energy"])
116
+ app.include_router(photosynthesis.router, prefix="/api/photosynthesis", tags=["photosynthesis"])
117
+ app.include_router(control.router, prefix="/api/control", tags=["control"])
118
+ app.include_router(chatbot.router, prefix="/api/chatbot", tags=["chatbot"])
119
+ app.include_router(biology.router, prefix="/api/biology", tags=["biology"])
120
+ app.include_router(login.router, prefix="/api/auth", tags=["auth"])
backend/api/routes/__init__.py ADDED
File without changes
backend/api/routes/biology.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Biology endpoints — wraps BiologyService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from fastapi import APIRouter, Depends
8
+ from fastapi.responses import JSONResponse
9
+
10
+ from backend.api.deps import get_datahub
11
+ from src.data.data_providers import DataHub
12
+
13
+ log = logging.getLogger(__name__)
14
+ router = APIRouter()
15
+
16
+
17
+ @router.get("/phenology")
18
+ async def phenology(hub: DataHub = Depends(get_datahub)):
19
+ """Current phenological stage (GDD-based)."""
20
+ try:
21
+ from src.models.phenology import estimate_stage_combined
22
+ stage = estimate_stage_combined()
23
+ return {"stage": stage.name if hasattr(stage, "name") else str(stage)}
24
+ except Exception as exc:
25
+ log.error("Phenology estimation failed: %s", exc)
26
+ return JSONResponse(status_code=500, content={"error": "Phenology estimation failed"})
27
+
28
+
29
+ @router.get("/rules")
30
+ async def biology_rules(hub: DataHub = Depends(get_datahub)):
31
+ """List all biology rules."""
32
+ return hub.biology.list_rules()
33
+
34
+
35
+ @router.get("/rules/{rule_name}")
36
+ async def biology_rule_detail(rule_name: str, hub: DataHub = Depends(get_datahub)):
37
+ """Explain a specific biology rule."""
38
+ return hub.biology.explain_rule(rule_name)
backend/api/routes/chatbot.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Chatbot endpoints — wraps VineyardChatbot."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import threading
7
+
8
+ from pydantic import BaseModel, Field
9
+ from fastapi import APIRouter, Depends, Request
10
+ from fastapi.responses import JSONResponse
11
+ from slowapi import Limiter
12
+ from slowapi.util import get_remote_address
13
+
14
+ from backend.api.deps import get_datahub
15
+ from src.data.data_providers import DataHub
16
+
17
+ log = logging.getLogger(__name__)
18
+ limiter = Limiter(key_func=get_remote_address)
19
+ router = APIRouter()
20
+
21
+
22
class ChatRequest(BaseModel):
    """Inbound payload for POST /message."""
    # max_length caps the text forwarded to the chatbot backend
    message: str = Field(..., min_length=1, max_length=4000)
    # opaque conversation key; callers that omit it share the "default" session
    session_id: str = "default"
25
+
26
+
27
class FeedbackRequest(BaseModel):
    """User rating of a chatbot reply, for POST /feedback."""
    session_id: str
    message_id: str
    # only the three values the feedback store understands
    rating: str = Field(..., pattern=r"^(up|down|flag)$")
    comment: str = Field("", max_length=2000)
32
+
33
+
34
+ # Thread-safe lazy chatbot init
35
+ _chatbot = None
36
+ _chatbot_lock = threading.Lock()
37
+
38
+
39
def _get_chatbot(hub: DataHub):
    """Return the process-wide VineyardChatbot, constructing it on first use.

    Double-checked locking: once the bot exists the fast path skips the
    lock; the lock only guards the one-time construction.
    """
    global _chatbot
    if _chatbot is None:
        with _chatbot_lock:
            if _chatbot is None:
                from src.chatbot.vineyard_chatbot import VineyardChatbot

                _chatbot = VineyardChatbot(hub=hub)
    return _chatbot
48
+
49
+
50
+ @router.post("/message")
51
+ @limiter.limit("10/minute")
52
+ async def chat_message(request: Request, req: ChatRequest, hub: DataHub = Depends(get_datahub)):
53
+ bot = _get_chatbot(hub)
54
+ response = bot.chat(req.message)
55
+ return {
56
+ "message": response.message,
57
+ "confidence": getattr(response, "confidence", None),
58
+ "sources": getattr(response, "sources", []),
59
+ "caveats": getattr(response, "caveats", []),
60
+ "rule_violations": getattr(response, "rule_violations", []),
61
+ "response_mode": getattr(response, "response_mode", "info"),
62
+ }
63
+
64
+
65
+ @router.post("/feedback")
66
+ @limiter.limit("60/minute")
67
+ async def chat_feedback(request: Request, req: FeedbackRequest):
68
+ try:
69
+ from src.chatbot.feedback import log_feedback
70
+ log_feedback(
71
+ session_id=req.session_id,
72
+ message_id=req.message_id,
73
+ rating=req.rating,
74
+ comment=req.comment,
75
+ )
76
+ return {"status": "ok"}
77
+ except Exception as exc:
78
+ log.error("Feedback logging failed: %s", exc)
79
+ return JSONResponse(status_code=500, content={"error": "Feedback logging failed"})
backend/api/routes/control.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Control system endpoints — reads state from Redis."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from fastapi import APIRouter
8
+ from fastapi.responses import JSONResponse
9
+
10
+ from backend.api.deps import get_datahub, get_redis_client
11
+
12
+ log = logging.getLogger(__name__)
13
+ router = APIRouter()
14
+
15
+
16
+ @router.get("/status")
17
+ async def control_status():
18
+ """Last ControlLoop tick result (stored in Redis by the worker)."""
19
+ redis = get_redis_client()
20
+ if redis:
21
+ data = redis.get_json("control:last_tick")
22
+ if data:
23
+ return data
24
+ return JSONResponse(
25
+ status_code=503,
26
+ content={"error": "No tick result available (worker may not have run yet)"},
27
+ )
28
+
29
+
30
+ @router.get("/plan")
31
+ async def control_plan():
32
+ """Current day-ahead plan."""
33
+ redis = get_redis_client()
34
+ if redis:
35
+ data = redis.get_json("control:plan")
36
+ if data:
37
+ return data
38
+ # Fallback: try loading from file
39
+ try:
40
+ import json
41
+ from config.settings import DAILY_PLAN_PATH
42
+ with open(DAILY_PLAN_PATH) as f:
43
+ return json.load(f)
44
+ except FileNotFoundError:
45
+ return JSONResponse(status_code=404, content={"error": "No plan available"})
46
+ except Exception as exc:
47
+ log.error("Failed to load plan from file: %s", exc)
48
+ return JSONResponse(status_code=500, content={"error": "Plan loading failed"})
49
+
50
+
51
+ @router.get("/budget")
52
+ async def control_budget():
53
+ """Current energy budget state."""
54
+ redis = get_redis_client()
55
+ if redis:
56
+ data = redis.get_json("control:budget")
57
+ if data:
58
+ return data
59
+ return JSONResponse(
60
+ status_code=503,
61
+ content={"error": "No budget data available"},
62
+ )
63
+
64
+
65
+ @router.get("/trackers")
66
+ async def control_trackers():
67
+ """Live tracker angles from ThingsBoard."""
68
+ hub = get_datahub()
69
+ try:
70
+ snapshot = hub.vine_sensors.get_snapshot(light=True)
71
+ return {"trackers": snapshot.get("trackers", {}), "source": "ThingsBoard"}
72
+ except Exception as exc:
73
+ log.error("Tracker fetch failed: %s", exc)
74
+ return JSONResponse(
75
+ status_code=502,
76
+ content={"error": "Tracker fetch failed"},
77
+ )
backend/api/routes/energy.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Energy endpoints — wraps EnergyService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from fastapi import APIRouter, Depends, HTTPException, Query
8
+
9
+ from backend.api.deps import get_datahub
10
+ from src.data.data_providers import DataHub
11
+
12
+ router = APIRouter()
13
+
14
+ _DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
15
+
16
+
17
def _validate_date(value: str) -> str:
    """Validate a YYYY-MM-DD date string and return it unchanged.

    Rejects both malformed strings and well-formed but impossible dates
    (e.g. "2024-13-40") with HTTP 400, so bad input never reaches the
    data layer.
    """
    from datetime import date

    if not _DATE_RE.match(value):
        raise HTTPException(status_code=400, detail=f"Invalid date format: {value!r}. Expected YYYY-MM-DD")
    try:
        date.fromisoformat(value)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid calendar date: {value!r}") from None
    return value
21
+
22
+
23
+ @router.get("/current")
24
+ async def energy_current(hub: DataHub = Depends(get_datahub)):
25
+ return hub.energy.get_current()
26
+
27
+
28
+ @router.get("/daily/{target_date}")
29
+ async def energy_daily(target_date: str, hub: DataHub = Depends(get_datahub)):
30
+ _validate_date(target_date)
31
+ return hub.energy.get_daily_production(target_date=target_date)
32
+
33
+
34
+ @router.get("/history")
35
+ async def energy_history(
36
+ hours: int = Query(24, ge=1, le=8760, description="Hours of history (1–8760)"),
37
+ hub: DataHub = Depends(get_datahub),
38
+ ):
39
+ return hub.energy.get_history(hours_back=hours)
40
+
41
+
42
+ @router.get("/predict/{target_date}")
43
+ async def energy_predict(target_date: str, hub: DataHub = Depends(get_datahub)):
44
+ _validate_date(target_date)
45
+ return hub.energy.predict(target_date=target_date)
backend/api/routes/health.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Health check endpoint."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import os
7
+
8
+ from fastapi import APIRouter
9
+
10
+ from backend.api.deps import get_redis_client
11
+
12
+ router = APIRouter()
13
+
14
+
15
async def _check_thingsboard() -> bool:
    """Check ThingsBoard connectivity without blocking the event loop.

    Returns False when THINGSBOARD_HOST is unset, unreachable, or the
    health endpoint responds with a non-200 status.
    """
    tb_host = os.environ.get("THINGSBOARD_HOST", "")
    if not tb_host:
        return False

    def _probe() -> bool:
        # Sync urllib runs in a worker thread so the loop never blocks;
        # the context manager closes the response socket deterministically.
        import urllib.request

        url = f"{tb_host.rstrip('/')}/api/noauth/health"
        with urllib.request.urlopen(url, timeout=3) as resp:
            return resp.status == 200

    try:
        # asyncio.to_thread replaces the deprecated
        # get_event_loop()/run_in_executor pattern on Python 3.9+.
        return await asyncio.to_thread(_probe)
    except Exception:
        return False
31
+
32
+
33
+ @router.get("/health")
34
+ async def health():
35
+ redis = get_redis_client()
36
+ redis_ok = redis.ping() if redis else False
37
+
38
+ from backend.api.main import get_uptime
39
+
40
+ tb_ok = await _check_thingsboard()
41
+
42
+ return {
43
+ "status": "ok",
44
+ "uptime_seconds": round(get_uptime(), 1),
45
+ "redis_connected": redis_ok,
46
+ "thingsboard_reachable": tb_ok,
47
+ "ims_configured": bool(os.environ.get("IMS_API_TOKEN")),
48
+ "gemini_configured": bool(os.environ.get("GOOGLE_API_KEY")),
49
+ }
backend/api/routes/login.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Login endpoint — issues JWT tokens."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import secrets
8
+
9
+ from pydantic import BaseModel, Field
10
+ from fastapi import APIRouter, HTTPException, Request
11
+ from slowapi import Limiter
12
+ from slowapi.util import get_remote_address
13
+
14
+ from backend.api.auth import create_token
15
+
16
+ log = logging.getLogger(__name__)
17
+
18
+ limiter = Limiter(key_func=get_remote_address)
19
+ router = APIRouter()
20
+
21
+ # Simple user store — replace with a real DB later.
22
+ # For now, a single admin user configured via environment.
23
+ _ADMIN_USER = os.environ.get("ADMIN_USERNAME", "admin")
24
+ _ADMIN_PASS = os.environ.get("ADMIN_PASSWORD", "")
25
+
26
+ if not _ADMIN_PASS:
27
+ log.warning("ADMIN_PASSWORD not set — login endpoint will return 503")
28
+
29
+
30
class LoginRequest(BaseModel):
    """Credentials payload for POST /login."""
    # length caps bound the work done by the constant-time comparison
    username: str = Field(..., min_length=1, max_length=100)
    password: str = Field(..., min_length=1, max_length=200)
33
+
34
+
35
class LoginResponse(BaseModel):
    """Token payload: send access_token as `Authorization: Bearer <token>`."""
    access_token: str
    token_type: str = "bearer"  # fixed OAuth2-style token type
38
+
39
+
40
+ @router.post("/login", response_model=LoginResponse)
41
+ @limiter.limit("5/minute")
42
+ async def login(request: Request, req: LoginRequest):
43
+ """Authenticate and return a JWT token."""
44
+ if not _ADMIN_PASS:
45
+ raise HTTPException(status_code=503, detail="Auth not configured")
46
+
47
+ # Constant-time comparison to prevent timing attacks
48
+ user_ok = secrets.compare_digest(req.username, _ADMIN_USER)
49
+ pass_ok = secrets.compare_digest(req.password, _ADMIN_PASS)
50
+
51
+ if not user_ok or not pass_ok:
52
+ raise HTTPException(status_code=401, detail="Invalid credentials")
53
+
54
+ token = create_token(username=req.username, role="admin")
55
+ if not token:
56
+ raise HTTPException(status_code=503, detail="JWT not configured")
57
+
58
+ return LoginResponse(access_token=token)
backend/api/routes/photosynthesis.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Photosynthesis endpoints — wraps PhotosynthesisService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import Enum
6
+
7
+ from fastapi import APIRouter, Depends, Query
8
+
9
+ from backend.api.deps import get_datahub
10
+ from src.data.data_providers import DataHub
11
+
12
+ router = APIRouter()
13
+
14
+
15
class PSModel(str, Enum):
    """Photosynthesis model selector accepted by the `model` query param."""
    fvcb = "fvcb"  # biochemical FvCB model
    ml = "ml"      # ML-based estimator
18
+
19
+
20
+ @router.get("/current")
21
+ async def ps_current(
22
+ model: PSModel = Query(PSModel.fvcb, description="Model: fvcb or ml"),
23
+ hub: DataHub = Depends(get_datahub),
24
+ ):
25
+ return hub.photosynthesis.get_current(model=model.value)
26
+
27
+
28
+ @router.get("/forecast")
29
+ async def ps_forecast(hub: DataHub = Depends(get_datahub)):
30
+ return hub.photosynthesis.forecast_day_ahead()
backend/api/routes/sensors.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Vine sensor endpoints — wraps VineSensorService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import Enum
6
+ from typing import Optional
7
+
8
+ from fastapi import APIRouter, Depends, Query
9
+
10
+ from backend.api.deps import get_datahub
11
+ from src.data.data_providers import DataHub
12
+
13
+ router = APIRouter()
14
+
15
+
16
class DeviceType(str, Enum):
    """Sensor device categories accepted by the history endpoint."""
    crop = "crop"
    air = "air"
    soil = "soil"
20
+
21
+
22
class AreaType(str, Enum):
    """Area filter values; semantics defined by VineSensorService."""
    treatment = "treatment"
    reference = "reference"
    ambient = "ambient"
26
+
27
+
28
+ @router.get("/snapshot")
29
+ async def sensors_snapshot(
30
+ light: bool = False,
31
+ hub: DataHub = Depends(get_datahub),
32
+ ):
33
+ return hub.vine_sensors.get_snapshot(light=light)
34
+
35
+
36
+ @router.get("/history")
37
+ async def sensors_history(
38
+ type: DeviceType = Query(DeviceType.crop, description="Device type"),
39
+ area: Optional[AreaType] = Query(None, description="Area filter"),
40
+ hours: int = Query(24, ge=1, le=8760, description="Hours of history (1–8760)"),
41
+ hub: DataHub = Depends(get_datahub),
42
+ ):
43
+ return hub.vine_sensors.get_history(
44
+ device_type=type.value,
45
+ area=area.value if area else None,
46
+ hours_back=hours,
47
+ )
backend/api/routes/weather.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Weather endpoints — wraps WeatherService."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import date, timedelta
7
+
8
+ from fastapi import APIRouter, Depends, HTTPException, Query
9
+ from fastapi.responses import JSONResponse
10
+
11
+ from backend.api.deps import get_datahub
12
+ from src.data.data_providers import DataHub
13
+
14
+ router = APIRouter()
15
+
16
+ _DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
17
+
18
+
19
def _validate_date(value: str) -> str:
    """Validate a YYYY-MM-DD date string and return it unchanged.

    Rejects both malformed strings and well-formed but impossible dates
    (e.g. "2024-13-40") with HTTP 400, so bad input never reaches the
    weather service.
    """
    if not _DATE_RE.match(value):
        raise HTTPException(status_code=400, detail=f"Invalid date format: {value!r}. Expected YYYY-MM-DD")
    try:
        date.fromisoformat(value)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid calendar date: {value!r}") from None
    return value
23
+
24
+
25
+ @router.get("/current")
26
+ async def weather_current(hub: DataHub = Depends(get_datahub)):
27
+ return hub.weather.get_current()
28
+
29
+
30
+ @router.get("/history")
31
+ async def weather_history(
32
+ start_date: str = Query(None, description="Start date (YYYY-MM-DD). Default: 7 days ago"),
33
+ end_date: str = Query(None, description="End date (YYYY-MM-DD). Default: today"),
34
+ hub: DataHub = Depends(get_datahub),
35
+ ):
36
+ end = end_date or str(date.today())
37
+ start = start_date or str(date.today() - timedelta(days=7))
38
+ _validate_date(start)
39
+ _validate_date(end)
40
+ return hub.weather.get_history(start_date=start, end_date=end)
41
+
42
+
43
+ @router.get("/forecast")
44
+ async def weather_forecast(hub: DataHub = Depends(get_datahub)):
45
+ try:
46
+ return hub.weather.get_forecast()
47
+ except AttributeError:
48
+ return JSONResponse(status_code=501, content={"error": "Forecast not implemented yet"})
backend/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Backend-specific dependencies (on top of root requirements.txt)
2
+ fastapi>=0.115.0
3
+ uvicorn[standard]>=0.34.0
4
+ pydantic>=2.0
5
+ slowapi>=0.2.0
6
+ PyJWT>=2.8.0
7
+ sentry-sdk[fastapi]>=2.0 # optional: set SENTRY_DSN to enable
backend/workers/__init__.py ADDED
File without changes
backend/workers/control_tick.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ControlLoop single-tick worker.
3
+
4
+ Entry point for GitHub Actions cron (every 15 min).
5
+ Usage:
6
+ python -m backend.workers.control_tick
7
+ python -m backend.workers.control_tick --dry-run
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import json
14
+ import logging
15
+ import sys
16
+ from datetime import datetime, timezone
17
+ from pathlib import Path
18
+
19
+ # Ensure project root is on sys.path
20
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
21
+ if str(PROJECT_ROOT) not in sys.path:
22
+ sys.path.insert(0, str(PROJECT_ROOT))
23
+
24
+ # Load .env if present (local dev)
25
+ try:
26
+ from dotenv import load_dotenv
27
+ load_dotenv(PROJECT_ROOT / ".env")
28
+ except ImportError:
29
+ pass
30
+
31
+ logging.basicConfig(
32
+ level=logging.INFO,
33
+ format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
34
+ )
35
+ log = logging.getLogger("control_tick")
36
+
37
+
38
def main():
    """Run one ControlLoop tick and persist the result to Redis.

    Intended to be invoked by a GitHub Actions cron every 15 minutes.
    With --dry-run, decisions are computed but not dispatched.
    """
    parser = argparse.ArgumentParser(description="Run one ControlLoop tick")
    parser.add_argument("--dry-run", action="store_true", help="Compute decisions without dispatching")
    args = parser.parse_args()

    # Imported here so sys.path/.env setup above runs first.
    from src.control_loop import ControlLoop
    from src.data.redis_cache import get_redis

    log.info("Starting control tick (dry_run=%s)", args.dry_run)

    loop = ControlLoop(dry_run=args.dry_run)
    result = loop.tick()

    # Serialise result
    # NOTE(review): result_dict aliases result.__dict__, so the metadata
    # keys below mutate the tick object itself — harmless in this
    # run-once process, but worth confirming if tick() results are reused.
    result_dict = result.__dict__ if hasattr(result, "__dict__") else {"raw": str(result)}
    result_dict["_timestamp"] = datetime.now(timezone.utc).isoformat()
    result_dict["_dry_run"] = args.dry_run

    # Store in Redis for the API to read
    redis = get_redis()
    if redis:
        # Convert to JSON-safe dict (default=str stringifies anything exotic)
        safe = json.loads(json.dumps(result_dict, default=str))
        redis.set_json("control:last_tick", safe, ttl=1200)  # 20 min TTL
        log.info("Tick result saved to Redis")
    else:
        log.warning("Redis not available — tick result not persisted")

    # Truncated to 500 chars to keep CI logs readable
    log.info("Tick complete: %s", json.dumps(result_dict, default=str, indent=2)[:500])

    # Budget alert: warn if >80% spent before 14:00 IST
    _check_budget_alert(result_dict)
70
+
71
+
72
def _check_budget_alert(tick: dict) -> None:
    """Log a warning (visible in GitHub Actions) if budget is nearly exhausted.

    Best-effort: every failure path is swallowed and logged at DEBUG so a
    broken alert never fails the tick job. Optionally POSTs to
    BUDGET_ALERT_WEBHOOK when set.
    """
    import os
    try:
        from datetime import datetime, timezone, timedelta
        now_utc = datetime.now(timezone.utc)
        # NOTE(review): fixed +2h offset ignores Israel DST (UTC+3 in
        # summer) — confirm the 14:00 cutoff is acceptable year-round.
        now_israel = now_utc + timedelta(hours=2)  # approximate IST

        remaining = tick.get("budget_remaining_kwh", None)
        if remaining is None or remaining == 0:
            return  # no budget data or dormant season

        # Only alert before 14:00 IST (still daylight hours left)
        if now_israel.hour >= 14:
            return

        # Get today's total budget from Redis
        from src.data.redis_cache import get_redis
        redis = get_redis()
        if not redis:
            return
        budget_data = redis.get_json("control:budget")
        if not budget_data or "plan" not in budget_data:
            return

        plan = budget_data["plan"]
        # NOTE(review): spent/(total+spent) below treats `total` as the
        # *remaining* budget (slot budgets + margin), not the day's full
        # budget — verify against the planner's "control:budget" schema.
        total = sum(plan.get("slot_budgets", {}).values()) + plan.get("daily_margin_remaining_kWh", 0)
        spent = plan.get("cumulative_spent", 0)

        if total > 0 and spent / (total + spent) > 0.8:
            log.warning(
                "BUDGET ALERT: %.1f%% of daily budget spent before %02d:00 IST "
                "(spent=%.3f kWh, remaining=%.3f kWh)",
                spent / (total + spent) * 100,
                now_israel.hour,
                spent,
                remaining,
            )
            # Future: send webhook/email here
            webhook_url = os.environ.get("BUDGET_ALERT_WEBHOOK")
            if webhook_url:
                import requests
                requests.post(webhook_url, json={
                    "text": f"SolarWine Budget Alert: {spent/(total+spent)*100:.0f}% spent before {now_israel.hour}:00 IST",
                    "spent_kwh": round(spent, 3),
                    "remaining_kwh": round(remaining, 3),
                }, timeout=5)
    except Exception as exc:
        log.debug("Budget alert check failed: %s", exc)
121
+
122
+
123
+ if __name__ == "__main__":
124
+ main()
backend/workers/daily_planner.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Day-ahead planner worker.

Entry point for GitHub Actions cron (daily 05:00 IST = 02:00 UTC).
Usage:
    python -m backend.workers.daily_planner
"""

from __future__ import annotations

import json
import logging
import sys
from datetime import date, datetime, timezone
from pathlib import Path

# Ensure project root is on sys.path so `src.*` and `config.*` imports
# resolve when this module runs straight from a repository checkout.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Load .env if present (local dev); in CI the variables come from the runner.
try:
    from dotenv import load_dotenv
    load_dotenv(PROJECT_ROOT / ".env")
except ImportError:
    pass  # python-dotenv not installed — rely on process environment only

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
log = logging.getLogger("daily_planner")
34
+
35
+
36
def main():
    """Compute today's day-ahead plan and publish it to disk and Redis."""
    from src.day_ahead_planner import DayAheadPlanner
    from src.data.redis_cache import get_redis
    from config.settings import DAILY_PLAN_PATH

    plan_date = date.today()
    log.info("Computing day-ahead plan for %s", plan_date)

    plan = DayAheadPlanner().plan(target_date=plan_date)

    if hasattr(plan, "to_dict"):
        plan_dict = plan.to_dict()
    else:
        plan_dict = {"raw": str(plan)}
    plan_dict["_computed_at"] = datetime.now(timezone.utc).isoformat()

    # File backup first — survives even when Redis is unreachable.
    try:
        Path(DAILY_PLAN_PATH).parent.mkdir(parents=True, exist_ok=True)
        with open(DAILY_PLAN_PATH, "w") as f:
            json.dump(plan_dict, f, default=str, indent=2)
        log.info("Plan saved to %s", DAILY_PLAN_PATH)
    except Exception as exc:
        log.error("Failed to save plan file: %s", exc)

    # Share via Redis so the control-tick worker can pick the plan up.
    redis = get_redis()
    if redis:
        # Round-trip through json to guarantee a JSON-serialisable payload.
        safe = json.loads(json.dumps(plan_dict, default=str))
        redis.set_json("control:plan", safe, ttl=86400)  # 24h TTL
        log.info("Plan saved to Redis")
    else:
        log.warning("Redis not available — plan not shared")

    log.info("Plan complete: %d slots", len(plan_dict.get("slots", [])))


if __name__ == "__main__":
    main()
config/settings.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Configuration: paths, IMS station/channel config, model params

from pathlib import Path

# Repository root (this file lives in <root>/config/).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = PROJECT_ROOT / "Data"
IMS_CACHE_DIR = DATA_DIR / "ims"
PROCESSED_DIR = DATA_DIR / "processed"
OUTPUTS_DIR = PROJECT_ROOT / "outputs"

# On-site sensor data (Stage 1)
SEYMOUR_DIR = DATA_DIR / "Seymour"
SENSORS_WIDE_PATH = SEYMOUR_DIR / "sensors_wide.csv"
SENSORS_WIDE_SAMPLE_PATH = SEYMOUR_DIR / "sensors_wide_sample.csv"
SENSORS_WIDE_METADATA_PATH = SEYMOUR_DIR / "sensors_wide_metadata.csv"

# IMS API (station 43 - Sde Boker)
IMS_STATION_ID = 43
IMS_BASE_URL = "https://api.ims.gov.il/v1/envista/stations"

# Station 43 channel IDs -> output column names (from --list-channels)
IMS_CHANNEL_MAP = {
    6: "air_temperature_c",  # TD
    8: "tdmax_c",  # TDmax
    9: "tdmin_c",  # TDmin
    10: "ghi_w_m2",  # Grad (GHI)
    7: "rh_percent",  # RH
    20: "rain_mm",  # Rain
    3: "wind_speed_ms",  # WS
    # Station 43 has no BP; WD optional: 4
}

# Preprocessor
TRAIN_RATIO = 0.75  # chronological train/test split fraction

# Growing season: vine is dormant Oct–April (no photosynthesis). Keep May–September only.
GROWING_SEASON_MONTHS = (5, 6, 7, 8, 9)  # May through September

# Site location (Sde Boker, Israel)
SITE_LATITUDE = 30.87
SITE_LONGITUDE = 34.79
SITE_ALTITUDE = 475.0  # meters

# Agrivoltaic panel geometry
PANEL_WIDTH = 1.13  # m (E-W dimension)
PANEL_HEIGHT = 2.05  # m above ground
ROW_SPACING = 3.0  # m between vine row centers
CANOPY_HEIGHT = 1.2  # m (VSP trellis)
CANOPY_WIDTH = 0.6  # m
ROW_AZIMUTH = 315.0  # degrees CW from north (NW–SE row orientation)

# === TRACKER CONSTRAINTS ===
TRACKER_MAX_ANGLE = 60.0  # degrees — mechanical limit of single-axis tracker
TRACKER_GCR = 0.377  # ground coverage ratio (panel_width / row_spacing = 1.13 / 3.0)

# === TRACKER ID MAPPING ===
# Canonical mapping between integer IDs (DB/fleet) and string names (ThingsBoard)
TRACKER_ID_MAP = {
    501: "Tracker501",
    502: "Tracker502",
    503: "Tracker503",
    509: "Tracker509",
}

# ---------------------------------------------------------------------------
# SolarWine 2.0 — Control System Parameters
# ---------------------------------------------------------------------------

# === PV SYSTEM ===
SYSTEM_CAPACITY_KW = 48.0  # DC nameplate capacity (from ThingsBoard Digital Twin)
STC_IRRADIANCE_W_M2 = 1000.0  # Standard Test Conditions irradiance for normalisation

# === ENERGY BUDGET ===
# Hard ceiling: fraction of annual PV generation the vines can "spend" on shading.
MAX_ENERGY_REDUCTION_PCT = 5.0  # % of annual generation (user's hard ceiling)
ANNUAL_RESERVE_PCT = 15.0  # emergency reserve — not allocated to any month
WEEKLY_RESERVE_PCT = 20.0  # within-week flexibility buffer
DAILY_MARGIN_PCT = 20.0  # real-time response pool within the day

# Monthly budget weights — must sum to 1.0 across growing season.
# (0.02 + 0.05 + 0.45 + 0.40 + 0.08 = 1.00.)
# May budget is very low (extreme heat emergency only); the 3D model will
# naturally produce no effective dose in most May slots because fruit-set
# geometry and low stress do not warrant intervention.
MONTHLY_BUDGET_WEIGHTS = {
    5: 0.02,  # May — near-zero; extreme emergency only (fruit-set geometry protects naturally)
    6: 0.05,  # June — rare; only extreme heat spikes
    7: 0.45,  # July — peak heat; primary shading window
    8: 0.40,  # August — sustained heat; fruit ripening / sunburn risk
    9: 0.08,  # Sept — occasional late heat waves
}

# === NO-SHADE WINDOWS (hard constraints — shading PROHIBITED) ===
# These are enforced by the InterventionGate AND the chatbot guardrails.
NO_SHADE_BEFORE_HOUR = 10  # local solar time — morning light is sacred for carbon fixation
NO_SHADE_MONTHS = [5]  # May — full spring exposure for flowering / fruit set
NO_SHADE_GHI_BELOW = 300  # W/m² — overcast, already diffuse; no stress to relieve
NO_SHADE_TLEAF_BELOW = 28.0  # °C — below RuBP→Rubisco transition zone; vine wants light

# === SHADE-ELIGIBLE CONDITIONS (ALL must be true to allow intervention) ===
SHADE_ELIGIBLE_TLEAF_ABOVE = 30.0  # °C — Semillon Rubisco transition (heat bottleneck)
SHADE_ELIGIBLE_CWSI_ABOVE = 0.4  # moderate water stress confirmed by sensors
SHADE_ELIGIBLE_GHI_ABOVE = 400  # W/m² — significant direct radiation load (night/deep-overcast guard)
SHADE_ELIGIBLE_HOURS = (10, 16)  # local solar time window (10:00–16:00)

# Minimum GHI below which the sun is too weak to cause stress (night, dense cloud).
# No offset can help; skip shadow computation entirely.
MIN_MEANINGFUL_GHI = 100  # W/m²

# === FRUITING ZONE ===
FRUITING_ZONE_INDEX = 1  # mid-canopy zone in the 3-zone ShadowModel (0=basal, 1=fruiting, 2=apical)
FRUITING_ZONE_HEIGHT_M = 0.6  # center height of grape cluster zone (m)
BERRY_SUNBURN_TEMP_C = 35.0  # berry surface temperature damage threshold (°C)
FRUITING_ZONE_TARGET_PAR = 400  # µmol/m²/s — quality threshold; above this → sunburn risk

# === TRADEOFF ENGINE ===
# Candidate shading offsets tested in order (minimum-dose search: stop at first effective offset).
CANDIDATE_OFFSETS = [0, 3, 5, 8, 10, 15, 20]  # degrees off astronomical position
SIMULATION_TIMEOUT_SEC = 5  # max seconds for one offset simulation

# === SAFETY RAILS ===
DIVERGENCE_THRESHOLD = 0.12  # 12% — if |FvCB_A - ML_A| / max > threshold → fallback to FvCB

# === SEMILLON FvCB — Rubisco transition ===
SEMILLON_TRANSITION_TEMP_C = 30.0  # °C — below: RuBP-limited (light bottleneck); above: Rubisco-limited (heat bottleneck)

# === WEATHER PROTECTION / OPERATIONAL MODES ===
WIND_STOW_SPEED_MS = 15.0  # m/s — panels stow flat (0°) above this wind speed
HEAT_SHIELD_TEMP_C = 38.0  # °C — emergency heat shield: maximum shade regardless of budget
HEAT_SHIELD_CWSI = 0.6  # CWSI threshold that activates heat shield

# === MECHANICAL HARVESTING ===
HARVEST_PARK_CLEARANCE_CM = 250  # cm — minimum clearance for harvesting machine
HARVEST_LATERAL_WIDTH_CM = 18  # cm — lateral harvester arm width
HARVESTER_RPM_RANGE = (430, 460)  # harvester operating RPM range

# === HYSTERESIS (command arbiter) ===
HYSTERESIS_WINDOW_MIN = 15  # minutes — minimum time between consecutive tilt changes
ANGLE_TOLERANCE_DEG = 2.0  # degrees — changes smaller than this are suppressed

# === PLAN DIVERGENCE RE-PLANNING ===
PLAN_DIVERGENCE_THRESHOLD_KWH = 0.5  # cumulative |planned − actual| energy that triggers re-plan
PLAN_DIVERGENCE_THRESHOLD_SLOTS = 4  # consecutive divergent slots that triggers re-plan
PLAN_REPLAN_COOLDOWN_SLOTS = 8  # minimum slots between re-plans (~2 hours)

# === ROI / LAND EQUIVALENT RATIO ===
TARGET_LER = 1.5  # Land Equivalent Ratio target (energy + crop combined)

# ---------------------------------------------------------------------------
# Agronomic Value Weighting
# ---------------------------------------------------------------------------

# Spatial zone weights for crop value calculation.
# The 3-zone ShadowModel: zone 0 = basal/trunk (~0.2m), zone 1 = fruiting (~0.6m), zone 2 = apical (~1.0m).
# During veraison, zone 2 (upper canopy) has the highest marginal value for sugar loading.
ZONE_CROP_WEIGHTS = {
    "pre_veraison": [0.25, 0.35, 0.40],  # [zone0, zone1, zone2]
    "veraison": [0.10, 0.30, 0.60],  # apical leaves dominate sugar loading
    "post_harvest": [0.15, 0.15, 0.70],  # reserve building; top canopy matters most
}

# Temporal (phenological stage) crop value multipliers.
# Applied on top of zone weights; reflects how much each unit of photosynthesis
# contributes to final economic yield at different growth stages.
STAGE_CROP_MULTIPLIER = {
    "pre_flowering": 1.2,  # setting yield capacity (bunch number, berry set)
    "fruit_set": 1.0,  # baseline — rapid cell division
    "veraison": 1.5,  # sugar loading; highest crop value per unit carbon
    "post_harvest": 0.5,  # reserve building only; energy production prioritized
}

# Growing Degree Day thresholds for Semillon at Sde Boker (base temperature 10°C).
PHENOLOGY_GDD_THRESHOLDS = {
    "budburst": 0,  # GDD accumulation starts ~March
    "flowering": 350,  # ~May
    "fruit_set": 500,  # ~early June
    "veraison": 1200,  # ~mid July
    "harvest": 1800,  # ~late August / early September
}

# ---------------------------------------------------------------------------
# Day-Ahead DP Planner
# ---------------------------------------------------------------------------

DP_SLOTS_PER_DAY = 96  # 15-min intervals × 24 h
DP_SLOT_DURATION_MIN = 15  # minutes per slot
DP_MOVEMENT_COST = 0.5  # penalty per degree of tilt change (kWh-equivalent)
#                         biases optimizer toward smooth trajectories

# Flat energy price (ILS/kWh) used when real-time tariff is unavailable.
# Replace with time-of-use tariff schedule for production.
DP_FLAT_ENERGY_PRICE_ILS_KWH = 0.35

# Base crop value (ILS / µmol CO₂ m⁻² s⁻¹ per 15-min slot) used in the
# DP utility function U_t(θ) = Price_energy · E_t(θ) + Price_crop · A_t(θ).
# Calibrate from vineyard revenue per kg grape × expected yield per A unit.
DP_BASE_CROP_VALUE = 0.10

# ---------------------------------------------------------------------------
# Simulation Log Storage
# ---------------------------------------------------------------------------

SIMULATION_LOG_DIR = DATA_DIR / "simulation_logs"
SIMULATION_LOG_PATH = SIMULATION_LOG_DIR / "control_loop.parquet"
DAILY_PLAN_PATH = DATA_DIR / "daily_plan.json"
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Photosynthesis Prediction Model - dependencies
2
+ # Install: pip install -r requirements.txt
3
+
4
+ pandas==2.3.3
5
+ numpy==2.4.2
6
+ scikit-learn==1.8.0
7
+ matplotlib==3.10.8
8
+ seaborn==0.13.2
9
+ requests==2.32.5
10
+ python-dotenv==1.2.1
11
+ streamlit==1.54.0
12
+ plotly==6.5.2
13
+ xgboost>=2.0
14
+ pvlib>=0.10.0
15
+ astral>=3.2
16
+ chronos-forecasting>=2.0
17
+ torch>=2.0
18
+ google-genai>=1.0
src/__init__.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SolarWine src package — re-exports for backward compatibility
2
+ # Modules live in subpackages: data, models, forecasting, shading, advisor, chatbot, genai
3
+
4
+ import sys
5
+
6
+ # Map old flat names to new subpackage locations
7
+ _REDIRECTS = {
8
+ # data
9
+ "ims_client": "src.data.ims_client",
10
+ "sensor_data_loader": "src.data.sensor_data_loader",
11
+ "data_schema": "src.data.data_schema",
12
+ "thingsboard_client": "src.data.thingsboard_client",
13
+ "data_providers": "src.data.data_providers",
14
+ # models
15
+ "farquhar_model": "src.models.farquhar_model",
16
+ "canopy_photosynthesis": "src.models.canopy_photosynthesis",
17
+ "phenology": "src.models.phenology",
18
+ # forecasting
19
+ "predictor": "src.forecasting.predictor",
20
+ "ts_predictor": "src.forecasting.ts_predictor",
21
+ "chronos_forecaster": "src.forecasting.chronos_forecaster",
22
+ "preprocessor": "src.forecasting.preprocessor",
23
+ "time_features": "src.forecasting.time_features",
24
+ # shading
25
+ "solar_geometry": "src.shading.solar_geometry",
26
+ "tracker_optimizer": "src.shading.tracker_optimizer",
27
+ "vine_3d_scene": "src.shading.vine_3d_scene",
28
+ "tradeoff_engine": "src.shading.tradeoff_engine",
29
+ # advisor
30
+ "day_ahead_advisor": "src.advisor.day_ahead_advisor",
31
+ "safety_rails": "src.advisor.safety_rails",
32
+ # chatbot
33
+ "vineyard_chatbot": "src.chatbot.vineyard_chatbot",
34
+ "routing_agent": "src.chatbot.routing_agent",
35
+ "llm_data_engineer": "src.chatbot.llm_data_engineer",
36
+ # genai
37
+ "genai_utils": "src.genai.utils",
38
+ }
39
+
40
+
41
+ def __getattr__(name: str):
42
+ if name in _REDIRECTS:
43
+ import importlib
44
+
45
+ mod = importlib.import_module(_REDIRECTS[name])
46
+ sys.modules[f"{__name__}.{name}"] = mod
47
+ return mod
48
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
49
+
50
+
51
+ def __dir__():
52
+ return sorted(_REDIRECTS.keys())
src/advisor/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Advisor: day-ahead, safety rails."""
src/advisor/day_ahead_advisor.py ADDED
@@ -0,0 +1,632 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DayAheadAdvisor: Gemini-powered qualitative day-ahead stress advisory for
3
+ Semillon grapevine in the SolarWine agrivoltaic system.
4
+
5
+ Analyzes IMS weather forecast through vine biology rules to produce:
6
+ - Hourly stress profile (RuBP vs Rubisco limitation)
7
+ - Energy budget recommendations (time-block distribution)
8
+ - Model routing preferences (FvCB vs ML by time of day)
9
+ - Chronos forecast sanity check (optional)
10
+
11
+ Sits between raw forecast data and the future Phase 3.5 day-ahead planner.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from dataclasses import dataclass, field, asdict
18
+ from typing import Optional
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+
23
+ from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key
24
+
25
+
26
# ---------------------------------------------------------------------------
# Data structures
# ---------------------------------------------------------------------------

@dataclass
class HourlyStressEntry:
    """One daytime hour of the LLM stress forecast."""
    hour: int
    limiting_state: str  # "rubp" | "rubisco" | "transition"
    stress_severity: str  # "none" | "low" | "moderate" | "high" | "extreme"
    shading_recommended: bool


@dataclass
class StressProfile:
    """Day-level stress summary plus the per-hour detail it was derived from."""
    rubisco_limited_hours: int  # count of hours in the heat-limited regime
    peak_stress_hour: int  # hour of day (0-23) with the worst stress
    peak_stress_severity: str  # severity label at the peak hour
    hourly_detail: list[HourlyStressEntry]
    summary: str  # short natural-language summary from the LLM


@dataclass
class BudgetRecommendation:
    """How much of the shading budget to spend today, and when."""
    daily_budget_fraction: float  # 0–1 of remaining weekly budget
    time_block_pct: dict[str, float]  # e.g. {"10-11": 5, "11-14": 60, ...}
    rationale: str


@dataclass
class ModelRoutingPreference:
    """Which photosynthesis model (FvCB vs ML) to prefer per time of day."""
    morning: str  # "fvcb" or "ml"
    midday: str
    afternoon: str
    rationale: str


@dataclass
class ChronosSanityCheck:
    """LLM plausibility verdict on an optional Chronos forecast."""
    plausible: bool
    flags: list[str]  # specific anomalies the LLM flagged
    overall_assessment: str


@dataclass
class AdvisorReport:
    """Complete advisory bundle produced for one date."""
    date: str
    phenological_stage: str
    stress_profile: StressProfile
    budget_recommendation: BudgetRecommendation
    model_routing: ModelRoutingPreference
    chronos_sanity: Optional[ChronosSanityCheck]  # None when no Chronos input
    confidence_notes: str
    raw_llm_response: str = ""  # kept for debugging / audit of the LLM output
79
+
80
+
81
# ---------------------------------------------------------------------------
# System prompt — encodes vine biology rules
# ---------------------------------------------------------------------------
# NOTE: this string is sent verbatim as the Gemini system instruction; the
# JSON schema at the bottom must stay in sync with _parse_report().

SYSTEM_PROMPT = """\
You are an agrivoltaic advisor for a Semillon grapevine vineyard in the Negev \
desert (Sde Boker, Israel). You analyze day-ahead weather forecasts and produce \
structured stress assessments for the tracker control system.

CONTROL OBJECTIVE:
- Primary goal: maximise annual PV energy production.
- Secondary goal: protect vines from heat, water stress, and sunburn using a \
limited shading budget (see energy budget rule).
- When in doubt and there is no clear sign of dangerous stress, prefer keeping \
panels in their energy-maximising position.

BIOLOGICAL GUIDELINES (strong constraints; balance them with the energy objective):

1. TEMPERATURE TRANSITION: Below 30°C, Semillon photosynthesis is RuBP-limited \
(light is the bottleneck — shading HURTS). Above 30°C, it becomes Rubisco-limited \
(heat is the bottleneck — shading MAY help). The transition is gradual (28–32°C).

2. NO SHADE BEFORE 10:00: Morning light is critical for carbon fixation. Avoid \
recommending shading before 10:00 unless there is an extreme heat or safety event.

3. MAY SENSITIVITY: May is the flowering/fruit-set period. Yield protection has \
priority: avoid shading in May under normal conditions because even small losses \
can reduce cluster number and berry set. Only recommend shade in May as a last \
resort in extreme heat to prevent serious damage (e.g. severe sunburn or lethal stress).

4. CWSI THRESHOLD: Crop Water Stress Index > 0.4 indicates real water stress. \
Below 0.4, the vine is coping adequately.

5. BERRY SUNBURN: Direct exposure at air temperature > 35°C risks berry sunburn, \
especially on the southwest-facing side of clusters in the afternoon.

6. ENERGY BUDGET: Annual energy sacrifice ceiling is 5%. Suggested monthly caps: \
May=0%, Jun=15%, Jul=30%, Aug=30%, Sep=20%, Oct=5%. Treat these as soft caps: \
stay below them unless there is an exceptional agronomic reason.

7. MODEL ROUTING: Use FvCB (Farquhar model) for standard conditions (T < 30°C, \
VPD < 2.5 kPa, adequate water). Use ML ensemble for stress conditions (T > 30°C, \
high VPD, water stress, or any non-linear regime).

8. PHENOLOGICAL MULTIPLIER: Stress during veraison (berry ripening) is 1.5× more \
damaging than during vegetative growth. Protect veraison at higher cost.

SEVERITY SCALE (anchored to air temperature):
- none: T < 28°C
- low: 28-30°C
- moderate: 30-33°C
- high: 33-37°C
- extreme: T > 37°C

OUTPUT FORMAT — Return ONLY a JSON object (no markdown fences, no explanation) \
with this exact schema:

{
  "stress_profile": {
    "rubisco_limited_hours": <int>,
    "peak_stress_hour": <int 0-23>,
    "peak_stress_severity": "<none|low|moderate|high|extreme>",
    "hourly_detail": [
      {"hour": <int>, "limiting_state": "<rubp|rubisco|transition>", \
"stress_severity": "<severity>", "shading_recommended": <bool>}
    ],
    "summary": "<2-3 sentence natural language summary>"
  },
  "budget_recommendation": {
    "daily_budget_fraction": <float 0-1>,
    "time_block_pct": {"10-11": <float>, "11-14": <float>, "14-16": <float>, \
"16+": <float>},
    "rationale": "<1-2 sentences>"
  },
  "model_routing": {
    "morning": "<fvcb|ml>",
    "midday": "<fvcb|ml>",
    "afternoon": "<fvcb|ml>",
    "rationale": "<1 sentence>"
  },
  "chronos_sanity": {
    "plausible": <bool>,
    "flags": ["<flag1>", ...],
    "overall_assessment": "<1 sentence>"
  },
  "confidence_notes": "<any caveats about forecast quality or unusual conditions>"
}

Include hourly_detail entries only for hours 6-20 (daytime). \
If no Chronos forecast is provided, set chronos_sanity to null.
"""
172
+
173
+
174
# ---------------------------------------------------------------------------
# Helper: robust JSON extraction from LLM response
# ---------------------------------------------------------------------------

def _extract_json(text: str) -> dict:
    """Thin wrapper around the shared genai_utils implementation.

    Kept as a module-level name, presumably so callers/tests can patch it
    here — TODO confirm before removing the indirection.
    """
    return extract_json_object(text)
181
+
182
+
183
+ # ---------------------------------------------------------------------------
184
+ # Main class
185
+ # ---------------------------------------------------------------------------
186
+
187
+ class DayAheadAdvisor:
188
+ """
189
+ Gemini-powered day-ahead stress advisory for agrivoltaic tracker control.
190
+
191
+ Usage
192
+ -----
193
+ advisor = DayAheadAdvisor()
194
+ report = advisor.advise(
195
+ date="2025-07-15",
196
+ weather_forecast=df_ims,
197
+ phenological_stage="veraison",
198
+ remaining_weekly_budget_kWh=12.5,
199
+ remaining_monthly_budget_kWh=45.0,
200
+ )
201
+ """
202
+
203
+ def __init__(
204
+ self,
205
+ model_name: str = "gemini-2.5-flash",
206
+ api_key: Optional[str] = None,
207
+ verbose: bool = True,
208
+ ):
209
+ self.model_name = model_name
210
+ self._api_key = api_key
211
+ self._client = None
212
+ self.verbose = verbose
213
+ # Cache advisory per date+stage (same day = same forecast)
214
+ self._report_cache: dict[str, AdvisorReport] = {}
215
+
216
+ # ------------------------------------------------------------------
217
+ # Internal helpers
218
+ # ------------------------------------------------------------------
219
+
220
    @property
    def api_key(self) -> str:
        """Google API key resolved by the shared genai_utils helper.

        The explicit constructor argument (``self._api_key``) is passed
        through; resolution details live in ``get_google_api_key``.
        """
        return get_google_api_key(self._api_key)
223
+
224
    @property
    def client(self):
        """GenAI client, constructed on first access and cached afterwards."""
        if self._client is None:
            self._client = get_genai_client(self._api_key)
        return self._client
229
+
230
    def _call_gemini(self, user_prompt: str) -> str:
        """Send a prompt to Gemini and return the raw text response.

        SYSTEM_PROMPT is attached as the system instruction, so callers only
        supply the forecast-specific user prompt. Network/API errors propagate
        to the caller (which falls back to ``_default_report``).
        """
        response = self.client.models.generate_content(
            model=self.model_name,
            contents=user_prompt,
            config={"system_instruction": SYSTEM_PROMPT},
        )
        return response.text
238
+
239
+ def _log(self, msg: str) -> None:
240
+ if self.verbose:
241
+ print(f"[DayAheadAdvisor] {msg}")
242
+
243
+ # ------------------------------------------------------------------
244
+ # Forecast formatting
245
+ # ------------------------------------------------------------------
246
+
247
+ def _format_weather_forecast(self, weather_df: pd.DataFrame) -> str:
248
+ """Aggregate 15-min IMS data to hourly and format as text for Gemini."""
249
+ df = weather_df.copy()
250
+
251
+ # Ensure datetime index
252
+ if not isinstance(df.index, pd.DatetimeIndex):
253
+ for col in ["timestamp_utc", "time", "datetime", "timestamp"]:
254
+ if col in df.columns:
255
+ df.index = pd.to_datetime(df[col], utc=True)
256
+ break
257
+
258
+ # Map common column names
259
+ col_map = {}
260
+ for c in df.columns:
261
+ cl = c.lower()
262
+ if "temp" in cl and "dew" not in cl:
263
+ col_map["temperature_c"] = c
264
+ elif "ghi" in cl or "radiation" in cl or "irradiance" in cl:
265
+ col_map["ghi_w_m2"] = c
266
+ elif "rh" in cl or "humid" in cl:
267
+ col_map["rh_percent"] = c
268
+ elif "wind" in cl and "speed" in cl:
269
+ col_map["wind_speed_ms"] = c
270
+ elif "vpd" in cl:
271
+ col_map["vpd_kpa"] = c
272
+
273
+ # Resample to hourly
274
+ hourly = df.resample("1h").mean(numeric_only=True)
275
+
276
+ lines = ["HOURLY WEATHER FORECAST:"]
277
+ lines.append(f"{'Hour':>4} {'T(°C)':>7} {'GHI':>7} {'RH(%)':>7} {'Wind':>7}")
278
+ lines.append("-" * 45)
279
+
280
+ temp_col = col_map.get("temperature_c")
281
+ ghi_col = col_map.get("ghi_w_m2")
282
+ rh_col = col_map.get("rh_percent")
283
+ wind_col = col_map.get("wind_speed_ms")
284
+
285
+ for idx, row in hourly.iterrows():
286
+ hour = idx.hour if hasattr(idx, "hour") else "?"
287
+ t = f"{row[temp_col]:.1f}" if temp_col and temp_col in row.index else "N/A"
288
+ g = f"{row[ghi_col]:.0f}" if ghi_col and ghi_col in row.index else "N/A"
289
+ r = f"{row[rh_col]:.0f}" if rh_col and rh_col in row.index else "N/A"
290
+ w = f"{row[wind_col]:.1f}" if wind_col and wind_col in row.index else "N/A"
291
+ lines.append(f"{hour:>4} {t:>7} {g:>7} {r:>7} {w:>7}")
292
+
293
+ # Summary stats
294
+ if temp_col and temp_col in hourly.columns:
295
+ temps = hourly[temp_col].dropna()
296
+ if not temps.empty:
297
+ lines.append(f"\nSummary: Tmax={temps.max():.1f}°C, "
298
+ f"Tmin={temps.min():.1f}°C, "
299
+ f"Hours above 30°C: {int((temps > 30).sum())}, "
300
+ f"Hours above 35°C: {int((temps > 35).sum())}")
301
+
302
+ return "\n".join(lines)
303
+
304
+ def _format_chronos_forecast(self, chronos_df: pd.DataFrame) -> str:
305
+ """Format Chronos A forecast as text for Gemini."""
306
+ df = chronos_df.copy()
307
+
308
+ if not isinstance(df.index, pd.DatetimeIndex):
309
+ for col in ["timestamp_utc", "time", "datetime", "timestamp"]:
310
+ if col in df.columns:
311
+ df.index = pd.to_datetime(df[col], utc=True)
312
+ break
313
+
314
+ # Resample to hourly
315
+ hourly = df.resample("1h").agg({
316
+ c: "median" for c in df.select_dtypes(include=[np.number]).columns
317
+ })
318
+
319
+ # Look for A / prediction columns
320
+ a_col = None
321
+ for c in df.columns:
322
+ cl = c.lower()
323
+ if cl in ("a", "a_n", "predicted_a", "forecast", "median"):
324
+ a_col = c
325
+ break
326
+ if a_col is None and len(df.select_dtypes(include=[np.number]).columns) > 0:
327
+ a_col = df.select_dtypes(include=[np.number]).columns[0]
328
+
329
+ if a_col is None:
330
+ return "CHRONOS FORECAST: No numeric prediction column found."
331
+
332
+ lines = ["CHRONOS A FORECAST (hourly median):"]
333
+ for idx, row in hourly.iterrows():
334
+ hour = idx.hour if hasattr(idx, "hour") else "?"
335
+ val = row[a_col] if a_col in row.index else float("nan")
336
+ lines.append(f" Hour {hour:2d}: A = {val:.2f} µmol m⁻² s⁻¹")
337
+
338
+ a_vals = hourly[a_col].dropna()
339
+ if not a_vals.empty:
340
+ lines.append(f"\nPeak A: {a_vals.max():.2f} at hour "
341
+ f"{hourly[a_col].idxmax().hour if hasattr(hourly[a_col].idxmax(), 'hour') else '?'}")
342
+
343
+ return "\n".join(lines)
344
+
345
+ # ------------------------------------------------------------------
346
+ # Default (fallback) report
347
+ # ------------------------------------------------------------------
348
+
349
+ def _default_report(self, date: str, stage: str) -> AdvisorReport:
350
+ """
351
+ Conservative fallback report when Gemini is unavailable.
352
+
353
+ Assumes moderate midday stress, standard budget distribution,
354
+ FvCB morning + ML midday/afternoon.
355
+ """
356
+ self._log("Using conservative fallback report (API unavailable).")
357
+
358
+ hourly = []
359
+ for h in range(6, 21):
360
+ if h < 10:
361
+ entry = HourlyStressEntry(h, "rubp", "none", False)
362
+ elif h < 12:
363
+ entry = HourlyStressEntry(h, "transition", "low", False)
364
+ elif h < 16:
365
+ entry = HourlyStressEntry(h, "rubisco", "moderate", True)
366
+ else:
367
+ entry = HourlyStressEntry(h, "transition", "low", False)
368
+ hourly.append(entry)
369
+
370
+ return AdvisorReport(
371
+ date=date,
372
+ phenological_stage=stage,
373
+ stress_profile=StressProfile(
374
+ rubisco_limited_hours=4,
375
+ peak_stress_hour=14,
376
+ peak_stress_severity="moderate",
377
+ hourly_detail=hourly,
378
+ summary=(
379
+ "Fallback estimate: moderate midday stress assumed (12:00-16:00). "
380
+ "Conservative shading recommended during peak hours. "
381
+ "Actual conditions may differ — advisory generated without API access."
382
+ ),
383
+ ),
384
+ budget_recommendation=BudgetRecommendation(
385
+ daily_budget_fraction=0.15,
386
+ time_block_pct={"10-11": 5, "11-14": 60, "14-16": 30, "16+": 5},
387
+ rationale="Standard budget distribution (fallback). "
388
+ "Concentrates 60% of daily budget in the 11-14 peak stress window.",
389
+ ),
390
+ model_routing=ModelRoutingPreference(
391
+ morning="fvcb",
392
+ midday="ml",
393
+ afternoon="ml",
394
+ rationale="FvCB for cool morning (T < 30°C), ML for midday/afternoon stress (fallback).",
395
+ ),
396
+ chronos_sanity=None,
397
+ confidence_notes="Fallback report — Gemini API was unavailable. "
398
+ "Using biologically conservative defaults.",
399
+ )
400
+
401
+ # ------------------------------------------------------------------
402
+ # Parse Gemini JSON response → AdvisorReport
403
+ # ------------------------------------------------------------------
404
+
405
+ def _parse_report(
406
+ self, date: str, stage: str, parsed: dict, raw_response: str
407
+ ) -> AdvisorReport:
408
+ """Convert parsed JSON dict to AdvisorReport with safe defaults."""
409
+
410
+ # --- Stress profile ---
411
+ sp = parsed.get("stress_profile", {})
412
+ hourly_raw = sp.get("hourly_detail", [])
413
+ hourly_entries = []
414
+ for h in hourly_raw:
415
+ hourly_entries.append(HourlyStressEntry(
416
+ hour=h.get("hour", 0),
417
+ limiting_state=h.get("limiting_state", "rubp"),
418
+ stress_severity=h.get("stress_severity", "none"),
419
+ shading_recommended=h.get("shading_recommended", False),
420
+ ))
421
+
422
+ stress_profile = StressProfile(
423
+ rubisco_limited_hours=sp.get("rubisco_limited_hours", 0),
424
+ peak_stress_hour=sp.get("peak_stress_hour", 12),
425
+ peak_stress_severity=sp.get("peak_stress_severity", "none"),
426
+ hourly_detail=hourly_entries,
427
+ summary=sp.get("summary", "No summary provided."),
428
+ )
429
+
430
+ # --- Budget recommendation ---
431
+ br = parsed.get("budget_recommendation", {})
432
+ budget_rec = BudgetRecommendation(
433
+ daily_budget_fraction=br.get("daily_budget_fraction", 0.15),
434
+ time_block_pct=br.get("time_block_pct", {"10-11": 5, "11-14": 60, "14-16": 30, "16+": 5}),
435
+ rationale=br.get("rationale", "No rationale provided."),
436
+ )
437
+
438
+ # --- Model routing ---
439
+ mr = parsed.get("model_routing", {})
440
+ model_routing = ModelRoutingPreference(
441
+ morning=mr.get("morning", "fvcb"),
442
+ midday=mr.get("midday", "ml"),
443
+ afternoon=mr.get("afternoon", "ml"),
444
+ rationale=mr.get("rationale", "No rationale provided."),
445
+ )
446
+
447
+ # --- Chronos sanity check (optional) ---
448
+ cs = parsed.get("chronos_sanity")
449
+ chronos_sanity = None
450
+ if cs is not None:
451
+ chronos_sanity = ChronosSanityCheck(
452
+ plausible=cs.get("plausible", True),
453
+ flags=cs.get("flags", []),
454
+ overall_assessment=cs.get("overall_assessment", "No assessment."),
455
+ )
456
+
457
+ return AdvisorReport(
458
+ date=date,
459
+ phenological_stage=stage,
460
+ stress_profile=stress_profile,
461
+ budget_recommendation=budget_rec,
462
+ model_routing=model_routing,
463
+ chronos_sanity=chronos_sanity,
464
+ confidence_notes=parsed.get("confidence_notes", ""),
465
+ raw_llm_response=raw_response,
466
+ )
467
+
468
+ # ------------------------------------------------------------------
469
+ # Main advisory method
470
+ # ------------------------------------------------------------------
471
+
472
    def advise(
        self,
        date: str,
        weather_forecast: pd.DataFrame,
        phenological_stage: str = "vegetative",
        remaining_weekly_budget_kWh: float = 20.0,
        remaining_monthly_budget_kWh: float = 80.0,
        chronos_forecast: Optional[pd.DataFrame] = None,
        gdd_cumulative: Optional[float] = None,
        vine_snapshot: Optional[object] = None,
    ) -> AdvisorReport:
        """
        Analyze day-ahead weather forecast and produce structured advisory.

        Builds a text prompt from the inputs, sends it to Gemini, parses the
        JSON reply into an AdvisorReport, and caches the report per
        (date, stage). On any Gemini/parsing error it falls back to the
        conservative default report instead of raising.

        Parameters
        ----------
        date : target date string (e.g. "2025-07-15")
        weather_forecast : DataFrame of IMS weather data (15-min or hourly)
        phenological_stage : current vine stage (vegetative/flowering/veraison/harvest)
        remaining_weekly_budget_kWh : remaining shading budget for the week
        remaining_monthly_budget_kWh : remaining shading budget for the month
        chronos_forecast : optional Chronos A prediction DataFrame
        gdd_cumulative : optional cumulative growing degree days
        vine_snapshot : optional VineSnapshot from ThingsBoardClient.get_vine_snapshot();
            seeds the advisory with current on-site sensor state (soil moisture,
            fruiting-zone PAR, treatment vs reference comparison)

        Returns
        -------
        AdvisorReport with stress profile, budget, routing, and sanity check
        """
        self._log(f"Generating advisory for {date} (stage: {phenological_stage})")

        # Return cached report if same date+stage already advised — budgets,
        # forecasts and snapshot are NOT part of the key, so changed inputs on
        # the same day reuse the earlier advisory.
        cache_key = f"{date}|{phenological_stage}"
        if cache_key in self._report_cache:
            self._log("Returning cached advisory for this date+stage.")
            return self._report_cache[cache_key]

        # Build user prompt (section order matters — the system prompt is
        # written against this layout).
        weather_text = self._format_weather_forecast(weather_forecast)

        prompt_parts = [
            f"DATE: {date}",
            f"PHENOLOGICAL STAGE: {phenological_stage}",
            f"REMAINING WEEKLY BUDGET: {remaining_weekly_budget_kWh:.1f} kWh",
            f"REMAINING MONTHLY BUDGET: {remaining_monthly_budget_kWh:.1f} kWh",
        ]
        if gdd_cumulative is not None:
            prompt_parts.append(f"CUMULATIVE GDD: {gdd_cumulative:.0f}")

        if vine_snapshot is not None:
            prompt_parts.append("")
            # Best-effort: a snapshot that fails to render is silently dropped
            # rather than aborting the advisory.
            try:
                prompt_parts.append(vine_snapshot.to_advisor_text())
            except Exception:
                pass

        prompt_parts.append("")
        prompt_parts.append(weather_text)

        if chronos_forecast is not None:
            prompt_parts.append("")
            prompt_parts.append(self._format_chronos_forecast(chronos_forecast))
        else:
            # Explicitly tell the model not to invent a sanity check.
            prompt_parts.append("\nNo Chronos forecast available — set chronos_sanity to null.")

        user_prompt = "\n".join(prompt_parts)

        # Call Gemini; any failure (network, quota, malformed JSON) degrades
        # to the conservative fallback report.
        try:
            raw = self._call_gemini(user_prompt)
            parsed = _extract_json(raw)
            report = self._parse_report(date, phenological_stage, parsed, raw)
            self._report_cache[cache_key] = report
            self._log("Advisory generated successfully via Gemini.")
            return report
        except Exception as exc:
            self._log(f"Gemini API error: {exc}")
            # NOTE: fallback reports are intentionally not cached, so a later
            # call on the same day can retry the API.
            return self._default_report(date, phenological_stage)
552
+
553
+ # ------------------------------------------------------------------
554
+ # Serialization
555
+ # ------------------------------------------------------------------
556
+
557
+ @staticmethod
558
+ def report_to_dict(report: AdvisorReport) -> dict:
559
+ """Convert AdvisorReport to a plain dict (JSON-serializable)."""
560
+ return asdict(report)
561
+
562
+ @staticmethod
563
+ def report_to_json(report: AdvisorReport, indent: int = 2) -> str:
564
+ """Convert AdvisorReport to a JSON string."""
565
+ return json.dumps(asdict(report), indent=indent, default=str)
566
+
567
+
568
+ # ---------------------------------------------------------------------------
569
+ # CLI entry point
570
+ # ---------------------------------------------------------------------------
571
+
572
if __name__ == "__main__":
    # Demo: run the advisor against the last day of locally cached IMS data.
    from pathlib import Path

    IMS_CSV = Path(__file__).resolve().parent.parent / "Data" / "ims" / "ims_merged_15min.csv"

    if not IMS_CSV.exists():
        print("No IMS cache data found. Cannot run advisory demo.")
        print(f"Looked in: {IMS_CSV}")
        raise SystemExit(1)

    print(f"Loading IMS data from: {IMS_CSV.name}")
    # NOTE(review): parse_dates=True has no effect without index_col / column
    # names — the loop below performs the actual datetime parsing.
    df = pd.read_csv(IMS_CSV, parse_dates=True)

    # Try to parse datetime from the first recognized timestamp column.
    for col in ["timestamp_utc", "datetime", "time", "timestamp"]:
        if col in df.columns:
            df.index = pd.to_datetime(df[col])
            break

    # Use last day of data; if no datetime index could be built, fall back to
    # the final 96 rows (~24 h of 15-min samples).
    if isinstance(df.index, pd.DatetimeIndex):
        last_date = df.index.date[-1]
        day_data = df[df.index.date == last_date]
        date_str = str(last_date)
    else:
        day_data = df.tail(96)  # ~24h of 15-min data
        date_str = "unknown"

    print(f"Date: {date_str}, rows: {len(day_data)}")

    advisor = DayAheadAdvisor(verbose=True)
    report = advisor.advise(
        date=date_str,
        weather_forecast=day_data,
        phenological_stage="veraison",
        remaining_weekly_budget_kWh=15.0,
        remaining_monthly_budget_kWh=50.0,
    )

    # Human-readable summary followed by the full JSON payload.
    print("\n" + "=" * 60)
    print("DAY-AHEAD STRESS ADVISORY")
    print("=" * 60)
    print(f"Date: {report.date}")
    print(f"Stage: {report.phenological_stage}")
    print(f"\nStress Summary: {report.stress_profile.summary}")
    print(f"Rubisco-limited hours: {report.stress_profile.rubisco_limited_hours}")
    print(f"Peak stress: {report.stress_profile.peak_stress_severity} "
          f"at hour {report.stress_profile.peak_stress_hour}")
    print(f"\nBudget: {report.budget_recommendation.daily_budget_fraction:.0%} "
          f"of weekly budget")
    print(f"Time blocks: {report.budget_recommendation.time_block_pct}")
    print(f"Rationale: {report.budget_recommendation.rationale}")
    print(f"\nModel routing: morning={report.model_routing.morning}, "
          f"midday={report.model_routing.midday}, "
          f"afternoon={report.model_routing.afternoon}")
    if report.chronos_sanity:
        print(f"\nChronos sanity: plausible={report.chronos_sanity.plausible}")
        print(f"  Flags: {report.chronos_sanity.flags}")
    print(f"\nConfidence: {report.confidence_notes}")
    print("\n--- Full JSON ---")
    print(DayAheadAdvisor.report_to_json(report))
src/advisor/safety_rails.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SafetyRails: FvCB vs ML divergence guard for the SolarWine 2.0 control loop.
3
+
4
+ Position in the control loop (Phase 3, Step 7):
5
+ After TradeoffEngine selects a minimum dose, SafetyRails validates that
6
+ the FvCB and ML photosynthesis predictions are sufficiently consistent.
7
+
8
+ If the two models disagree by more than DIVERGENCE_THRESHOLD (12%), the
9
+ system cannot confidently predict that shading will help, so it falls back
10
+ to full astronomical tracking (zero energy sacrifice, zero risk).
11
+
12
+ Rationale
13
+ ---------
14
+ The FvCB mechanistic model and ML ensemble are calibrated on different
15
+ assumptions:
16
+ - FvCB is reliable in standard conditions (T < 30°C, moderate VPD).
17
+ - ML handles non-linear stress regimes better.
18
+
19
+ When both agree → high confidence → proceed with intervention.
20
+ When they disagree significantly → sensor fault, regime change, or edge
21
+ case not covered by calibration. The safe default is no intervention.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from dataclasses import dataclass
27
+ from typing import Optional
28
+
29
+ from config.settings import DIVERGENCE_THRESHOLD
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Result dataclass
34
+ # ---------------------------------------------------------------------------
35
+
36
@dataclass
class SafetyCheckResult:
    """Outcome of one FvCB-vs-ML divergence check."""

    passed: bool
    fvcb_a: float
    ml_a: float
    divergence_pct: float  # |fvcb_a - ml_a| / max(|fvcb_a|, |ml_a|) × 100
    fallback_needed: bool  # True when the control loop must revert to θ_astro
    reason: str  # human-readable explanation

    def __str__(self) -> str:
        verdict = "PASS" if self.passed else "FAIL → fallback to θ_astro"
        return (
            f"SafetyRails [{verdict}] "
            f"FvCB={self.fvcb_a:.2f} ML={self.ml_a:.2f} "
            f"divergence={self.divergence_pct:.1f}% "
            f"(threshold={DIVERGENCE_THRESHOLD * 100:.0f}%)"
        )


# ---------------------------------------------------------------------------
# SafetyRails
# ---------------------------------------------------------------------------

class SafetyRails:
    """
    Validates that FvCB and ML model outputs are consistent before any
    shading command is issued.

    Usage
    -----
        rails = SafetyRails()
        result = rails.check(fvcb_a=14.3, ml_a=14.8)
        if result.fallback_needed:
            # stay at θ_astro, log result
    """

    def __init__(self, threshold: Optional[float] = None) -> None:
        """
        Parameters
        ----------
        threshold : divergence fraction (0–1) that triggers fallback.
            Defaults to DIVERGENCE_THRESHOLD (0.12) from settings.
        """
        self.threshold = DIVERGENCE_THRESHOLD if threshold is None else threshold

    def check(
        self,
        fvcb_a: float,
        ml_a: float,
        context: Optional[str] = None,
    ) -> SafetyCheckResult:
        """
        Compare FvCB and ML photosynthesis outputs.

        Parameters
        ----------
        fvcb_a : net A from FarquharModel (µmol CO₂ m⁻² s⁻¹)
        ml_a : net A from ML ensemble (µmol CO₂ m⁻² s⁻¹)
        context : optional string for logging (e.g. "2025-07-15 13:00")

        Returns
        -------
        SafetyCheckResult
        """
        # Normalize by the larger magnitude; the 1e-6 floor guards against
        # division by zero when both models output ~0.
        scale = max(abs(fvcb_a), abs(ml_a), 1e-6)
        gap = abs(fvcb_a - ml_a) / scale
        gap_pct = gap * 100.0

        must_fall_back = gap > self.threshold

        if must_fall_back:
            explanation = (
                f"Models diverge by {gap_pct:.1f}% "
                f"(FvCB={fvcb_a:.2f}, ML={ml_a:.2f}) — "
                f"exceeds {self.threshold * 100:.0f}% threshold. "
                f"Falling back to full astronomical tracking."
            )
        elif fvcb_a < 0 and ml_a < 0:
            # Both models agree there is nothing to gain from shading.
            explanation = "Both models predict carbon loss (dark/night); no shading beneficial."
            must_fall_back = True
        else:
            explanation = (
                f"Models agree within {self.threshold * 100:.0f}% threshold "
                f"(FvCB={fvcb_a:.2f}, ML={ml_a:.2f}, "
                f"divergence={gap_pct:.1f}%). Proceeding."
            )

        return SafetyCheckResult(
            passed=not must_fall_back,
            fvcb_a=fvcb_a,
            ml_a=ml_a,
            divergence_pct=round(gap_pct, 2),
            fallback_needed=must_fall_back,
            reason=explanation,
        )

    def check_from_log(self, fvcb_a: Optional[float], ml_a: Optional[float]) -> SafetyCheckResult:
        """
        None-tolerant variant (e.g. the ML model is not loaded).

        If either value is None the check passes by construction — the
        calling code should simply use whichever model produced a number.
        """
        if fvcb_a is not None and ml_a is not None:
            return self.check(fvcb_a, ml_a)

        available = ml_a if fvcb_a is None else fvcb_a
        return SafetyCheckResult(
            passed=True,
            fvcb_a=fvcb_a or 0.0,
            ml_a=ml_a or 0.0,
            divergence_pct=0.0,
            fallback_needed=False,
            reason=(
                f"Only one model available (value={available:.2f}). "
                "Cannot check divergence; proceeding with available model."
            ),
        )
155
+
156
+
157
+ # ---------------------------------------------------------------------------
158
+ # CLI smoke test
159
+ # ---------------------------------------------------------------------------
160
+
161
if __name__ == "__main__":
    # Smoke test: run the rails over a handful of representative model pairs.
    guard = SafetyRails()

    scenarios = [
        (14.3, 14.8, "Normal agreement (3.4%)"),
        (14.3, 16.5, "Borderline (15.4% — over threshold)"),
        (14.3, 12.0, "Below threshold (17.6% — over)"),
        (14.3, 14.3, "Perfect agreement"),
        (14.3, None, "ML unavailable"),
        (-2.0, -1.8, "Carbon loss (night)"),
    ]

    print(f"SafetyRails — threshold={guard.threshold * 100:.0f}%\n")
    for fvcb_value, ml_value, label in scenarios:
        outcome = guard.check_from_log(fvcb_value, ml_value)
        status = "FALLBACK" if outcome.fallback_needed else "OK "
        print(f"  [{status}] {label}")
        print(f"    {outcome.reason}")
        print()
src/baseline_predictor.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BaselinePredictor: hybrid FvCB + ML photosynthesis baseline for day-ahead planning.
3
+
4
+ Provides a single ``predict_day()`` method that:
5
+ 1. Runs FvCB (Farquhar–Greer–Weedon) for each slot using forecast weather
6
+ 2. Optionally runs a trained ML model for the same slots
7
+ 3. Uses the RoutingAgent's rule-based logic to pick the better prediction per slot
8
+ 4. Returns a 96-slot profile of predicted photosynthesis rate A (µmol CO₂ m⁻² s⁻¹)
9
+
10
+ This feeds into the DayAheadPlanner to estimate crop value for each slot,
11
+ replacing the current temperature-only heuristic with an actual photosynthesis
12
+ prediction that captures the Rubisco transition more accurately.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import math
19
+ from datetime import date
20
+ from typing import List, Optional
21
+
22
+ import numpy as np
23
+
24
+ from config.settings import SEMILLON_TRANSITION_TEMP_C
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class BaselinePredictor:
    """Hybrid FvCB + ML photosynthesis prediction for day-ahead planning.

    Runs the FvCB mechanistic model for every 15-min slot, optionally runs a
    trained ML model for the same slots, then routes between the two per slot
    using the RoutingAgent's rule-based logic.

    Parameters
    ----------
    fvcb_model : FarquharModel, optional
        Lazy-initialised if not provided.
    ml_predictor : PhotosynthesisPredictor, optional
        Trained ML model. If None, FvCB-only mode is used.
    routing_agent : RoutingAgent, optional
        Model router for per-slot FvCB/ML selection.
        If None, uses rule-based routing only (no API calls).
    """

    # Number of 15-minute slots in one day; all per-day profiles use this length.
    SLOTS_PER_DAY = 96

    def __init__(
        self,
        fvcb_model=None,
        ml_predictor=None,
        routing_agent=None,
    ):
        self._fvcb = fvcb_model
        self._ml = ml_predictor
        self._router = routing_agent

    @property
    def fvcb(self):
        # Lazily construct the FvCB model so importing this module stays cheap.
        if self._fvcb is None:
            from src.models.farquhar_model import FarquharModel
            self._fvcb = FarquharModel()
        return self._fvcb

    # ------------------------------------------------------------------
    # Main API
    # ------------------------------------------------------------------

    def predict_day(
        self,
        forecast_temps: List[float],
        forecast_ghi: List[float],
        co2_ppm: float = 400.0,
        rh_pct: float = 40.0,
    ) -> List[float]:
        """Predict photosynthesis rate A for each 15-min slot.

        Parameters
        ----------
        forecast_temps : list of 96 floats
            Forecast air temperature (°C) per slot.
        forecast_ghi : list of 96 floats
            Forecast GHI (W/m²) per slot.
        co2_ppm : float
            Atmospheric CO₂ concentration (default 400 ppm).
        rh_pct : float
            Relative humidity (%) for VPD estimation (default 40%).

        Returns
        -------
        list of 96 floats
            Predicted net photosynthesis A (µmol CO₂ m⁻² s⁻¹) per slot.
            0.0 for nighttime slots.

        Raises
        ------
        ValueError
            If either input does not contain exactly 96 values.
        """
        # Explicit validation instead of `assert`: asserts are stripped
        # under `python -O`, silently disabling the length check.
        n = self.SLOTS_PER_DAY
        if len(forecast_temps) != n or len(forecast_ghi) != n:
            raise ValueError(
                f"predict_day expects {n} slots; got "
                f"{len(forecast_temps)} temperatures and {len(forecast_ghi)} GHI values"
            )

        # FvCB predictions for all 96 slots
        fvcb_predictions = self._predict_fvcb(
            forecast_temps, forecast_ghi, co2_ppm, rh_pct,
        )

        # If no ML model, return FvCB-only
        if self._ml is None:
            return fvcb_predictions

        # ML predictions for all 96 slots
        ml_predictions = self._predict_ml(forecast_temps, forecast_ghi)

        # Route each slot between the two model outputs
        return self._route_predictions(
            forecast_temps, forecast_ghi,
            fvcb_predictions, ml_predictions,
        )

    # ------------------------------------------------------------------
    # FvCB predictions
    # ------------------------------------------------------------------

    def _predict_fvcb(
        self,
        temps: List[float],
        ghis: List[float],
        co2_ppm: float,
        rh_pct: float,
    ) -> List[float]:
        """Run FvCB for each slot. Returns 96 A values (0.0 on failure/night)."""
        predictions = []
        for i in range(self.SLOTS_PER_DAY):
            temp = temps[i]
            ghi = ghis[i]

            # Nighttime or negligible light → no assimilation modeled
            if ghi < 50:
                predictions.append(0.0)
                continue

            # Estimate PAR from GHI (roughly 2× conversion for photosynthetically active)
            par = ghi * 2.0

            # Estimate Tleaf from Tair (proxy: +2°C under sun)
            tleaf = temp + 2.0

            # Estimate VPD from temperature and RH
            vpd = self._estimate_vpd(temp, rh_pct)

            try:
                result = self.fvcb.calc_photosynthesis_semillon(
                    PAR=par,
                    Tleaf=tleaf,
                    CO2=co2_ppm,
                    VPD=vpd,
                    Tair=temp,
                )
                # Returns (A, limiting_state, shading_helps); keep A only.
                A = result[0] if isinstance(result, tuple) else result
                predictions.append(max(0.0, float(A)))
            except Exception as exc:
                # A failed slot degrades to 0.0 rather than aborting the day.
                logger.debug("FvCB failed at slot %d: %s", i, exc)
                predictions.append(0.0)

        return predictions

    @staticmethod
    def _estimate_vpd(tair_c: float, rh_pct: float) -> float:
        """Estimate VPD (kPa) from air temperature and relative humidity."""
        # Tetens formula for saturated vapor pressure
        es = 0.6108 * math.exp(17.27 * tair_c / (tair_c + 237.3))
        ea = es * rh_pct / 100.0
        return max(0.0, es - ea)

    # ------------------------------------------------------------------
    # ML predictions
    # ------------------------------------------------------------------

    def _predict_ml(
        self,
        temps: List[float],
        ghis: List[float],
    ) -> List[float]:
        """Run ML model for each slot. Returns 96 A values (zeros on failure)."""
        if self._ml is None:
            return [0.0] * self.SLOTS_PER_DAY

        try:
            import pandas as pd

            # Build feature DataFrame matching ML model's expected features
            hours = [i * 0.25 for i in range(self.SLOTS_PER_DAY)]
            df = pd.DataFrame({
                "air_temperature_c": temps,
                "ghi_w_m2": ghis,
                "hour": [int(h) for h in hours],
                "minute": [int((h % 1) * 60) for h in hours],
            })

            # Pick the model with the lowest reported MAE
            best_model = None
            best_mae = float("inf")
            for name, result in self._ml.results.items():
                if result.get("mae", float("inf")) < best_mae:
                    best_mae = result["mae"]
                    best_model = name

            if best_model and best_model in self._ml.models:
                model = self._ml.models[best_model]
                # Use whatever features the model was trained on
                feature_cols = [c for c in df.columns if c in getattr(model, "feature_names_in_", df.columns)]
                if feature_cols:
                    preds = model.predict(df[feature_cols])
                    return [max(0.0, float(p)) for p in preds]

        except Exception as exc:
            logger.warning("ML prediction failed: %s", exc)

        return [0.0] * self.SLOTS_PER_DAY

    # ------------------------------------------------------------------
    # Routing
    # ------------------------------------------------------------------

    def _route_predictions(
        self,
        temps: List[float],
        ghis: List[float],
        fvcb_preds: List[float],
        ml_preds: List[float],
    ) -> List[float]:
        """Pick FvCB or ML per slot using rule-based routing logic."""
        from src.chatbot.routing_agent import RoutingAgent

        predictions = []
        for i in range(self.SLOTS_PER_DAY):
            telemetry = {
                "temp_c": temps[i],
                "ghi_w_m2": ghis[i],
                "hour": i // 4,
            }

            # Use rule-based routing only (no API calls for batch prediction)
            choice = RoutingAgent._rule_based_route(telemetry)
            if choice is None:
                # Transition zone: weight FvCB 60% / ML 40% as compromise
                a = 0.6 * fvcb_preds[i] + 0.4 * ml_preds[i]
            elif choice == "ml":
                a = ml_preds[i]
            else:
                a = fvcb_preds[i]

            predictions.append(a)

        return predictions
src/canopy_photosynthesis.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.models.canopy_photosynthesis."""
2
+ from src.models.canopy_photosynthesis import * # noqa: F401, F403
src/chatbot/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Chatbot: vineyard chat, routing agent, LLM data engineer."""
src/chatbot/feedback.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Feedback storage for the Vineyard Advisor chatbot.
3
+
4
+ Logs user feedback (thumbs up/down, flags) to a JSON-lines file.
5
+ Each entry captures the query, response, tool results, rules applied,
6
+ and the user's feedback action.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import logging
13
+ from datetime import datetime, timezone
14
+ from pathlib import Path
15
+ from typing import Optional
16
+
17
+ from config.settings import DATA_DIR
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ FEEDBACK_FILE = DATA_DIR / "advisor_feedback.jsonl"
22
+
23
+
24
def log_feedback(
    query: str,
    response: str,
    feedback: str,
    confidence: str = "",
    sources: Optional[list[str]] = None,
    tool_calls: Optional[list[dict]] = None,
    rule_violations: Optional[list[dict]] = None,
    response_mode: str = "",
    comment: str = "",
) -> None:
    """Append a feedback entry to the JSONL file.

    Best-effort: any storage failure is logged and swallowed so feedback
    logging can never break the chat flow.

    Parameters
    ----------
    query : str
        The user's original question.
    response : str
        The chatbot's response text.
    feedback : str
        One of: "thumbs_up", "thumbs_down", "flag_incorrect".
    confidence, sources, tool_calls, rule_violations, response_mode :
        Metadata from the ChatResponse.
    comment : str
        Optional free-text comment from the user.
    """
    entry = {
        "timestamp": datetime.now(tz=timezone.utc).isoformat(),
        "query": query,
        "response": response[:500],  # truncate for storage
        "feedback": feedback,
        "confidence": confidence,
        "sources": sources or [],
        # Keep only name/args from each tool call record.
        "tool_calls": [
            {"name": tc.get("name", ""), "args": tc.get("args", {})}
            for tc in (tool_calls or [])
        ],
        "rule_violations": rule_violations or [],
        "response_mode": response_mode,
        "comment": comment,
    }

    try:
        FEEDBACK_FILE.parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding so stored entries are portable across host locales
        # (the default encoding is platform-dependent).
        with open(FEEDBACK_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry, default=str) + "\n")
        logger.info("Feedback logged: %s for query: %s", feedback, query[:50])
    except Exception as exc:
        logger.warning("Failed to log feedback: %s", exc)
73
+
74
+
75
def load_feedback(limit: int = 100) -> list[dict]:
    """Load the most recent *limit* feedback entries (oldest first).

    Malformed lines are skipped individually so one corrupt record cannot
    discard the rest of the log; other read failures return whatever was
    successfully parsed so far.
    """
    if not FEEDBACK_FILE.exists():
        return []

    entries: list[dict] = []
    try:
        with open(FEEDBACK_FILE, encoding="utf-8") as f:
            for line_no, raw in enumerate(f, start=1):
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    entries.append(json.loads(raw))
                except json.JSONDecodeError as exc:
                    # Previously a single bad line aborted the whole read;
                    # now we skip it and keep the remaining entries.
                    logger.warning("Skipping malformed feedback line %d: %s", line_no, exc)
    except Exception as exc:
        logger.warning("Failed to load feedback: %s", exc)

    return entries[-limit:]
91
+
92
+
93
def feedback_summary() -> dict:
    """Return aggregate feedback statistics (total plus per-kind counts)."""
    entries = load_feedback(limit=10000)
    if not entries:
        return {"total": 0}

    # Single pass over the entries instead of one scan per feedback kind.
    tallies = {"thumbs_up": 0, "thumbs_down": 0, "flag_incorrect": 0}
    for entry in entries:
        kind = entry.get("feedback")
        if kind in tallies:
            tallies[kind] += 1

    return {
        "total": len(entries),
        "thumbs_up": tallies["thumbs_up"],
        "thumbs_down": tallies["thumbs_down"],
        "flagged": tallies["flag_incorrect"],
    }
src/chatbot/guardrails.py ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Guardrails for the Vineyard Advisor chatbot.
3
+
4
+ Three components:
5
+ 1. QueryClassifier — determines if a query requires tool data or can be
6
+ answered from biology rules alone.
7
+ 2. ResponseValidator — deterministic post-response check that catches
8
+ rule violations before the answer reaches the user.
9
+ 3. confidence_from_context — estimates answer confidence based on data
10
+ freshness and availability.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ from typing import Optional
19
+
20
+ from config.settings import (
21
+ NO_SHADE_BEFORE_HOUR,
22
+ NO_SHADE_MONTHS,
23
+ NO_SHADE_TLEAF_BELOW,
24
+ )
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # 1. Query classifier — decides whether a tool call is mandatory
29
+ # ---------------------------------------------------------------------------
30
+
31
# Keywords that indicate user is asking about real-time / site-specific data.
# Compiled below into _DATA_PATTERNS; all matching is case-insensitive.
_DATA_KEYWORDS = [
    # Weather / environment
    r"\btemperature\b", r"\btemp\b", r"\bhow hot\b", r"\bhow cold\b",
    r"\bweather\b", r"\bforecast\b", r"\brain\b", r"\bwind\b",
    r"\bhumidity\b", r"\bghi\b", r"\bradiation\b", r"\birradiance\b",
    # Sensors
    r"\bsensor\b", r"\bsoil\b", r"\bmoisture\b", r"\bleaf temp\b",
    r"\bpar\b", r"\bndvi\b", r"\bcwsi\b", r"\bvpd\b",
    # Photosynthesis / predictions
    # NOTE(review): r"\bforecast\b" is duplicated (also in the weather group);
    # harmless, but it produces a duplicate entry in classify_query's matches.
    r"\bphotosynthesis\b", r"\bassimilation\b", r"\bpredict\b",
    r"\bforecast\b", r"\bA rate\b", r"\bcarbon\b",
    # Energy
    r"\benergy\b", r"\bkwh\b", r"\bpower\b", r"\bgeneration\b",
    r"\binverter\b",
    # Irrigation (stem "irrigat" covers irrigate/irrigation/irrigating)
    r"\birrigat\b", r"\bwater\b",
    # Shading — action-oriented
    r"\bshade\b", r"\bshading\b", r"\btilt\b", r"\bangle\b", r"\bpanel\b",
    # Temporal / current state
    r"\bright now\b", r"\bcurrent\b", r"\btoday\b", r"\btomorrow\b",
    r"\byesterday\b", r"\bthis week\b", r"\blast \d+ (hour|day|minute)",
    # Direct data ask (generic — filtered in classify_query if nothing
    # domain-specific also matched)
    r"\bshow me\b", r"\bwhat is\b", r"\bwhat are\b", r"\bhow much\b",
    r"\bcheck\b", r"\bstatus\b", r"\bstate\b",
]

# Compile once at import time so classify_query pays no per-call regex cost
_DATA_PATTERNS = [re.compile(p, re.IGNORECASE) for p in _DATA_KEYWORDS]

# Keywords for pure knowledge / biology rule questions (no tool needed)
_KNOWLEDGE_KEYWORDS = [
    r"\bwhy\b.*\brule\b", r"\bexplain\b.*\brule\b",
    r"\bwhat is rubisco\b", r"\bwhat is fvcb\b", r"\bwhat is farquhar\b",
    r"\btell me about\b.*\bbiology\b", r"\bhow does photosynthesis work\b",
    r"\bwhat does .* mean\b",
]

_KNOWLEDGE_PATTERNS = [re.compile(p, re.IGNORECASE) for p in _KNOWLEDGE_KEYWORDS]
70
+
71
+
72
@dataclass
class QueryClass:
    """Result of query classification.

    Returned by ``classify_query``; tells the chatbot whether the answer
    must be grounded in a tool call before responding.
    """
    requires_data: bool  # True = tool call is mandatory
    category: str  # "data", "knowledge", "greeting", "ambiguous"
    matched_keywords: list[str] = field(default_factory=list)  # data keywords found in the query
78
+
79
+
80
+ def classify_query(user_message: str) -> QueryClass:
81
+ """Classify whether a user query requires tool-grounded data."""
82
+ msg = user_message.strip()
83
+
84
+ # Very short / greeting
85
+ if len(msg) < 5 or re.match(r"^(hi|hello|hey|thanks|thank you|ok|bye)\b", msg, re.I):
86
+ return QueryClass(requires_data=False, category="greeting")
87
+
88
+ # Check knowledge patterns first (more specific)
89
+ for pat in _KNOWLEDGE_PATTERNS:
90
+ if pat.search(msg):
91
+ return QueryClass(requires_data=False, category="knowledge")
92
+
93
+ # Check data patterns
94
+ matched = []
95
+ for pat in _DATA_PATTERNS:
96
+ m = pat.search(msg)
97
+ if m:
98
+ matched.append(m.group())
99
+
100
+ if matched:
101
+ # If the only match is a generic question word ("what is", "show me")
102
+ # but no domain-specific data keyword, treat as ambiguous
103
+ domain_matches = [m for m in matched if m.lower() not in
104
+ {"what is", "what are", "show me", "how much", "check", "status", "state"}]
105
+ if not domain_matches:
106
+ return QueryClass(requires_data=False, category="ambiguous")
107
+ return QueryClass(requires_data=True, category="data", matched_keywords=matched)
108
+
109
+ # Default: ambiguous — allow LLM to decide
110
+ return QueryClass(requires_data=False, category="ambiguous")
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # 2. Response validator — deterministic rule checks
115
+ # ---------------------------------------------------------------------------
116
+
117
+ @dataclass
118
+ class RuleViolation:
119
+ """A detected rule violation in a chatbot response."""
120
+ rule_name: str
121
+ severity: str # "block" or "warn"
122
+ message: str
123
+ correction: str # What to tell the user instead
124
+
125
+
126
+ def validate_response(
127
+ response_text: str,
128
+ action: Optional[str] = None,
129
+ context: Optional[dict] = None,
130
+ ) -> list[RuleViolation]:
131
+ """
132
+ Check a chatbot response for rule violations.
133
+
134
+ Parameters
135
+ ----------
136
+ response_text : str
137
+ The chatbot's response text.
138
+ action : str or None
139
+ Extracted action ("shade", "irrigate", "no_action", etc.).
140
+ context : dict or None
141
+ Current conditions: hour, month, temp_c, stage_id, etc.
142
+
143
+ Returns
144
+ -------
145
+ List of RuleViolation objects. Empty list = all good.
146
+ """
147
+ violations: list[RuleViolation] = []
148
+ ctx = context or {}
149
+ text_lower = response_text.lower()
150
+
151
+ hour = ctx.get("hour")
152
+ month = ctx.get("month")
153
+ temp_c = ctx.get("temp_c")
154
+ stage_id = ctx.get("stage_id")
155
+
156
+ # Detect if the response recommends shading
157
+ _recommends_shade = _text_recommends_shading(text_lower)
158
+
159
+ # Rule: No shading before NO_SHADE_BEFORE_HOUR
160
+ if _recommends_shade and hour is not None and hour < NO_SHADE_BEFORE_HOUR:
161
+ violations.append(RuleViolation(
162
+ rule_name="no_shade_before_10",
163
+ severity="block",
164
+ message=f"Response recommends shading before {NO_SHADE_BEFORE_HOUR}:00.",
165
+ correction=(
166
+ "Morning light is critical for carbon fixation. "
167
+ f"Shading should not be recommended before {NO_SHADE_BEFORE_HOUR}:00 regardless "
168
+ "of temperature. Panels should remain at full tracking."
169
+ ),
170
+ ))
171
+
172
+ # Rule: No shading in restricted months (unless extreme)
173
+ if _recommends_shade and month in NO_SHADE_MONTHS:
174
+ # Check if the response mentions extreme conditions
175
+ _mentions_extreme = any(w in text_lower for w in [
176
+ "extreme", "lethal", "emergency", "severe sunburn", "last resort",
177
+ ])
178
+ if not _mentions_extreme:
179
+ violations.append(RuleViolation(
180
+ rule_name="no_shade_in_may",
181
+ severity="block",
182
+ message="Response recommends shading in May without citing extreme conditions.",
183
+ correction=(
184
+ "May is the flowering/fruit-set period. Shading should be "
185
+ "avoided in May unless there is extreme heat causing lethal "
186
+ "stress. Panels should remain at full tracking."
187
+ ),
188
+ ))
189
+
190
+ # Rule: Below transition temp shading hurts (RuBP-limited)
191
+ if _recommends_shade and temp_c is not None and temp_c < NO_SHADE_TLEAF_BELOW:
192
+ violations.append(RuleViolation(
193
+ rule_name="temperature_transition",
194
+ severity="warn",
195
+ message=f"Response recommends shading at {temp_c:.0f}°C (below 28°C transition zone).",
196
+ correction=(
197
+ f"At {temp_c:.0f}°C, photosynthesis is RuBP-limited — "
198
+ f"the vine needs light, not shade. Shading would reduce "
199
+ f"photosynthesis. Keep panels at full tracking."
200
+ ),
201
+ ))
202
+
203
+ # Rule: Dormant season — shading is irrelevant, not harmful
204
+ if stage_id in ("winter_dormancy",) and _recommends_shade:
205
+ violations.append(RuleViolation(
206
+ rule_name="no_leaves_no_shade_problem",
207
+ severity="warn",
208
+ message="Response discusses shading during dormancy.",
209
+ correction=(
210
+ "The vine is dormant with no leaves. Shading is irrelevant "
211
+ "(not harmful, just pointless). Panels should track for "
212
+ "maximum energy."
213
+ ),
214
+ ))
215
+
216
+ # Rule: "No shading" answers must explain why
217
+ _recommends_no_shade = _text_recommends_no_shading(text_lower)
218
+ if _recommends_no_shade:
219
+ _has_reason = any(reason in text_lower for reason in [
220
+ "light-limited", "rubp", "need light", "needs light",
221
+ "full sun", "below 30", "below 28",
222
+ "dormant", "no leaves", "no canopy",
223
+ "night", "dark", "no radiation", "ghi", "no sun",
224
+ "carbon fixation", "morning light",
225
+ "not photosynthesi", "not active",
226
+ ])
227
+ if not _has_reason:
228
+ violations.append(RuleViolation(
229
+ rule_name="no_shading_must_explain",
230
+ severity="warn",
231
+ message="Response says 'no shading' without explaining why.",
232
+ correction=(
233
+ "When recommending no shading, always explain the reason: "
234
+ "is the vine light-limited (T < 30°C), dormant (no leaves), "
235
+ "or is there no radiation? The farmer needs to understand why."
236
+ ),
237
+ ))
238
+
239
+ return violations
240
+
241
+
242
+ # Shared keyword lists for shading detection heuristics
243
+ _POSITIVE_SHADE_PHRASES = [
244
+ "recommend shading", "should shade", "activate shading",
245
+ "tilt the panel", "move the panel", "adjust the panel",
246
+ "shade the vine", "shade your vine", "shading would help",
247
+ "shading is recommended", "suggest shading", "consider shading",
248
+ "apply shading", "deploy shading", "enable shading",
249
+ "recommend anti-tracking", "switch to anti-tracking",
250
+ ]
251
+
252
+ _NEGATIVE_SHADE_PHRASES = [
253
+ "should not shade", "don't shade", "no shading",
254
+ "avoid shading", "shading is not", "not recommend shading",
255
+ "do not shade", "keep panels tracking", "full tracking",
256
+ "shading would reduce", "shading would hurt",
257
+ "shading is irrelevant", "shading is unnecessary",
258
+ "i would not recommend shading", "i don't recommend shading",
259
+ "no shading needed", "shading is not needed",
260
+ "no need to shade", "no need for shading",
261
+ ]
262
+
263
+
264
+ def _text_recommends_shading(text_lower: str) -> bool:
265
+ """Heuristic: does the response recommend activating shade?"""
266
+ has_positive = any(p in text_lower for p in _POSITIVE_SHADE_PHRASES)
267
+ has_negative = any(p in text_lower for p in _NEGATIVE_SHADE_PHRASES)
268
+ # If both present, the negative usually wins (e.g. "some might suggest shading, but I don't recommend it")
269
+ return has_positive and not has_negative
270
+
271
+
272
+ def _text_recommends_no_shading(text_lower: str) -> bool:
273
+ """Heuristic: does the response explicitly recommend NOT shading?"""
274
+ return any(p in text_lower for p in _NEGATIVE_SHADE_PHRASES)
275
+
276
+
277
+ # ---------------------------------------------------------------------------
278
+ # 3. Confidence estimation
279
+ # ---------------------------------------------------------------------------
280
+
281
+ def estimate_confidence(
282
+ tool_called: bool,
283
+ tool_succeeded: bool,
284
+ data_age_minutes: Optional[float],
285
+ tool_name: Optional[str] = None,
286
+ ) -> str:
287
+ """
288
+ Estimate response confidence based on data grounding.
289
+
290
+ Returns one of: "high", "medium", "low", "insufficient_data".
291
+ """
292
+ # No tool called at all
293
+ if not tool_called:
294
+ return "low" # answering from system prompt / training data only
295
+
296
+ # Tool was called but failed
297
+ if not tool_succeeded:
298
+ return "insufficient_data"
299
+
300
+ # Tool succeeded — check data freshness
301
+ if data_age_minutes is None:
302
+ # Computed result (FvCB, shading sim) — no age concept
303
+ return "high"
304
+
305
+ if data_age_minutes <= 30:
306
+ return "high"
307
+ elif data_age_minutes <= 120:
308
+ return "medium"
309
+ else:
310
+ return "low"
311
+
312
+
313
+ # ---------------------------------------------------------------------------
314
+ # 4. Source tagging helper
315
+ # ---------------------------------------------------------------------------
316
+
317
+ # Map tool names to human-readable data sources
318
+ _TOOL_SOURCES = {
319
+ "get_current_weather": "IMS Station 43 (Sde Boker)",
320
+ "get_weather_history": "IMS Station 43 (Sde Boker)",
321
+ "get_vine_state": "ThingsBoard sensors (on-site)",
322
+ "get_sensor_history": "ThingsBoard sensors (on-site)",
323
+ "calc_photosynthesis": "Farquhar FvCB model (computed)",
324
+ "predict_photosynthesis_ml": "ML ensemble (computed)",
325
+ "get_ps_forecast": "FvCB day-ahead forecast (computed)",
326
+ "simulate_shading": "Shadow model simulation (computed)",
327
+ "compare_tilt_angles": "Shadow model simulation (computed)",
328
+ "get_daily_schedule": "Shadow model schedule (computed)",
329
+ "get_energy_generation": "IMS + analytical model (estimated)",
330
+ "get_energy_history": "IMS + analytical model (estimated)",
331
+ "predict_energy": "IMS + analytical model (estimated)",
332
+ "run_day_ahead_advisory": "Gemini day-ahead advisor",
333
+ "explain_biology_rule": "Built-in biology rules",
334
+ "get_photosynthesis_3d": "3D scene (computed)",
335
+ }
336
+
337
+
338
+ def get_source_label(tool_name: str) -> str:
339
+ """Return a human-readable source label for a tool."""
340
+ return _TOOL_SOURCES.get(tool_name, tool_name)
341
+
342
+
343
+ def tag_tool_result(tool_name: str, tool_result: dict) -> dict:
344
+ """
345
+ Add source metadata to a tool result before sending to Gemini.
346
+
347
+ The tagged result helps Gemini cite sources in its response.
348
+ """
349
+ tagged = dict(tool_result)
350
+ tagged["_source"] = get_source_label(tool_name)
351
+ tagged["_tool"] = tool_name
352
+
353
+ # Extract data age if present
354
+ age = tool_result.get("age_minutes")
355
+ if age is not None:
356
+ tagged["_data_age_minutes"] = age
357
+ if age > 60:
358
+ tagged["_freshness_warning"] = (
359
+ f"This data is {age:.0f} minutes old. "
360
+ "Warn the user that conditions may have changed."
361
+ )
362
+
363
+ return tagged
src/chatbot/llm_data_engineer.py ADDED
@@ -0,0 +1,559 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLMDataEngineer: Gemini-assisted sensor data cleaning and feature engineering
3
+ for the SolarWine agrivoltaic pipeline.
4
+
5
+ Phase 8B tasks:
6
+ - llm-data-cleaning : Gemini analyzes sensor stats, returns Z-score/IQR
7
+ filter thresholds for automated anomaly detection.
8
+ - llm-feature-eng : Gemini confirms feature formulae; module generates
9
+ cyclical time features and a Stress Risk Score.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Optional
15
+
16
+ import hashlib
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key
21
+ from src.time_features import add_cyclical_time_features
22
+
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Domain knowledge injected into Gemini prompts
26
+ # ---------------------------------------------------------------------------
27
+
28
+ SENSOR_CONTEXT = {
29
+ "Air1_PAR_ref": {
30
+ "description": "Photosynthetically Active Radiation (PAR)",
31
+ "unit": "μmol photons m⁻² s⁻¹",
32
+ "physical_range": [0, 2500],
33
+ "notes": "Solar PAR at surface cannot exceed ~2200–2500 under any realistic sky. "
34
+ "Values above 3000 are sensor artefacts.",
35
+ },
36
+ "Air1_leafTemperature_ref": {
37
+ "description": "Leaf (canopy) temperature",
38
+ "unit": "°C",
39
+ "physical_range": [-5, 55],
40
+ "notes": "Grape leaf temperature in the Negev can reach ~45°C on extreme days, "
41
+ "but values above 55°C are physiologically impossible for a living leaf.",
42
+ },
43
+ "Air1_airTemperature_ref": {
44
+ "description": "Air temperature near canopy",
45
+ "unit": "°C",
46
+ "physical_range": [0, 50],
47
+ "notes": "Sde Boker record high is ~47°C. Values above 50°C or below 0°C "
48
+ "during the growing season (May–Sep) are sensor faults.",
49
+ },
50
+ "Air1_VPD_ref": {
51
+ "description": "Vapour Pressure Deficit",
52
+ "unit": "kPa",
53
+ "physical_range": [0, 7],
54
+ "notes": "Desert VPD rarely exceeds 6–7 kPa even in extreme heat. "
55
+ "Negative values and values above 8 kPa are sensor errors.",
56
+ },
57
+ "Air1_airHumidity_ref": {
58
+ "description": "Relative Humidity",
59
+ "unit": "%",
60
+ "physical_range": [0, 100],
61
+ "notes": "Must be in [0, 100]. Values outside this range are invalid.",
62
+ },
63
+ "Air1_CO2_ref": {
64
+ "description": "CO₂ concentration (raw sensor, corrected ×0.7 by SensorDataLoader)",
65
+ "unit": "ppm (raw)",
66
+ "physical_range": [400, 4000],
67
+ "notes": "Raw sensor reads ~30% too high (corrected ×0.7 in the data pipeline). "
68
+ "Raw values above 4000 ppm or below 400 ppm are sensor artefacts. "
69
+ "Post-correction (~280–2800 ppm) values above 2000 ppm indicate sensor drift.",
70
+ },
71
+ }
72
+
73
+ _SYSTEM_PROMPT_CLEANING = (
74
+ "You are a precision-agriculture sensor data quality engineer. "
75
+ "You are given descriptive statistics for sensor columns from a vineyard "
76
+ "in the Negev desert, Israel (Sde Boker region, Semillon grapevine, May–September). "
77
+ "Your task: for each column, propose anomaly filter thresholds to flag "
78
+ "or remove invalid readings. "
79
+ "Return ONLY a JSON object (no markdown, no explanation) with the following schema:\n"
80
+ "{\n"
81
+ ' "<column_name>": {\n'
82
+ ' "lower_bound": <float or null>,\n'
83
+ ' "upper_bound": <float or null>,\n'
84
+ ' "zscore_threshold": <float>,\n'
85
+ ' "iqr_multiplier": <float>,\n'
86
+ ' "rationale": "<one sentence>"\n'
87
+ " },\n"
88
+ " ...\n"
89
+ "}"
90
+ )
91
+
92
+ _SYSTEM_PROMPT_FEATURES = (
93
+ "You are a precision-agriculture feature engineering expert specialising in "
94
+ "grapevine physiology and agrivoltaic systems. "
95
+ "Given the available sensor columns, propose the exact mathematical formulae "
96
+ "for a Stress Risk Score that combines VPD and (optionally) CWSI. "
97
+ "Return ONLY a JSON object (no markdown, no explanation) with schema:\n"
98
+ "{\n"
99
+ ' "stress_risk_score": {\n'
100
+ ' "formula_description": "<one sentence>",\n'
101
+ ' "vpd_weight": <float>,\n'
102
+ ' "cwsi_weight": <float>,\n'
103
+ ' "vpd_clip_max": <float>,\n'
104
+ ' "cwsi_clip_max": <float>,\n'
105
+ ' "rationale": "<one or two sentences on biological justification>"\n'
106
+ " }\n"
107
+ "}"
108
+ )
109
+
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # Helper: robust JSON extraction from LLM response
113
+ # ---------------------------------------------------------------------------
114
+
115
+ def _extract_json(text: str) -> dict:
116
+ """Thin wrapper around the shared genai_utils implementation."""
117
+ return extract_json_object(text)
118
+
119
+
120
+ # ---------------------------------------------------------------------------
121
+ # Main class
122
+ # ---------------------------------------------------------------------------
123
+
124
+ class LLMDataEngineer:
125
+ """
126
+ Gemini-assisted sensor data cleaning and feature engineering.
127
+
128
+ Usage
129
+ -----
130
+ engineer = LLMDataEngineer()
131
+ df_clean, thresholds, features_meta = engineer.run_pipeline(df)
132
+ """
133
+
134
+ def __init__(
135
+ self,
136
+ model_name: str = "gemini-2.5-flash",
137
+ api_key: Optional[str] = None,
138
+ verbose: bool = True,
139
+ ):
140
+ self.model_name = model_name
141
+ self._api_key = api_key
142
+ self._client = None
143
+ self.verbose = verbose
144
+ # Caches keyed by content hash — avoids repeated Gemini calls
145
+ self._threshold_cache: dict[str, dict] = {}
146
+ self._feature_spec_cache: dict[str, dict] = {}
147
+
148
+ # ------------------------------------------------------------------
149
+ # Internal helpers
150
+ # ------------------------------------------------------------------
151
+
152
+ @property
153
+ def api_key(self) -> str:
154
+ return get_google_api_key(self._api_key)
155
+
156
+ @property
157
+ def client(self):
158
+ if self._client is None:
159
+ self._client = get_genai_client(self._api_key)
160
+ return self._client
161
+
162
+ def _call_gemini(self, system_prompt: str, user_prompt: str) -> str:
163
+ """Send a prompt to Gemini and return the raw text response."""
164
+ response = self.client.models.generate_content(
165
+ model=self.model_name,
166
+ contents=user_prompt,
167
+ config={"system_instruction": system_prompt},
168
+ )
169
+ return response.text
170
+
171
+ @staticmethod
172
+ def _hash_key(*parts: str) -> str:
173
+ """Create a short hash from string parts for cache keying."""
174
+ return hashlib.md5("|".join(parts).encode()).hexdigest()[:12]
175
+
176
+ def _log(self, msg: str) -> None:
177
+ if self.verbose:
178
+ print(f"[LLMDataEngineer] {msg}")
179
+
180
+ # ------------------------------------------------------------------
181
+ # Step 1: Anomaly detection — ask Gemini for filter thresholds
182
+ # ------------------------------------------------------------------
183
+
184
+ def analyze_anomalies(
185
+ self,
186
+ df: pd.DataFrame,
187
+ columns: Optional[list[str]] = None,
188
+ ) -> dict:
189
+ """
190
+ Send descriptive statistics to Gemini and receive per-column
191
+ anomaly filter thresholds.
192
+
193
+ Parameters
194
+ ----------
195
+ df : DataFrame with sensor measurements
196
+ columns : subset of columns to analyze; defaults to SENSOR_CONTEXT keys
197
+
198
+ Returns
199
+ -------
200
+ dict mapping column_name → {lower_bound, upper_bound,
201
+ zscore_threshold, iqr_multiplier, rationale}
202
+ """
203
+ target_cols = [
204
+ c for c in (columns or list(SENSOR_CONTEXT.keys())) if c in df.columns
205
+ ]
206
+ if not target_cols:
207
+ raise ValueError("No recognized sensor columns found in DataFrame.")
208
+
209
+ stats = df[target_cols].describe(percentiles=[0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99])
210
+
211
+ # Build prompt with stats + domain context
212
+ lines = [
213
+ "Analyze the following sensor columns from a vineyard dataset.",
214
+ "For each column, the physical context and expected range are provided.",
215
+ "",
216
+ ]
217
+ for col in target_cols:
218
+ ctx = SENSOR_CONTEXT.get(col, {})
219
+ lines.append(f"Column: {col}")
220
+ if ctx:
221
+ lines.append(f" Description : {ctx['description']} ({ctx['unit']})")
222
+ lines.append(f" Expected range : {ctx['physical_range']}")
223
+ lines.append(f" Domain notes : {ctx['notes']}")
224
+ lines.append(" Observed statistics:")
225
+ for stat_name, val in stats[col].items():
226
+ lines.append(f" {stat_name:10s}: {val:.4f}")
227
+ lines.append("")
228
+
229
+ user_prompt = "\n".join(lines)
230
+
231
+ # Check cache (same stats → same thresholds)
232
+ cache_key = self._hash_key(user_prompt)
233
+ if cache_key in self._threshold_cache:
234
+ self._log("Using cached anomaly thresholds (same data fingerprint).")
235
+ return self._threshold_cache[cache_key]
236
+
237
+ self._log("Querying Gemini for anomaly thresholds …")
238
+
239
+ try:
240
+ raw = self._call_gemini(_SYSTEM_PROMPT_CLEANING, user_prompt)
241
+ thresholds = _extract_json(raw)
242
+ except Exception as exc:
243
+ self._log(f"Gemini API error: {exc}. Using statistical fallback.")
244
+ thresholds = self._fallback_thresholds(df, target_cols)
245
+
246
+ self._threshold_cache[cache_key] = thresholds
247
+ self._log(f"Received thresholds for {len(thresholds)} columns.")
248
+ return thresholds
249
+
250
+ @staticmethod
251
+ def _fallback_thresholds(df: pd.DataFrame, cols: list[str]) -> dict:
252
+ """Conservative statistical fallback used when API is unavailable."""
253
+ result = {}
254
+ for col in cols:
255
+ ctx = SENSOR_CONTEXT.get(col, {})
256
+ phys = ctx.get("physical_range", [None, None])
257
+ result[col] = {
258
+ "lower_bound": phys[0],
259
+ "upper_bound": phys[1],
260
+ "zscore_threshold": 3.5,
261
+ "iqr_multiplier": 3.0,
262
+ "rationale": "Statistical fallback (Gemini unavailable).",
263
+ }
264
+ return result
265
+
266
+ # ------------------------------------------------------------------
267
+ # Step 2: Apply cleaning
268
+ # ------------------------------------------------------------------
269
+
270
+ def apply_cleaning(
271
+ self,
272
+ df: pd.DataFrame,
273
+ thresholds: dict,
274
+ strategy: str = "clip",
275
+ ) -> pd.DataFrame:
276
+ """
277
+ Apply Gemini-generated thresholds to clean the sensor DataFrame.
278
+
279
+ Parameters
280
+ ----------
281
+ df : raw sensor DataFrame
282
+ thresholds : dict from analyze_anomalies()
283
+ strategy : 'clip' — clamp values to [lower_bound, upper_bound]
284
+ 'drop' — drop rows where any column is out of bounds
285
+ 'nan' — replace out-of-bounds values with NaN
286
+
287
+ Returns
288
+ -------
289
+ Cleaned DataFrame (copy).
290
+ """
291
+ result = df.copy()
292
+ report_lines = ["Anomaly cleaning report:"]
293
+
294
+ for col, thresh in thresholds.items():
295
+ if col not in result.columns:
296
+ continue
297
+ series = result[col]
298
+ lower = thresh.get("lower_bound")
299
+ upper = thresh.get("upper_bound")
300
+
301
+ # Count violations before cleaning
302
+ mask_low = (series < lower) if lower is not None else pd.Series(False, index=series.index)
303
+ mask_high = (series > upper) if upper is not None else pd.Series(False, index=series.index)
304
+
305
+ # Z-score based detection (secondary flag)
306
+ z_thresh = thresh.get("zscore_threshold", 3.5)
307
+ z_scores = (series - series.mean()) / (series.std() + 1e-9)
308
+ mask_zscore = z_scores.abs() > z_thresh
309
+
310
+ # IQR-based detection (tertiary flag)
311
+ iqr_mult = thresh.get("iqr_multiplier", 3.0)
312
+ q1, q3 = series.quantile(0.25), series.quantile(0.75)
313
+ iqr = q3 - q1
314
+ mask_iqr = (series < q1 - iqr_mult * iqr) | (series > q3 + iqr_mult * iqr)
315
+
316
+ # Union of all anomaly flags
317
+ mask_anomaly = mask_low | mask_high | (mask_zscore & mask_iqr)
318
+ n_anomalies = int(mask_anomaly.sum())
319
+
320
+ if n_anomalies > 0:
321
+ report_lines.append(
322
+ f" {col}: {n_anomalies} anomalies ({n_anomalies / len(series) * 100:.2f}%)"
323
+ )
324
+
325
+ if strategy == "clip":
326
+ result[col] = series.clip(
327
+ lower=lower if lower is not None else -np.inf,
328
+ upper=upper if upper is not None else np.inf,
329
+ )
330
+ elif strategy == "nan":
331
+ result.loc[mask_anomaly, col] = np.nan
332
+ elif strategy == "drop":
333
+ result = result.loc[~mask_anomaly].copy()
334
+ else:
335
+ raise ValueError(f"Unknown strategy '{strategy}'. Use 'clip', 'nan', or 'drop'.")
336
+
337
+ self._log("\n".join(report_lines))
338
+ return result
339
+
340
+ # ------------------------------------------------------------------
341
+ # Step 3: Feature engineering
342
+ # ------------------------------------------------------------------
343
+
344
+ def get_feature_spec(
345
+ self,
346
+ available_cols: list[str],
347
+ ) -> dict:
348
+ """
349
+ Ask Gemini to confirm the Stress Risk Score formula given available columns.
350
+
351
+ Returns a feature spec dict with vpd_weight, cwsi_weight, etc.
352
+ Falls back to a biologically motivated default if API is unavailable.
353
+ """
354
+ has_cwsi = any("cwsi" in c.lower() or "CWSI" in c for c in available_cols)
355
+
356
+ # Cache key: just depends on whether CWSI is available
357
+ cache_key = f"cwsi={has_cwsi}"
358
+ if cache_key in self._feature_spec_cache:
359
+ self._log("Using cached feature spec.")
360
+ return self._feature_spec_cache[cache_key]
361
+
362
+ user_prompt = (
363
+ f"Available sensor columns: {available_cols}.\n"
364
+ f"CWSI column available: {has_cwsi}.\n"
365
+ "Propose weights and clip bounds for a Stress Risk Score that linearly "
366
+ "combines normalised VPD and (if available) normalised CWSI. "
367
+ "The score should be in [0, 1] and reflect acute heat/drought stress "
368
+ "for Semillon grapevine in a desert agrivoltaic system."
369
+ )
370
+ self._log("Querying Gemini for Stress Risk Score formula …")
371
+ try:
372
+ raw = self._call_gemini(_SYSTEM_PROMPT_FEATURES, user_prompt)
373
+ spec = _extract_json(raw).get("stress_risk_score", {})
374
+ except Exception as exc:
375
+ self._log(f"Gemini API error: {exc}. Using default feature spec.")
376
+ spec = {}
377
+
378
+ # Merge with defaults so the dict is always complete
379
+ defaults = {
380
+ "formula_description": "Normalised weighted sum of VPD and CWSI stress signals",
381
+ "vpd_weight": 0.6,
382
+ "cwsi_weight": 0.4,
383
+ "vpd_clip_max": 6.0,
384
+ "cwsi_clip_max": 1.0,
385
+ "rationale": (
386
+ "VPD dominates stomatal response (weight 0.6); "
387
+ "CWSI captures cumulative water status (weight 0.4)."
388
+ ),
389
+ }
390
+ for k, v in defaults.items():
391
+ spec.setdefault(k, v)
392
+
393
+ self._feature_spec_cache[cache_key] = spec
394
+ return spec
395
+
396
+ def engineer_features(
397
+ self,
398
+ df: pd.DataFrame,
399
+ timestamp_col: str = "time",
400
+ cwsi_col: Optional[str] = None,
401
+ vpd_col: str = "Air1_VPD_ref",
402
+ feature_spec: Optional[dict] = None,
403
+ ) -> pd.DataFrame:
404
+ """
405
+ Add engineered features to the sensor DataFrame.
406
+
407
+ New columns added
408
+ -----------------
409
+ hour_sin, hour_cos – cyclical encoding of hour-of-day
410
+ doy_sin, doy_cos – cyclical encoding of day-of-year
411
+ stress_risk_score – weighted VPD (+ CWSI) stress index in [0, 1]
412
+
413
+ Parameters
414
+ ----------
415
+ df : sensor DataFrame (original unmodified)
416
+ timestamp_col : name of the datetime column (or index if not a column)
417
+ cwsi_col : optional CWSI column name; if None, stress score uses VPD only
418
+ vpd_col : VPD column name
419
+ feature_spec : pre-fetched spec from get_feature_spec(); fetched if None
420
+
421
+ Returns
422
+ -------
423
+ DataFrame copy with additional feature columns.
424
+ """
425
+ result = df.copy()
426
+
427
+ # --- Cyclical time features (via shared utility) ---
428
+ ts_col = timestamp_col if timestamp_col in result.columns else None
429
+ use_index = ts_col is None and isinstance(result.index, pd.DatetimeIndex)
430
+ if ts_col is not None or use_index:
431
+ result = add_cyclical_time_features(
432
+ result,
433
+ timestamp_col=ts_col,
434
+ index_is_timestamp=use_index,
435
+ )
436
+ self._log("Added cyclical time features: hour_sin, hour_cos, doy_sin, doy_cos")
437
+ else:
438
+ self._log("Warning: no timestamp found; skipping cyclical features.")
439
+
440
+ # --- Stress Risk Score ---
441
+ if vpd_col in result.columns:
442
+ if feature_spec is None:
443
+ feature_spec = self.get_feature_spec(list(result.columns))
444
+
445
+ vpd_w = float(feature_spec.get("vpd_weight", 0.6))
446
+ cwsi_w = float(feature_spec.get("cwsi_weight", 0.4))
447
+ vpd_max = float(feature_spec.get("vpd_clip_max", 6.0))
448
+ cwsi_max = float(feature_spec.get("cwsi_clip_max", 1.0))
449
+
450
+ vpd_norm = (result[vpd_col].clip(0, vpd_max) / vpd_max).fillna(0.0)
451
+
452
+ if cwsi_col and cwsi_col in result.columns:
453
+ cwsi_norm = (result[cwsi_col].clip(0, cwsi_max) / cwsi_max).fillna(0.0)
454
+ effective_cwsi_w = cwsi_w
455
+ effective_vpd_w = vpd_w
456
+ else:
457
+ # No CWSI — redistribute weight entirely to VPD
458
+ cwsi_norm = pd.Series(0.0, index=result.index)
459
+ effective_cwsi_w = 0.0
460
+ effective_vpd_w = 1.0
461
+
462
+ score = (effective_vpd_w * vpd_norm + effective_cwsi_w * cwsi_norm).clip(0, 1)
463
+ result["stress_risk_score"] = score.round(4)
464
+
465
+ self._log(
466
+ f"Added stress_risk_score (vpd_weight={effective_vpd_w:.2f}, "
467
+ f"cwsi_weight={effective_cwsi_w:.2f})"
468
+ )
469
+ else:
470
+ self._log(f"Warning: VPD column '{vpd_col}' not found; skipping stress_risk_score.")
471
+
472
+ return result
473
+
474
+ # ------------------------------------------------------------------
475
+ # Full pipeline
476
+ # ------------------------------------------------------------------
477
+
478
+ def run_pipeline(
479
+ self,
480
+ df: pd.DataFrame,
481
+ cleaning_strategy: str = "clip",
482
+ timestamp_col: str = "time",
483
+ cwsi_col: Optional[str] = None,
484
+ vpd_col: str = "Air1_VPD_ref",
485
+ ) -> tuple[pd.DataFrame, dict, dict]:
486
+ """
487
+ Execute the full LLM data engineering pipeline.
488
+
489
+ Steps
490
+ -----
491
+ 1. Gemini analyzes column stats → anomaly thresholds
492
+ 2. Apply cleaning (clip / nan / drop)
493
+ 3. Gemini confirms feature spec → engineer features
494
+
495
+ Returns
496
+ -------
497
+ (df_engineered, thresholds, feature_spec)
498
+ """
499
+ self._log("=== LLM Data Engineering Pipeline ===")
500
+
501
+ # Step 1: anomaly thresholds
502
+ thresholds = self.analyze_anomalies(df)
503
+
504
+ # Step 2: clean
505
+ df_clean = self.apply_cleaning(df, thresholds, strategy=cleaning_strategy)
506
+
507
+ # Step 3: feature spec + engineering
508
+ feature_spec = self.get_feature_spec(list(df_clean.columns))
509
+ df_engineered = self.engineer_features(
510
+ df_clean,
511
+ timestamp_col=timestamp_col,
512
+ cwsi_col=cwsi_col,
513
+ vpd_col=vpd_col,
514
+ feature_spec=feature_spec,
515
+ )
516
+
517
+ new_cols = [c for c in df_engineered.columns if c not in df.columns]
518
+ self._log(f"Pipeline complete. New columns: {new_cols}")
519
+ return df_engineered, thresholds, feature_spec
520
+
521
+
522
+ # ---------------------------------------------------------------------------
523
+ # CLI entry point
524
+ # ---------------------------------------------------------------------------
525
+
526
+ if __name__ == "__main__":
527
+ from pathlib import Path
528
+
529
+ DATA_DIR = Path(__file__).resolve().parent.parent / "Data"
530
+ sample_path = DATA_DIR / "Seymour" / "sensors_wide_sample.csv"
531
+ sensors_path = DATA_DIR / "Seymour" / "sensors_wide.csv"
532
+ csv_path = sample_path if sample_path.exists() else sensors_path
533
+
534
+ print(f"Loading sensor data from: {csv_path.name}")
535
+ df_raw = pd.read_csv(csv_path)
536
+ print(f"Shape: {df_raw.shape} | Columns: {list(df_raw.columns)}\n")
537
+
538
+ engineer = LLMDataEngineer(verbose=True)
539
+ df_out, thresh, feat_spec = engineer.run_pipeline(df_raw)
540
+
541
+ print("\n--- Anomaly Thresholds (from Gemini) ---")
542
+ for col, t in thresh.items():
543
+ print(
544
+ f" {col:35s} lower={t.get('lower_bound')} "
545
+ f"upper={t.get('upper_bound')} "
546
+ f"z={t.get('zscore_threshold')} "
547
+ f"IQR×{t.get('iqr_multiplier')}"
548
+ )
549
+ print(f" → {t.get('rationale', '')}")
550
+
551
+ print("\n--- Stress Risk Score Spec (from Gemini) ---")
552
+ for k, v in feat_spec.items():
553
+ print(f" {k}: {v}")
554
+
555
+ print("\n--- Engineered DataFrame Head ---")
556
+ eng_cols = ["time", "Air1_PAR_ref", "Air1_VPD_ref",
557
+ "hour_sin", "hour_cos", "doy_sin", "doy_cos", "stress_risk_score"]
558
+ show = [c for c in eng_cols if c in df_out.columns]
559
+ print(df_out[show].head(6).to_string(index=False))
src/chatbot/routing_agent.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RoutingAgent: Gemini-based intelligent model routing for the agrivoltaic
3
+ control system. Given real-time telemetry, routes to either the FvCB
4
+ mechanistic model or the ML ensemble for photosynthesis prediction.
5
+
6
+ Uses gemini-2.5-flash for low-latency (~100ms) routing decisions.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Optional
12
+
13
+ from src.genai_utils import get_genai_client, get_google_api_key
14
+
15
# System instruction for the Gemini routing call. The model is told to reply
# with exactly one of the two tokens that RoutingAgent._parse_response checks
# for ("MODEL_A" / "MODEL_B"); keep prompt and parser in sync if editing.
SYSTEM_PROMPT = (
    "You are a model routing supervisor for an agrivoltaic vineyard control system. "
    "Given real-time telemetry, decide which photosynthesis model to use:\n"
    "- MODEL_A (FvCB mechanistic): accurate under standard conditions (T<30C, low stress)\n"
    "- MODEL_B (ML ensemble): handles non-linear stress, high VPD, extreme heat\n"
    "Reply with ONLY 'MODEL_A' or 'MODEL_B'."
)
22
+
23
+
24
class RoutingAgent:
    """Model router for FvCB vs ML ensemble selection.

    Uses deterministic rules first (covers >90% of cases without any API call).
    Falls back to Gemini only for ambiguous transition-zone conditions.
    """

    # Thresholds for rule-based routing (avoids API calls)
    _TEMP_CLEAR_FVCB = 28.0  # clearly FvCB territory
    _TEMP_CLEAR_ML = 32.0    # clearly ML territory
    _VPD_CLEAR_ML = 2.5      # high VPD → ML
    _CWSI_CLEAR_ML = 0.4     # water stress → ML

    def __init__(
        self,
        model_name: str = "gemini-2.5-flash",
        api_key: Optional[str] = None,
    ):
        self.model_name = model_name
        self._api_key = api_key
        self._client = None  # lazily created by the .client property

    @property
    def api_key(self) -> str:
        """Resolved Google API key (explicit constructor arg or environment)."""
        return get_google_api_key(self._api_key)

    @property
    def client(self):
        """Lazy-init the Gemini client."""
        if self._client is None:
            self._client = get_genai_client(self._api_key)
        return self._client

    # ------------------------------------------------------------------
    # Rule-based fast path (no API call)
    # ------------------------------------------------------------------

    @classmethod
    def _rule_based_route(cls, telemetry: dict) -> Optional[str]:
        """Return 'fvcb' or 'ml' if rules are decisive, else None.

        Missing telemetry keys are treated as "unknown" — a reading can
        still be routed if the remaining signals are decisive.
        """
        temp = telemetry.get("temp_c")
        vpd = telemetry.get("vpd")
        cwsi = telemetry.get("cwsi")

        # High stress signals → ML (no ambiguity)
        if temp is not None and temp >= cls._TEMP_CLEAR_ML:
            return "ml"
        if vpd is not None and vpd >= cls._VPD_CLEAR_ML:
            return "ml"
        if cwsi is not None and cwsi >= cls._CWSI_CLEAR_ML:
            return "ml"

        # Clearly cool/calm → FvCB
        if temp is not None and temp < cls._TEMP_CLEAR_FVCB:
            if vpd is None or vpd < cls._VPD_CLEAR_ML:
                if cwsi is None or cwsi < cls._CWSI_CLEAR_ML:
                    return "fvcb"

        return None  # transition zone — need LLM

    # ------------------------------------------------------------------
    # Gemini routing (only for ambiguous cases)
    # ------------------------------------------------------------------

    @staticmethod
    def _format_telemetry(telemetry: dict) -> str:
        """Format telemetry dict into a readable prompt string.

        Only known fields present in ``telemetry`` are included.
        """
        lines = ["Current telemetry:"]
        field_labels = {
            "temp_c": "Air temperature",
            "ghi_w_m2": "GHI (irradiance)",
            "cwsi": "CWSI (crop water stress)",
            "vpd": "VPD (vapor pressure deficit)",
            "wind_speed_ms": "Wind speed",
            "hour": "Hour of day",
        }
        for key, label in field_labels.items():
            if key in telemetry:
                val = telemetry[key]
                lines.append(f"  {label}: {val}")
        return "\n".join(lines)

    @staticmethod
    def _parse_response(text: str) -> str:
        """Extract model choice from Gemini response.

        Returns 'fvcb' or 'ml'. Falls back to 'fvcb' on ambiguous response.
        """
        text_upper = text.strip().upper()
        if "MODEL_B" in text_upper:
            return "ml"
        return "fvcb"

    @staticmethod
    def _parse_batch_response(text: str) -> dict[int, str]:
        """Parse a batched Gemini reply into ``{index: 'fvcb' | 'ml'}``.

        Expects one answer per line in the form ``<index>: MODEL_A`` or
        ``<index>: MODEL_B``. Parsing is line-based on purpose: the previous
        implementation did a plain substring search for ``f"{idx}: MODEL_B"``
        over the whole response, so a lookup for index 1 would also match
        inside "11: MODEL_B", silently mis-routing rows. If an index appears
        on multiple lines, the last occurrence wins.
        """
        parsed: dict[int, str] = {}
        for line in text.upper().splitlines():
            left, sep, right = line.partition(":")
            if not sep:
                continue
            # Accept both "5: MODEL_A" and "Reading 5: MODEL_A" forms.
            digits = "".join(ch for ch in left if ch.isdigit())
            if not digits:
                continue
            parsed[int(digits)] = "ml" if "MODEL_B" in right else "fvcb"
        return parsed

    def route(self, telemetry: dict) -> str:
        """Route a single telemetry reading to fvcb or ml.

        Uses deterministic rules first; only calls Gemini for ambiguous cases.

        Parameters
        ----------
        telemetry : dict with keys like temp_c, ghi_w_m2, cwsi, vpd,
            wind_speed_ms, hour

        Returns
        -------
        'fvcb' or 'ml'
        """
        # Fast path: rule-based (no API call)
        rule_result = self._rule_based_route(telemetry)
        if rule_result is not None:
            return rule_result

        # Slow path: Gemini for transition-zone ambiguity
        prompt = self._format_telemetry(telemetry)
        try:
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=prompt,
                config={"system_instruction": SYSTEM_PROMPT},
            )
            return self._parse_response(response.text)
        except Exception as e:
            # Fail safe: mechanistic model is the conservative default.
            print(f"RoutingAgent: API error ({e}), falling back to fvcb")
            return "fvcb"

    def route_batch(self, telemetry_rows: list[dict]) -> list[str]:
        """Route a batch of telemetry readings.

        Uses rule-based routing where possible; batches remaining ambiguous
        rows into a single Gemini call.
        """
        results: list[Optional[str]] = [None] * len(telemetry_rows)
        ambiguous_indices: list[int] = []

        # First pass: rule-based
        for i, row in enumerate(telemetry_rows):
            rule_result = self._rule_based_route(row)
            if rule_result is not None:
                results[i] = rule_result
            else:
                ambiguous_indices.append(i)

        # Second pass: single batched Gemini call for ambiguous rows
        if ambiguous_indices:
            lines = [
                "Route each of the following telemetry readings to MODEL_A or MODEL_B.",
                "Reply with one line per reading: '<index>: MODEL_A' or '<index>: MODEL_B'.",
                "",
            ]
            for idx in ambiguous_indices:
                lines.append(f"Reading {idx}: {self._format_telemetry(telemetry_rows[idx])}")
            lines.append("")

            try:
                response = self.client.models.generate_content(
                    model=self.model_name,
                    contents="\n".join(lines),
                    config={"system_instruction": SYSTEM_PROMPT},
                )
                # Line-based parse avoids index substring collisions
                # (e.g. 1 vs 11) — see _parse_batch_response.
                answers = self._parse_batch_response(response.text)
                for idx in ambiguous_indices:
                    results[idx] = answers.get(idx, "fvcb")
            except Exception as e:
                print(f"RoutingAgent: batch API error ({e}), falling back to fvcb")
                for idx in ambiguous_indices:
                    results[idx] = "fvcb"

        return results
196
+
197
+
198
+ # ----------------------------------------------------------------------
199
+ # CLI entry point
200
+ # ----------------------------------------------------------------------
201
+
202
if __name__ == "__main__":
    # Demo: exercise the router on three representative conditions
    # (clear-FvCB, clear-ML, and a transition-zone reading).
    demo_cases = [
        ("Cool morning", {"temp_c": 22.0, "ghi_w_m2": 350.0, "cwsi": 0.15,
                          "vpd": 0.8, "wind_speed_ms": 2.0, "hour": 8}),
        ("Hot afternoon, high stress", {"temp_c": 38.0, "ghi_w_m2": 950.0, "cwsi": 0.72,
                                        "vpd": 3.5, "wind_speed_ms": 1.0, "hour": 14}),
        ("Moderate conditions", {"temp_c": 29.5, "ghi_w_m2": 680.0, "cwsi": 0.35,
                                 "vpd": 1.8, "wind_speed_ms": 3.0, "hour": 11}),
    ]

    router = RoutingAgent()
    print("Gemini Routing Agent — Sample Scenarios\n")

    labels = {"fvcb": "FvCB (mechanistic)", "ml": "ML ensemble"}
    for name, telemetry in demo_cases:
        decision = router.route(telemetry)
        print(f"  {name:30s} → {decision:4s} ({labels[decision]})")
src/chatbot/vineyard_chatbot.py ADDED
@@ -0,0 +1,939 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VineyardChatbot: Gemini-powered conversational advisor for the SolarWine
3
+ agrivoltaic system.
4
+
5
+ Provides a natural-language interface for farmers to ask about shading
6
+ decisions, photosynthesis, weather conditions, vine biology, and energy
7
+ generation. Uses a DataHub of loosely-coupled service providers for all
8
+ data access — the chatbot never imports data clients directly.
9
+
10
+ Anti-hallucination guardrails (v2):
11
+ - Structured responses with confidence, sources, and caveats
12
+ - Mandatory tool grounding for data questions
13
+ - Post-response rule validation
14
+ - Source-tagged tool results
15
+ - Confidence estimation based on data freshness
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import re
22
+ import traceback
23
+ from dataclasses import dataclass, field
24
+ from typing import Optional
25
+
26
+ from src.data_providers import DataHub
27
+ from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key
28
+ from src.chatbot.guardrails import (
29
+ classify_query,
30
+ estimate_confidence,
31
+ get_source_label,
32
+ tag_tool_result,
33
+ validate_response,
34
+ )
35
+
36
+
37
def _extract_json(text: str) -> dict:
    """Delegate JSON extraction to the shared genai_utils helper."""
    parsed = extract_json_object(text)
    return parsed
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Data structures
44
+ # ---------------------------------------------------------------------------
45
+
46
@dataclass
class ChatResponse:
    """Structured response from the chatbot with grounding metadata.

    The grounding fields (confidence, sources, caveats, rule_violations)
    let callers display how trustworthy an answer is and which data
    backed it, rather than showing free text alone.
    """
    # Natural-language answer for the user.
    message: str
    # Tool invocations recorded while producing the answer.
    tool_calls: list[dict] = field(default_factory=list)
    # Structured data payloads gathered from tools.
    data: dict = field(default_factory=dict)
    # --- Grounding metadata (v2) ---
    confidence: str = "low"  # high / medium / low / insufficient_data
    # Labels of the data sources consulted.
    sources: list[str] = field(default_factory=list)
    # Warnings (e.g. stale or missing data) attached to the answer.
    caveats: list[str] = field(default_factory=list)
    # Biology-rule violations found by post-response validation.
    rule_violations: list[dict] = field(default_factory=list)
    # --- Dual-channel advisory (v3) ---
    response_mode: str = "info"  # "info" (factual) or "advisory" (recommendation)
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Biology rules lookup (shared knowledge base)
63
+ # ---------------------------------------------------------------------------
64
+
65
# Shared agronomy knowledge base. Keys double as the valid arguments to the
# explain_biology_rule tool and are embedded (numbered, upper-cased) into the
# system prompt by _build_system_prompt / build_contextual_prompt. Values are
# plain-English rule statements passed verbatim to the LLM — do not reword
# casually, the guardrails and prompts rely on their exact content.
BIOLOGY_RULES = {
    "site_location": (
        "The vineyard site is in Yeruham, Israel (Seymour experimental plot). "
        "Weather data is from IMS station 43 (Sde Boker, Negev). Timezone is always "
        "Asia/Jerusalem (Israel Standard Time / Israel Daylight Time). All timestamps "
        "from tools (get_current_weather, get_vine_state, etc.) are in Israel local time. "
        "When the user asks about 'right now' or 'current' conditions, interpret the "
        "time in the tool result as Israel local time (e.g. 15:16 = afternoon in Yeruham)."
    ),
    "temperature_transition": (
        "Below 30\u00b0C, Semillon photosynthesis is RuBP-limited (light is the "
        "bottleneck \u2014 shading HURTS). Above 30\u00b0C, it becomes Rubisco-limited "
        "(heat is the bottleneck \u2014 shading MAY help). The transition is gradual "
        "(28\u201332\u00b0C)."
    ),
    "no_shade_before_10": (
        "Morning light is critical for carbon fixation. Never shade before "
        "10:00 regardless of temperature."
    ),
    "no_shade_in_may": (
        "May is the flowering/fruit-set period. Yield protection has priority: "
        "avoid shading in May under normal conditions because even small losses "
        "can reduce cluster number and berry set. Only introduce shade in May "
        "as a last resort in extreme heat to prevent serious damage (e.g. "
        "severe sunburn or lethal stress)."
    ),
    "cwsi_threshold": (
        "Crop Water Stress Index > 0.4 indicates real water stress. Below 0.4, "
        "the vine is coping adequately."
    ),
    "berry_sunburn": (
        "Direct exposure at air temperature > 35\u00b0C risks berry sunburn, "
        "especially on the southwest-facing side of clusters in the afternoon."
    ),
    "energy_budget": (
        "Primary objective is to maximise annual PV energy. The vines have a "
        "limited \"protection budget\": up to 5% annual energy sacrifice for "
        "shading that clearly protects vine health or yield. Suggested monthly "
        "caps: May=0%, Jun=15%, Jul=30%, Aug=30%, Sep=20%, Oct=5%. Stay below "
        "these caps unless there is an exceptional agronomic reason."
    ),
    "model_routing": (
        "Use FvCB (Farquhar model) for standard conditions (T < 30\u00b0C, "
        "VPD < 2.5 kPa, adequate water). Use ML ensemble for stress conditions "
        "(T > 30\u00b0C, high VPD, water stress, or any non-linear regime)."
    ),
    "phenological_multiplier": (
        "Stress during veraison (berry ripening) is 1.5x more damaging than "
        "during vegetative growth. Protect veraison at higher cost."
    ),
    "irrigation_management": (
        "Aim to keep soil moisture in a comfortable band for Semillon: avoid "
        "both chronic dryness and chronic saturation. During vegetative growth "
        "allow gentle dry-down between irrigations; during flowering and "
        "veraison, avoid strong swings. Use CWSI and VPD together: if CWSI "
        "stays > 0.4 and VPD is high for several hours, consider an irrigation "
        "event unless the soil is already wet."
    ),
    "fertiliser_management": (
        "Prioritise balanced nutrition over aggressive fertiliser use. Apply "
        "most nitrogen early in the season (budburst to pre-flowering), reduce "
        "near veraison to avoid excessive vigour and delayed ripening. Use "
        "leaf tissue tests and visual cues; avoid fertilising stressed vines "
        "during acute heat or drought events."
    ),
    "photosynthesis_3d": (
        "The 3D viewer shows the vine canopy, solar tracker panel and sun position, "
        "with each zone coloured by photosynthesis rate (green = rate). Connect a "
        "Google API key to use the Vineyard Advisor and generate the interactive "
        "3D scene from the chat (e.g. \"Show me the 3D vine and photosynthesis\")."
    ),
    "no_leaves_no_shade_problem": (
        "When there are no leaves (dormant season, before budburst, or canopy not "
        "yet developed), there is no problem with shading \u2014 the vine is not "
        "photosynthesising, so shading does not harm it. Do not frame the answer as "
        "\"you should not shade\" as if shading would be bad; instead say that "
        "shading is irrelevant right now (no leaves to protect), and panel position "
        "can favour energy. In the Negev, dormancy is roughly October\u2013March; budburst "
        "is typically March\u2013April."
    ),
    "no_shading_must_explain": (
        "When recommending that the farmer should NOT shade (or that shading is not "
        "needed), always give a specific reason tied to photosynthesis or need. "
        "Examples: (1) No leaves / dormant \u2014 no photosynthesis to protect, so shading "
        "is irrelevant. (2) Full sun is beneficial \u2014 vine is light-limited (T < 30\u00b0C), "
        "so shading would reduce photosynthesis; keep panels tracking. (3) No "
        "radiation (night or GHI = 0) \u2014 nothing to manage; no shading decision needed. "
        "Never say only \"you should not shade\" without explaining the underlying "
        "reason (no need for PS protection, or need for full light for PS, etc.)."
    ),
}
156
+
157
+
158
+ # ---------------------------------------------------------------------------
159
+ # System prompt
160
+ # ---------------------------------------------------------------------------
161
+
162
# Chatbot system-prompt template. Two str.format placeholders are filled by
# _build_system_prompt (all rules) and build_contextual_prompt (query-relevant
# subset only):
#   {biology_rules} — numbered rule text from BIOLOGY_RULES
#   {rule_names}    — comma-separated valid explain_biology_rule arguments
# Literal JSON braces in the tool-call example are doubled ({{ }}) so that
# str.format leaves them intact.
_SYSTEM_PROMPT_TEMPLATE = """\
You are a friendly vineyard advisor for the SolarWine agrivoltaic system. \
Site: Yeruham, Israel (Seymour plot, Negev). Weather: IMS station 43 (Sde Boker). \
Timezone: Asia/Jerusalem — all tool timestamps are Israel local time; interpret \
"now" and "current" using that timezone (e.g. 15:16 = afternoon in Yeruham). \
You help the farmer decide when and how much to shade their Semillon grapevines \
(VSP trellis, 1.2 m canopy) under single-axis solar trackers (1.13 m panel at \
2.05 m height, 3.0 m row spacing).

CONTROL OBJECTIVE:
- Primary goal: maximise annual PV energy production.
- Secondary goal: protect vines from heat, water stress, and sunburn using a \
limited shading budget (see energy_budget rule).
- When in doubt and there is no clear sign of dangerous stress, prefer \
keeping panels in their energy-maximising position.

CALENDAR & STAGE HANDLING:
- Do NOT guess the current calendar month. If the user does not supply a \
date and you do not have a phenology tool result, talk in terms of stages \
(budburst, flowering, veraison, etc.) rather than asserting a specific month.

COMMUNICATION STYLE:
- Use plain language; explain jargon when you first use it
- Be concise but thorough
- Always explain WHY a recommendation makes sense biologically
- When uncertain, say so and suggest what data would help

BIOLOGICAL GUIDELINES (strong constraints; balance them with the energy objective):

{biology_rules}

TOOLS AVAILABLE:
You can call tools by including a JSON block in your response with this format:
{{"tool_call": {{"name": "<tool_name>", "args": {{<arguments>}}}}}}

Available tools:

WEATHER & ENVIRONMENT:
- get_current_weather: No args. Returns latest IMS weather readings plus \
current_time_israel, current_date_israel, current_datetime_israel (the real \
"now" in Yeruham). Use these for "right now" answers; timestamp_local is \
when the weather was recorded (may be stale — check age_minutes).
- get_weather_history: Args: start_date (str YYYY-MM-DD), end_date (str \
YYYY-MM-DD). Returns hourly IMS weather summary for a date range.

VINE SENSORS (ThingsBoard):
- get_vine_state: No args. Returns the latest on-site sensor readings from \
ThingsBoard (soil moisture, leaf temperature, fruiting-zone PAR, irrigation \
status, panel surface temps) comparing TREATMENT area (rows 501-502, under \
panels) vs REFERENCE area (rows 503-504, open sky). Use when the user asks \
about current vine conditions, stress levels, soil moisture, or irrigation.
- get_sensor_history: Args: device_type (str: air/crop/soil), area (str: \
treatment/reference/ambient), hours_back (int, default 24). Returns hourly \
averages from ThingsBoard time-series data.

PHOTOSYNTHESIS:
- calc_photosynthesis: Args: PAR (float), Tleaf (float), CO2 (float), \
VPD (float), Tair (float). Returns net assimilation A and limiting factor \
using the mechanistic Farquhar (FvCB) model.
- predict_photosynthesis_ml: Args: features (dict, optional). Returns ML \
ensemble prediction of A. If features not provided, auto-fills from latest \
IMS cache. Use when conditions are stressful (T>30C, high VPD).
- get_ps_forecast: Args: date (str YYYY-MM-DD, optional). Returns 24-hour \
predicted A profile (hourly) using time-series forecasting.

SHADING & TRACKING:
- simulate_shading: Args: angle_offset (float, degrees), hour (int 0-23), \
date (str YYYY-MM-DD, optional). Returns A comparison shaded vs unshaded.
- compare_tilt_angles: Args: angles (list of ints, optional). Returns A \
and energy at different tilt offsets.
- get_daily_schedule: Args: stress_threshold (float, optional), \
shade_angle (int, optional). Returns hourly shading schedule.

ENERGY:
- get_energy_generation: No args. Returns latest energy generation data \
from ThingsBoard (today kWh, current power W).
- get_energy_history: Args: hours_back (int, default 24). Returns energy \
generation time-series.
- predict_energy: Args: date (str YYYY-MM-DD, optional). Returns predicted \
daily energy generation (kWh) based on IMS GHI forecast and panel geometry.

ADVISORY:
- run_day_ahead_advisory: Args: date (str YYYY-MM-DD, optional). Returns \
full stress advisory from the DayAheadAdvisor.

VISUALIZATION:
- get_photosynthesis_3d: Args: hour (int 0-23, optional), date (str YYYY-MM-DD, \
optional). Returns a 3D interactive scene showing the vine, solar tracker, sun, \
and which parts of the canopy are doing how much photosynthesis (green = rate). \
Use when the user asks to see a 3D view, visualize photosynthesis, or show vine \
and tracker together.

BIOLOGY:
- explain_biology_rule: Args: rule_name (str). Returns detailed explanation. \
Valid names: {rule_names}.

RESPONSE RULES:
- CRITICAL: When the user asks about current conditions, specific numbers, \
predictions, sensor readings, or any site-specific data, you MUST call a \
tool. NEVER answer data questions from your training knowledge — always \
use a tool to get real data.
- When quoting numbers from tool results, cite the data source and timestamp. \
Example: "According to IMS Station 43 (recorded 14:30), the temperature is 28°C."
- If tool data is older than 60 minutes, warn: "Note: this data is X minutes old."
- After receiving tool results, explain them in plain language.
- When the answer is "no shading" or "shading not needed", always state the \
specific reason (no leaves / dormant; light-limited so full sun helps PS; or \
no radiation). See no_shading_must_explain and no_leaves_no_shade_problem.
- If the user suggests something that violates a biology rule, refuse clearly \
and explain which rule and why.
- If a tool returns an error or some data is missing, say clearly what data \
is unavailable. Do NOT invent or estimate values — say "I don't have current \
data for X" and explain what you can still answer from biology rules.
- If no API key is available, you can still answer biology questions from \
your built-in knowledge.
- NEVER invent sensor readings, temperatures, or measurements. If you don't \
have data, say so.
"""
280
+
281
+
282
+ # ---------------------------------------------------------------------------
283
+ # Build system prompt from BIOLOGY_RULES to avoid drift
284
+ # ---------------------------------------------------------------------------
285
+
286
def _build_system_prompt() -> str:
    """Render the full system prompt with every biology rule embedded.

    Keeps prompt and BIOLOGY_RULES in sync: each rule is numbered and its
    name upper-cased into a heading, and the comma-separated rule-name list
    is injected so the model knows the valid explain_biology_rule arguments.
    """
    numbered = []
    for idx, (rule_name, rule_text) in enumerate(BIOLOGY_RULES.items(), start=1):
        heading = rule_name.upper().replace('_', ' ')
        numbered.append(f"{idx}. {heading}: {rule_text}")
    return _SYSTEM_PROMPT_TEMPLATE.format(
        biology_rules="\n\n".join(numbered),
        rule_names=", ".join(BIOLOGY_RULES.keys()),
    )


# Full prompt (all rules embedded) — the default when no per-query
# rule retrieval is applied.
CHATBOT_SYSTEM_PROMPT = _build_system_prompt()
299
+
300
# RAG-style rule retrieval: keyword index for selecting relevant rules per
# query. Matching is lower-cased substring search (see retrieve_relevant_rules);
# no vector DB is needed at this scale. Insertion order matters — it breaks
# score ties during retrieval.
_RULE_KEYWORDS = {
    "site_location": [
        "yeruham", "location", "timezone", "israel", "sde boker", "negev",
        "where", "site", "local time",
    ],
    "temperature_transition": [
        "temperature", "30", "rubp", "rubisco", "transition",
        "heat", "hot", "cold", "cool", "warm",
    ],
    "no_shade_before_10": ["morning", "before 10", "early", "sunrise", "dawn"],
    "no_shade_in_may": ["may", "flowering", "fruit set", "spring"],
    "cwsi_threshold": ["cwsi", "water stress", "crop water", "drought"],
    "berry_sunburn": ["sunburn", "berry", "35", "cluster", "grape"],
    "energy_budget": [
        "budget", "energy", "sacrifice", "ceiling", "5%", "kwh",
        "solar", "power", "generation",
    ],
    "model_routing": ["model", "fvcb", "farquhar", "ml", "routing", "predict"],
    "phenological_multiplier": ["veraison", "ripening", "phenol", "stage"],
    "irrigation_management": ["irrigation", "water", "soil", "moisture", "irrigate"],
    "fertiliser_management": ["fertiliser", "fertilizer", "nitrogen", "nutrient"],
    "photosynthesis_3d": ["3d", "visual", "scene", "show"],
    "no_leaves_no_shade_problem": ["no leaves", "dormant", "budburst", "winter"],
    "no_shading_must_explain": [
        "should not shade", "no shading", "don't shade",
        "why not shade",
    ],
}
321
+
322
# Rules that are always included (core constraints)
# regardless of keyword score — see retrieve_relevant_rules.
_PINNED_RULES = {"no_shade_before_10", "energy_budget", "temperature_transition"}
324
+
325
+
326
def retrieve_relevant_rules(query: str, max_rules: int = 5) -> list[str]:
    """Select the biology rules most relevant to ``query``.

    Plain keyword matching (no vector DB needed at this scale): each rule
    scores one point per keyword found in the lower-cased query. The pinned
    core rules are always included; the highest-scoring rules then fill the
    remaining slots up to ``max_rules``. The result preserves BIOLOGY_RULES
    declaration order, not score order.
    """
    q = query.lower()

    # Score every rule that has at least one keyword hit.
    hits: dict[str, int] = {}
    for rule, keywords in _RULE_KEYWORDS.items():
        matched = sum(kw in q for kw in keywords)
        if matched:
            hits[rule] = matched

    # Start from the always-on core rules, then add by descending score.
    chosen = set(_PINNED_RULES)
    for rule in sorted(hits, key=hits.get, reverse=True):
        if len(chosen) >= max_rules:
            break
        chosen.add(rule)

    return [name for name in BIOLOGY_RULES if name in chosen]
350
+
351
+
352
# Pre-compiled, case-insensitive patterns that signal the user is asking for
# a recommendation or action rather than plain information
# (consumed by classify_response_mode).
_ADVISORY_PATTERNS = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\bshould i\b", r"\bwhat should\b", r"\brecommend\b", r"\badvice\b",
        r"\bwhat do i\b", r"\baction\b", r"\bwhat to do\b", r"\bshade now\b",
        r"\birrigate\b", r"\bprepare\b", r"\bneed to\b", r"\bhow much\b",
        r"\bwhen should\b", r"\bcan i\b",
    )
]
358
+
359
+
360
def classify_response_mode(query: str) -> str:
    """Classify a query as factual ('info') or actionable ('advisory').

    A query is 'advisory' when any of the pre-compiled action-seeking
    patterns in _ADVISORY_PATTERNS matches (e.g. "should I", "recommend");
    everything else is treated as an information request.
    """
    wants_advice = any(pattern.search(query) for pattern in _ADVISORY_PATTERNS)
    return "advisory" if wants_advice else "info"
369
+
370
+
371
def build_contextual_prompt(query: str) -> str:
    """Render a system prompt containing only the rules relevant to ``query``.

    Mirrors _build_system_prompt, but swaps the full rule set for the subset
    chosen by retrieve_relevant_rules, keeping the prompt short. The full
    rule-name list is still injected so explain_biology_rule stays usable.
    """
    numbered = []
    for number, rule in enumerate(retrieve_relevant_rules(query), start=1):
        heading = rule.upper().replace('_', ' ')
        numbered.append(f"{number}. {heading}: {BIOLOGY_RULES[rule]}")
    return _SYSTEM_PROMPT_TEMPLATE.format(
        biology_rules="\n\n".join(numbered),
        rule_names=", ".join(BIOLOGY_RULES.keys()),
    )
382
+
383
+
384
+ # ---------------------------------------------------------------------------
385
+ # Main class
386
+ # ---------------------------------------------------------------------------
387
+
388
+ class VineyardChatbot:
389
+ """
390
+ Gemini-powered conversational vineyard advisor.
391
+
392
+ All data access is delegated to a DataHub of loosely-coupled services.
393
+ The chatbot itself only handles:
394
+ - Gemini communication (two-pass tool-calling flow)
395
+ - Tool dispatch (thin delegation to hub services)
396
+ - Guardrails (query classification, response validation, confidence)
397
+ - Offline fallback (keyword-match to biology rules)
398
+
399
+ Usage
400
+ -----
401
+ bot = VineyardChatbot() # default hub
402
+ bot = VineyardChatbot(hub=custom_hub) # injected hub
403
+ response = bot.chat("Should I shade right now?", history=[])
404
+ """
405
+
406
    # Maximum retries when LLM fails to call a required tool
    # (class-level constant so subclasses can tune the retry budget).
    _MAX_TOOL_RETRIES = 1
408
+
409
    def __init__(
        self,
        hub: Optional[DataHub] = None,
        model_name: str = "gemini-2.5-flash",
        api_key: Optional[str] = None,
        verbose: bool = False,
    ):
        """Create a chatbot.

        Parameters
        ----------
        hub : DataHub, optional
            Injected service hub for all data access; defaults to
            ``DataHub.default(verbose=verbose)``.
        model_name : str
            Gemini model identifier used for chat calls.
        api_key : str, optional
            Explicit Google API key; when None, resolved lazily from the
            environment on first use.
        verbose : bool
            When True, debug lines are printed via ``_log``.
        """
        self.hub = hub or DataHub.default(verbose=verbose)
        self.model_name = model_name
        self._api_key = api_key
        self._client = None  # lazy Gemini client (see .client property)
        self.verbose = verbose
421
+
422
+ # ------------------------------------------------------------------
423
+ # Gemini client (lazy)
424
+ # ------------------------------------------------------------------
425
+
426
    @property
    def api_key(self) -> str:
        """Resolved Google API key (explicit constructor arg or environment).

        May raise when no key is available — use ``has_api_key`` for a
        non-raising check.
        """
        return get_google_api_key(self._api_key)
429
+
430
    @property
    def client(self):
        """Gemini client, created lazily on first access and cached."""
        if self._client is None:
            self._client = get_genai_client(self._api_key)
        return self._client
435
+
436
+ @property
437
+ def has_api_key(self) -> bool:
438
+ try:
439
+ get_google_api_key(self._api_key)
440
+ return True
441
+ except (ValueError, Exception):
442
+ return False
443
+
444
+ def _log(self, msg: str) -> None:
445
+ if self.verbose:
446
+ print(f"[VineyardChatbot] {msg}")
447
+
448
+ # ------------------------------------------------------------------
449
+ # Tool dispatch — thin delegation to hub services
450
+ # ------------------------------------------------------------------
451
+
452
+ def _dispatch_tool(self, tool_name: str, args: dict) -> dict:
453
+ """Route a tool call to the correct hub service method."""
454
+ self._log(f"Dispatching tool: {tool_name}({args})")
455
+
456
+ # --- Weather ---
457
+ if tool_name == "get_current_weather":
458
+ return self.hub.weather.get_current()
459
+ elif tool_name == "get_weather_history":
460
+ return self.hub.weather.get_history(
461
+ start_date=str(args.get("start_date", "")),
462
+ end_date=str(args.get("end_date", "")),
463
+ )
464
+
465
+ # --- Vine sensors ---
466
+ elif tool_name == "get_vine_state":
467
+ return self.hub.vine_sensors.get_snapshot()
468
+ elif tool_name == "get_sensor_history":
469
+ return self.hub.vine_sensors.get_history(
470
+ device_type=str(args.get("device_type", "crop")),
471
+ area=str(args.get("area", "treatment")),
472
+ hours_back=int(args.get("hours_back", 24)),
473
+ )
474
+
475
+ # --- Photosynthesis ---
476
+ elif tool_name == "calc_photosynthesis":
477
+ return self.hub.photosynthesis.predict_fvcb(
478
+ PAR=float(args.get("PAR", 1500)),
479
+ Tleaf=float(args.get("Tleaf", 30)),
480
+ CO2=float(args.get("CO2", 400)),
481
+ VPD=float(args.get("VPD", 2.0)),
482
+ Tair=float(args.get("Tair", 30)),
483
+ )
484
+ elif tool_name == "predict_photosynthesis_ml":
485
+ return self.hub.photosynthesis.predict_ml(
486
+ features=args.get("features"),
487
+ )
488
+ elif tool_name == "get_ps_forecast":
489
+ return self.hub.photosynthesis.forecast_day_ahead(
490
+ target_date=args.get("date"),
491
+ )
492
+
493
+ # --- Shading / tracking ---
494
+ elif tool_name == "simulate_shading":
495
+ return self.hub.photosynthesis.simulate_shading(
496
+ angle_offset=float(args.get("angle_offset", 20)),
497
+ hour=int(args.get("hour", 13)),
498
+ date_str=args.get("date"),
499
+ )
500
+ elif tool_name == "compare_tilt_angles":
501
+ angles = args.get("angles")
502
+ if angles and isinstance(angles, list):
503
+ angles = [int(a) for a in angles]
504
+ return self.hub.photosynthesis.compare_angles(angles=angles)
505
+ elif tool_name == "get_daily_schedule":
506
+ return self.hub.photosynthesis.daily_schedule(
507
+ stress_threshold=float(args.get("stress_threshold", 2.0)),
508
+ shade_angle=int(args.get("shade_angle", 20)),
509
+ )
510
+
511
+ # --- Energy ---
512
+ elif tool_name == "get_energy_generation":
513
+ return self.hub.energy.get_current()
514
+ elif tool_name == "get_energy_history":
515
+ return self.hub.energy.get_history(
516
+ hours_back=int(args.get("hours_back", 24)),
517
+ )
518
+ elif tool_name == "predict_energy":
519
+ return self.hub.energy.predict(
520
+ target_date=args.get("date"),
521
+ )
522
+
523
+ # --- Advisory ---
524
+ elif tool_name == "run_day_ahead_advisory":
525
+ return self.hub.advisory.run_advisory(
526
+ target_date=args.get("date"),
527
+ )
528
+
529
+ # --- Biology ---
530
+ elif tool_name == "explain_biology_rule":
531
+ return self.hub.biology.explain_rule(
532
+ rule_name=str(args.get("rule_name", "")),
533
+ )
534
+
535
+ elif tool_name == "get_photosynthesis_3d":
536
+ hour = args.get("hour")
537
+ if hour is not None:
538
+ hour = int(hour)
539
+ return self.hub.photosynthesis.get_photosynthesis_3d_scene(
540
+ hour=hour,
541
+ date_str=args.get("date"),
542
+ )
543
+
544
+ else:
545
+ return {"error": f"Unknown tool: {tool_name}"}
546
+
547
+ # ------------------------------------------------------------------
548
+ # Gemini communication
549
+ # ------------------------------------------------------------------
550
+
551
+ # Number of recent message pairs to keep verbatim
552
+ _RECENT_MESSAGES = 6
553
+ # Max older messages to summarize
554
+ _MAX_SUMMARY_MESSAGES = 20
555
+
556
+ def _build_messages(self, user_message: str, history: list[dict]) -> list[dict]:
557
+ """Build Gemini multi-turn message list with sliding context window.
558
+
559
+ Strategy:
560
+ - Keep the most recent 6 messages verbatim (for conversational flow)
561
+ - Summarize older messages into a single context message
562
+ - Always include pinned context (current date, season)
563
+ """
564
+ messages = []
565
+ n = len(history)
566
+
567
+ if n > self._RECENT_MESSAGES:
568
+ # Summarize older messages
569
+ older = history[:n - self._RECENT_MESSAGES]
570
+ # Take at most _MAX_SUMMARY_MESSAGES from the older portion
571
+ older = older[-self._MAX_SUMMARY_MESSAGES:]
572
+ summary = self._summarize_history(older)
573
+ if summary:
574
+ messages.append({
575
+ "role": "user",
576
+ "parts": [{"text": f"[Conversation context: {summary}]"}],
577
+ })
578
+ messages.append({
579
+ "role": "model",
580
+ "parts": [{"text": "Understood, I'll keep that context in mind."}],
581
+ })
582
+
583
+ # Recent messages verbatim
584
+ recent = history[-self._RECENT_MESSAGES:] if n > self._RECENT_MESSAGES else history
585
+ for entry in recent:
586
+ role = entry.get("role", "user")
587
+ content = entry.get("content", "")
588
+ if role == "user":
589
+ messages.append({"role": "user", "parts": [{"text": content}]})
590
+ elif role == "assistant":
591
+ messages.append({"role": "model", "parts": [{"text": content}]})
592
+
593
+ messages.append({"role": "user", "parts": [{"text": user_message}]})
594
+ return messages
595
+
596
+ @staticmethod
597
+ def _summarize_history(messages: list[dict]) -> str:
598
+ """Create a brief summary of older conversation messages."""
599
+ topics = []
600
+ for entry in messages:
601
+ content = entry.get("content", "")
602
+ role = entry.get("role", "user")
603
+ if role == "user" and content:
604
+ # Extract the core question/topic (first sentence or 100 chars)
605
+ first_line = content.split("\n")[0][:100]
606
+ topics.append(first_line)
607
+
608
+ if not topics:
609
+ return ""
610
+
611
+ # Deduplicate and keep last 5 topics
612
+ seen = set()
613
+ unique = []
614
+ for t in reversed(topics):
615
+ t_lower = t.lower().strip()
616
+ if t_lower not in seen:
617
+ seen.add(t_lower)
618
+ unique.append(t)
619
+ unique.reverse()
620
+
621
+ return "Earlier in this conversation, the user asked about: " + "; ".join(unique[-5:])
622
+
623
+ def _call_gemini(self, messages: list[dict], system_prompt: str | None = None) -> str:
624
+ """Send messages to Gemini and return raw text response."""
625
+ prompt = system_prompt or CHATBOT_SYSTEM_PROMPT
626
+ response = self.client.models.generate_content(
627
+ model=self.model_name,
628
+ contents=messages,
629
+ config={"system_instruction": prompt},
630
+ )
631
+ return response.text
632
+
633
+ def _extract_tool_call(self, text: str) -> Optional[dict]:
634
+ """Try to extract a tool_call JSON from the model response."""
635
+ try:
636
+ match = re.search(r'\{\s*"tool_call"\s*:', text)
637
+ if not match:
638
+ return None
639
+ start = match.start()
640
+ brace_count = 0
641
+ for i in range(start, len(text)):
642
+ if text[i] == "{":
643
+ brace_count += 1
644
+ elif text[i] == "}":
645
+ brace_count -= 1
646
+ if brace_count == 0:
647
+ snippet = text[start:i + 1]
648
+ parsed = json.loads(snippet)
649
+ return parsed.get("tool_call")
650
+ return None
651
+ except (json.JSONDecodeError, ValueError):
652
+ return None
653
+
654
+ # ------------------------------------------------------------------
655
+ # Context gathering (for rule validation)
656
+ # ------------------------------------------------------------------
657
+
658
    def _get_validation_context(self) -> dict:
        """Gather current context for post-response rule validation.

        Returns a dict that may contain (best-effort — missing keys are
        simply omitted):
          - "hour", "month": current local time in Asia/Jerusalem
          - "stage_id": phenological stage estimated for today
          - "temp_c": latest air temperature from the weather service

        All failures are swallowed deliberately: this context is advisory
        and the chatbot must still be able to answer without it.
        """
        ctx = {}
        try:
            # Local imports keep module load light and avoid hard failures
            # when optional dependencies are absent.
            from src.phenology import estimate_stage_for_date
            from datetime import date, datetime
            import zoneinfo

            # Site-local clock: biology rules reference local hour/month.
            tz = zoneinfo.ZoneInfo("Asia/Jerusalem")
            now = datetime.now(tz=tz)
            ctx["hour"] = now.hour
            ctx["month"] = now.month

            # NOTE(review): date.today() is the server's local date, which
            # may differ from the Asia/Jerusalem date near midnight — confirm.
            stage = estimate_stage_for_date(date.today())
            ctx["stage_id"] = stage.id

            # Try to get current temperature from cached weather
            try:
                wx = self.hub.weather.get_current()
                if "error" not in wx:
                    t = wx.get("air_temperature_c")
                    if t is not None:
                        ctx["temp_c"] = float(t)
            except Exception:
                # Weather lookup is optional; continue without temp_c.
                pass

        except Exception:
            # Any failure (imports, phenology, clock) → return whatever we have.
            pass
        return ctx
687
+
688
+ # ------------------------------------------------------------------
689
+ # Main chat method
690
+ # ------------------------------------------------------------------
691
+
692
    def chat(self, user_message: str, history: list[dict] | None = None) -> ChatResponse:
        """
        Process a user message and return a structured response.

        Flow:
        1. Classify query (data vs knowledge vs greeting)
        2. Send to Gemini (Pass 1)
        3. If data query and no tool call → re-prompt to force tool use
        4. If tool call → dispatch → tag result → send back (Pass 2)
        5. Validate response against biology rules
        6. Estimate confidence
        7. Return structured ChatResponse

        Parameters
        ----------
        user_message : str
            The farmer's question for this turn.
        history : list[dict] or None
            Prior turns as {"role": "user"|"assistant", "content": str}.

        Returns
        -------
        ChatResponse
            Final answer plus tool calls, confidence, sources, caveats and
            any rule violations. Never raises: all failures fall back to a
            keyword-matched or generic offline response.
        """
        history = history or []

        # No API key → skip Gemini entirely and answer offline.
        if not self.has_api_key:
            _, response = self._fallback_response(user_message)
            return response

        try:
            # Step 1: Classify query
            query_class = classify_query(user_message)
            self._log(f"Query classified: {query_class.category} "
                      f"(requires_data={query_class.requires_data})")

            # Build contextual system prompt with only relevant biology rules
            contextual_prompt = build_contextual_prompt(user_message)
            messages = self._build_messages(user_message, history)
            self._log("Pass 1: calling Gemini...")
            response_text = self._call_gemini(messages, system_prompt=contextual_prompt)
            self._log(f"Pass 1 response: {response_text[:200]}...")

            tool_call = self._extract_tool_call(response_text)

            # Step 2: Force tool use if query requires data but LLM didn't call one
            if query_class.requires_data and not tool_call:
                self._log("Data query but no tool call — re-prompting...")
                retry_prompt = (
                    "The user is asking about site-specific data or current conditions. "
                    "You MUST call a tool to answer this — do not use your training "
                    "knowledge for real-time data. Please call the appropriate tool now."
                )
                # Append the model's first attempt plus the retry instruction,
                # then give the model one more chance (single retry only).
                messages.append({"role": "model", "parts": [{"text": response_text}]})
                messages.append({"role": "user", "parts": [{"text": retry_prompt}]})
                response_text = self._call_gemini(messages, system_prompt=contextual_prompt)
                tool_call = self._extract_tool_call(response_text)

            # Step 3: Process tool call if present
            tool_name = None
            tool_result = None
            tool_succeeded = False
            data_age = None

            if tool_call:
                tool_name = tool_call.get("name", "")
                tool_args = tool_call.get("args", {})
                self._log(f"Tool call detected: {tool_name}")

                # Dispatch failures become an error payload so Pass 2 can
                # still explain what went wrong to the user.
                try:
                    tool_result = self._dispatch_tool(tool_name, tool_args)
                    tool_succeeded = "error" not in tool_result
                except Exception as exc:
                    tool_result = {"error": f"Tool execution failed: {exc}"}
                    tool_succeeded = False

                # Tag result with source metadata
                tagged_result = tag_tool_result(tool_name, tool_result)
                data_age = tagged_result.get("_data_age_minutes")

                # Build Pass 2 prompt with source citation instructions
                source_label = get_source_label(tool_name)
                freshness_note = ""
                # Data older than an hour gets an explicit staleness warning.
                if data_age is not None and data_age > 60:
                    freshness_note = (
                        f"\n\nIMPORTANT: This data is {data_age:.0f} minutes old. "
                        "Tell the user the data may be stale and conditions may have changed."
                    )

                tool_result_text = (
                    f"Tool result for {tool_name} "
                    f"(source: {source_label}):\n"
                    f"```json\n{json.dumps(tagged_result, indent=2, default=str)}\n```\n\n"
                    f"Explain this result to the farmer in plain language. "
                    f"When quoting numbers, mention that they come from {source_label}."
                    f"{freshness_note}"
                )

                messages.append({"role": "model", "parts": [{"text": response_text}]})
                messages.append({"role": "user", "parts": [{"text": tool_result_text}]})

                self._log("Pass 2: calling Gemini with tool result...")
                final_response = self._call_gemini(messages)
                self._log(f"Pass 2 response: {final_response[:200]}...")
            else:
                final_response = response_text

            # Step 4: Estimate confidence
            confidence = estimate_confidence(
                tool_called=tool_call is not None,
                tool_succeeded=tool_succeeded,
                data_age_minutes=data_age,
                tool_name=tool_name,
            )

            # Step 5: Post-response rule validation
            validation_ctx = self._get_validation_context()
            violations = validate_response(
                response_text=final_response,
                context=validation_ctx,
            )

            caveats: list[str] = []
            violation_dicts: list[dict] = []

            # "block"-severity violations replace the answer outright;
            # "warn"-severity violations are surfaced as caveats.
            for v in violations:
                violation_dicts.append({
                    "rule": v.rule_name,
                    "severity": v.severity,
                    "message": v.message,
                })
                if v.severity == "block":
                    # Override the response with the correction
                    final_response = (
                        f"{v.correction}\n\n"
                        f"*(Original response was overridden because it violated "
                        f"the **{v.rule_name.replace('_', ' ')}** rule.)*"
                    )
                    confidence = "high"  # rule-based override is deterministic
                    self._log(f"BLOCKED: {v.rule_name} — {v.message}")
                elif v.severity == "warn":
                    caveats.append(v.correction)
                    self._log(f"WARNING: {v.rule_name} — {v.message}")

            # Build data freshness caveat
            if data_age is not None and data_age > 60:
                caveats.append(
                    f"Data is {data_age:.0f} minutes old — conditions may have changed."
                )

            # Build sources list
            sources: list[str] = []
            if tool_name:
                sources.append(get_source_label(tool_name))
            if not tool_call and query_class.category == "knowledge":
                sources.append("Built-in biology rules")

            response_mode = classify_response_mode(user_message)

            return ChatResponse(
                message=final_response,
                tool_calls=[{"name": tool_name, "args": tool_call.get("args", {}),
                             "result": tool_result}] if tool_call else [],
                data=tool_result if tool_result else {},
                confidence=confidence,
                sources=sources,
                caveats=caveats,
                rule_violations=violation_dicts,
                response_mode=response_mode,
            )

        except Exception as exc:
            # Any unexpected failure: log, then try the keyword fallback
            # before returning a generic connectivity message.
            self._log(f"Chat error: {exc}\n{traceback.format_exc()}")
            matched, fallback = self._fallback_response(user_message)
            if matched:
                return fallback
            return ChatResponse(
                message=(
                    "I'm having trouble connecting to the AI service right now. "
                    "You can still ask me about vine biology rules \u2014 I have those "
                    "built in. For data queries, please check that your Google API "
                    "key is configured."
                ),
                confidence="insufficient_data",
                sources=[],
                caveats=["AI service connection failed"],
            )
868
+
869
+ # ------------------------------------------------------------------
870
+ # Fallback (no API key / offline)
871
+ # ------------------------------------------------------------------
872
+
873
    def _fallback_response(self, user_message: str) -> tuple[bool, ChatResponse]:
        """Keyword-match fallback when Gemini is unavailable.

        Returns (matched, response): ``matched`` is True when at least one
        built-in biology rule keyword matched the message (the response
        then quotes those rules), False when only the generic offline help
        text could be produced.
        """
        msg_lower = user_message.lower()

        # Rule name → trigger keywords, checked as substrings of the
        # lower-cased message. Escapes: \u00b0 = degree sign, \u2014 = em dash.
        # NOTE(review): the "3D" entry can never match msg_lower (it is
        # lower-cased); "3d" already covers it — confirm and drop.
        rule_matches = {
            "site_location": ["yeruham", "location", "timezone", "right now", "current time",
                              "what time", "israel time", "local time"],
            "temperature_transition": ["temperature", "30 degree", "30\u00b0", "rubp", "rubisco",
                                       "transition", "heat", "hot"],
            "no_shade_before_10": ["morning", "before 10", "early", "sunrise"],
            "no_shade_in_may": ["may", "flowering", "fruit set", "fruit-set"],
            "cwsi_threshold": ["cwsi", "water stress", "crop water"],
            "berry_sunburn": ["sunburn", "berry", "35\u00b0", "35 degree"],
            "energy_budget": ["budget", "energy", "sacrifice", "ceiling", "5%",
                              "monthly", "generation", "kwh", "power", "solar"],
            "model_routing": ["model", "fvcb", "farquhar", "ml", "routing",
                              "predict", "forecast"],
            "phenological_multiplier": ["veraison", "ripening", "phenolog"],
            "irrigation_management": ["irrigation", "water", "soil moisture"],
            "fertiliser_management": ["fertiliser", "fertilizer", "nitrogen", "nutrient"],
            "photosynthesis_3d": ["3d", "3D", "visual", "visualize", "visualise",
                                  "model show", "vine and tracker", "sun and vine"],
            "no_leaves_no_shade_problem": ["no leaves", "dormant", "budburst", "no canopy"],
            "no_shading_must_explain": ["should not shade", "don't shade", "no shading"],
        }

        # Collect every rule whose keyword list hits the message.
        matched_rules = []
        for rule_name, keywords in rule_matches.items():
            if any(kw in msg_lower for kw in keywords):
                matched_rules.append(rule_name)

        if matched_rules:
            parts = ["Here's what I know about that (from built-in biology rules):\n"]
            for rule in matched_rules:
                parts.append(f"**{rule.replace('_', ' ').title()}:** {BIOLOGY_RULES[rule]}\n")
            parts.append(
                "\n*Note: I'm running without an AI connection, so I can only "
                "answer from built-in biology rules. Connect a Google API key "
                "for full advisory capabilities.*"
            )
            return True, ChatResponse(
                message="\n".join(parts),
                confidence="medium",
                sources=["Built-in biology rules"],
            )

        # No rule matched — return the generic offline help text.
        return False, ChatResponse(
            message=(
                "I'm currently running without an AI connection (no Google API key). "
                "I can answer questions about vine biology rules \u2014 try asking about:\n\n"
                "- Temperature and shading thresholds\n"
                "- Morning light rules\n"
                "- May shading restrictions\n"
                "- Water stress (CWSI)\n"
                "- Berry sunburn risk\n"
                "- Energy budget limits\n"
                "- Model routing (FvCB vs ML)\n"
                "- Veraison protection\n"
                "- Irrigation management\n"
                "- Energy generation and prediction\n\n"
                "*Connect a Google API key for full advisory capabilities "
                "(weather, photosynthesis calculations, shading simulations, "
                "energy analysis).*"
            ),
            confidence="insufficient_data",
            sources=[],
        )
src/chronos_forecaster.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.forecasting.chronos_forecaster."""
2
+ from src.forecasting.chronos_forecaster import * # noqa: F401, F403
src/command_arbiter.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CommandArbiter: priority stack, hysteresis, and fallback logic for tracker commands.
3
+
4
+ Sits between the TradeoffEngine output and the physical tracker actuator.
5
+ Ensures:
6
+ 1. Weather protection and harvest mode override everything.
7
+ 2. Safety rail alerts and simulation timeouts fall back to θ_astro.
8
+ 3. Hysteresis prevents sub-slot jitter (motor protection).
9
+ 4. All fallbacks default to full astronomical tracking (zero energy cost).
10
+
11
+ Priority Stack (highest to lowest):
12
+ P1 Weather Protection → stow angle (flat, 0°)
13
+ P2 Mechanical Harvest → vertical park (90°)
14
+ P3 Safety Rail Alert → θ_astro
15
+ P4 Simulation Timeout → θ_astro
16
+ P5 TradeoffEngine → θ_astro or θ_astro + offset
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from dataclasses import dataclass, field
22
+ from datetime import datetime
23
+ from enum import Enum
24
+ from typing import Optional
25
+
26
+ import pandas as pd
27
+
28
+ from config.settings import (
29
+ ANGLE_TOLERANCE_DEG,
30
+ HYSTERESIS_WINDOW_MIN,
31
+ SIMULATION_TIMEOUT_SEC,
32
+ WIND_STOW_SPEED_MS,
33
+ )
34
+
35
+
36
class CommandSource(str, Enum):
    """Priority source identifiers for tracker commands.

    str-valued so members compare equal to their string values and
    serialize naturally in logs/JSON.
    """
    WEATHER = "weather_protection"   # P1: stow for wind/hail protection
    HARVEST = "harvest_mode"         # P2: vertical park for machinery clearance
    SAFETY = "safety_fallback"       # P3: safety-rail alert → revert to θ_astro
    TIMEOUT = "timeout_fallback"     # P4: simulation too slow → revert to θ_astro
    ENGINE = "engine"                # P5: normal TradeoffEngine output
    HYSTERESIS = "hysteresis"        # command suppressed by the motor-protection filter
    INITIAL = "initial"              # first dispatch (no history yet)
    STABLE = "stable"                # dispatched after the stability window was met
46
+
47
+
48
@dataclass
class ArbiterDecision:
    """Output of the CommandArbiter."""

    angle: float  # final tracker tilt angle (degrees)
    dispatch: bool  # True = send command to actuator
    source: str  # which priority level decided
    requested_angle: float = 0.0  # what was originally requested
    suppressed_reason: Optional[str] = None  # why dispatch=False (if suppressed)

    def decision_tags(self) -> list[str]:
        """Return diagnostic tags describing this decision."""
        suppression = (
            [f"suppressed:{self.suppressed_reason}"]
            if not self.dispatch and self.suppressed_reason
            else []
        )
        return [f"source:{self.source}"] + suppression
63
+
64
+
65
class CommandArbiter:
    """Priority stack + hysteresis for tracker tilt commands.

    Stateful across ticks: remembers the last dispatched angle, the last
    dispatch time, and a rolling buffer of recent requests that drives the
    stability (hysteresis) check.

    Parameters
    ----------
    hysteresis_window_min : float
        Minimum time (minutes) between consecutive tilt changes.
    angle_tolerance_deg : float
        Changes smaller than this are suppressed (motor protection).
    """

    def __init__(
        self,
        hysteresis_window_min: float = HYSTERESIS_WINDOW_MIN,
        angle_tolerance_deg: float = ANGLE_TOLERANCE_DEG,
    ):
        self.window_min = hysteresis_window_min
        self.tolerance = angle_tolerance_deg
        # Rolling (timestamp, requested_angle) pairs, trimmed to window_min.
        self._buffer: list[tuple[datetime, float]] = []
        # Last dispatched angle; trackers start flat (0°).
        self.current_angle: float = 0.0
        self._last_dispatch_time: Optional[datetime] = None

    # ------------------------------------------------------------------
    # Priority selection
    # ------------------------------------------------------------------

    def select_source(
        self,
        engine_result: dict,
        safety_valid: bool = True,
        sim_time_sec: float = 0.0,
        weather_override: Optional[dict] = None,
        harvest_active: bool = False,
        theta_astro: float = 0.0,
    ) -> dict:
        """Select the highest-priority command source.

        Checks are ordered P1→P5; the first match wins.

        Parameters
        ----------
        engine_result : dict
            Output from TradeoffEngine.evaluate_slot() or find_minimum_dose().
            Must contain 'angle' key (or 'chosen_offset_deg' for DoseResult).
        safety_valid : bool
            False if SafetyRails detected FvCB/ML divergence.
        sim_time_sec : float
            Wall-clock time the simulation took (seconds).
        weather_override : dict or None
            If not None, must contain 'target_angle' and optionally 'reason'.
        harvest_active : bool
            True if mechanical harvesting is in progress.
        theta_astro : float
            Astronomical tracking angle (safe default).

        Returns
        -------
        dict with 'angle', 'source', 'reason'
        """
        # P1: Weather protection (wind stow, hail, etc.)
        if weather_override is not None:
            return {
                "angle": weather_override.get("target_angle", 0.0),
                "source": CommandSource.WEATHER,
                "reason": weather_override.get("reason", "weather override active"),
            }

        # P2: Mechanical harvesting — panels go vertical for clearance
        if harvest_active:
            return {
                "angle": 90.0,
                "source": CommandSource.HARVEST,
                "reason": "mechanical harvesting in progress",
            }

        # P3: Safety rail alert — FvCB/ML divergence too high
        if not safety_valid:
            return {
                "angle": theta_astro,
                "source": CommandSource.SAFETY,
                "reason": "FvCB/ML divergence exceeded threshold; reverting to astronomical",
            }

        # P4: Simulation timeout — shadow model took too long
        if sim_time_sec > SIMULATION_TIMEOUT_SEC:
            return {
                "angle": theta_astro,
                "source": CommandSource.TIMEOUT,
                "reason": f"simulation took {sim_time_sec:.1f}s > {SIMULATION_TIMEOUT_SEC}s limit",
            }

        # P5: Normal — use TradeoffEngine result
        # (falls back to θ_astro if the engine result carries no angle)
        angle = engine_result.get("angle", theta_astro)
        return {
            "angle": angle,
            "source": CommandSource.ENGINE,
            "reason": engine_result.get("action", "tradeoff_engine"),
        }

    # ------------------------------------------------------------------
    # Hysteresis filter
    # ------------------------------------------------------------------

    def should_move(
        self,
        requested_angle: float,
        timestamp: datetime,
    ) -> ArbiterDecision:
        """Apply hysteresis filter to a requested angle change.

        Motor protection logic:
        - Suppresses changes smaller than angle_tolerance_deg.
        - Requires the requested angle to be stable for hysteresis_window_min
          before dispatching.
        - Immediate dispatch if this is the first command or if the change
          is large (e.g., weather stow).
        """
        # Record request in buffer
        self._buffer.append((timestamp, requested_angle))

        # Trim buffer to window
        cutoff = timestamp - pd.Timedelta(minutes=self.window_min)
        self._buffer = [(t, a) for t, a in self._buffer if t >= cutoff]

        # Change smaller than tolerance → suppress
        angle_diff = abs(requested_angle - self.current_angle)
        if angle_diff <= self.tolerance:
            return ArbiterDecision(
                angle=self.current_angle,
                dispatch=False,
                source=CommandSource.HYSTERESIS,
                requested_angle=requested_angle,
                suppressed_reason=f"change {angle_diff:.1f}° ≤ tolerance {self.tolerance}°",
            )

        # First command or only one entry in buffer → dispatch immediately
        if len(self._buffer) < 2 or self._last_dispatch_time is None:
            self.current_angle = requested_angle
            self._last_dispatch_time = timestamp
            return ArbiterDecision(
                angle=requested_angle,
                dispatch=True,
                source=CommandSource.INITIAL,
                requested_angle=requested_angle,
            )

        # Check stability: all recent entries must agree within tolerance
        stable = all(
            abs(a - requested_angle) <= self.tolerance
            for _, a in self._buffer
        )

        if stable:
            self.current_angle = requested_angle
            self._last_dispatch_time = timestamp
            return ArbiterDecision(
                angle=requested_angle,
                dispatch=True,
                source=CommandSource.STABLE,
                requested_angle=requested_angle,
            )

        # Not yet stable — hold the current angle until requests settle.
        return ArbiterDecision(
            angle=self.current_angle,
            dispatch=False,
            source=CommandSource.HYSTERESIS,
            requested_angle=requested_angle,
            suppressed_reason="angle not stable within hysteresis window",
        )

    # ------------------------------------------------------------------
    # Combined: select + filter
    # ------------------------------------------------------------------

    def arbitrate(
        self,
        timestamp: datetime,
        engine_result: dict,
        theta_astro: float,
        safety_valid: bool = True,
        sim_time_sec: float = 0.0,
        weather_override: Optional[dict] = None,
        harvest_active: bool = False,
    ) -> ArbiterDecision:
        """Full arbitration: priority selection → hysteresis filter.

        This is the main entry point for the 15-min control loop.
        """
        selected = self.select_source(
            engine_result=engine_result,
            safety_valid=safety_valid,
            sim_time_sec=sim_time_sec,
            weather_override=weather_override,
            harvest_active=harvest_active,
            theta_astro=theta_astro,
        )

        # Weather and harvest overrides bypass hysteresis (safety-critical).
        # The buffer is cleared so stale requests don't influence the first
        # stability check after the override ends.
        if selected["source"] in {CommandSource.WEATHER, CommandSource.HARVEST}:
            self.current_angle = selected["angle"]
            self._last_dispatch_time = timestamp
            self._buffer.clear()
            return ArbiterDecision(
                angle=selected["angle"],
                dispatch=True,
                source=selected["source"],
                requested_angle=selected["angle"],
            )

        # Normal path: apply hysteresis
        decision = self.should_move(selected["angle"], timestamp)
        # Override source with the priority level that selected the angle
        # (suppressed decisions keep the HYSTERESIS source for logging).
        if decision.dispatch:
            decision.source = selected["source"]
        return decision

    # ------------------------------------------------------------------
    # Wind stow helper (delegates to operational_modes)
    # ------------------------------------------------------------------

    @staticmethod
    def check_wind_stow(
        wind_speed_ms: float,
        stow_threshold: float = WIND_STOW_SPEED_MS,
    ) -> Optional[dict]:
        """Return a weather override dict if wind speed exceeds stow threshold.

        Note: ControlLoop uses OperationalModeChecker instead of this method.
        Kept for backward compatibility with direct arbiter usage.
        """
        # Local import — presumably deferred to avoid an import cycle; confirm.
        from src.operational_modes import check_wind_stow as _check
        result = _check(wind_speed_ms, stow_threshold)
        return result.to_weather_override()
296
+
297
+
298
class AstronomicalTracker:
    """Pure sun-following. The always-safe default.

    Wraps ShadowModel to provide a simple get_angle(timestamp) interface.
    """

    def __init__(self, shadow_model=None):
        self._shadow_model = shadow_model

    @property
    def shadow_model(self):
        """Lazily construct the ShadowModel on first access and cache it."""
        if self._shadow_model is None:
            from src.shading.solar_geometry import ShadowModel
            self._shadow_model = ShadowModel()
        return self._shadow_model

    def get_angle(self, timestamp: datetime) -> float:
        """Return the astronomical tracking angle for a given timestamp."""
        when = pd.Timestamp(timestamp)
        if when.tzinfo is None:
            # Naive timestamps are interpreted as UTC.
            when = when.tz_localize("UTC")
        solar = self.shadow_model.get_solar_position(pd.DatetimeIndex([when]))
        elevation = float(solar["solar_elevation"].iloc[0])
        # Sun below the horizon → no tracking target, stay flat.
        if elevation <= 0:
            return 0.0
        azimuth = float(solar["solar_azimuth"].iloc[0])
        tilt = self.shadow_model.compute_tracker_tilt(azimuth, elevation)
        return float(tilt["tracker_theta"])
src/control_loop.py ADDED
@@ -0,0 +1,779 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ControlLoop: the 15-minute agrivoltaic control cycle.
3
+
4
+ Each tick:
5
+ 1. Fetch live sensor data (IMS weather + TB vine sensors)
6
+ 2. Load/validate the day-ahead plan for today
7
+ 3. Look up the planned offset for the current slot
8
+ 4. Run live gate check (may override plan if conditions diverged)
9
+ 5. Check energy budget (block intervention if budget exhausted)
10
+ 6. Run CommandArbiter (priority stack + hysteresis)
11
+ 7. Resolve per-tracker fleet overrides (rare; default = all same angle)
12
+ 8. Dispatch angle to trackers via TrackerDispatcher
13
+ 9. Spend energy budget for the slot
14
+ 10. Check plan divergence and trigger re-plan if needed
15
+ 11. Log the result
16
+
17
+ The loop can run as:
18
+ - **one-shot**: ``loop.tick()`` — execute one cycle (called externally)
19
+ - **continuous**: ``loop.run()`` — blocking loop with 15-min sleep
20
+ - **plan-only**: ``loop.tick(dry_run=True)`` — compute decisions without sending
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import logging
27
+ import time
28
+ from dataclasses import dataclass, field
29
+ from datetime import date, datetime, timedelta, timezone
30
+ from pathlib import Path
31
+ from typing import Dict, List, Optional
32
+
33
+ import pandas as pd
34
+
35
+ from config.settings import (
36
+ ANGLE_TOLERANCE_DEG,
37
+ DAILY_PLAN_PATH,
38
+ DP_SLOT_DURATION_MIN,
39
+ PLAN_DIVERGENCE_THRESHOLD_KWH,
40
+ PLAN_DIVERGENCE_THRESHOLD_SLOTS,
41
+ PLAN_REPLAN_COOLDOWN_SLOTS,
42
+ SIMULATION_LOG_PATH,
43
+ )
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Tick result
50
+ # ---------------------------------------------------------------------------
51
+
52
@dataclass
class TickResult:
    """Snapshot of everything a single control-loop cycle observed and decided."""

    timestamp: datetime
    slot_index: int  # 15-minute slot of the day, 0–95
    stage_id: str = "unknown"

    # --- day-ahead plan lookup ---
    plan_offset_deg: float = 0.0   # offset the day-ahead plan prescribes
    plan_gate_passed: bool = False

    # --- live override ---
    live_gate_passed: bool = False
    live_override: bool = False    # True when live data forced a deviation from the plan
    override_reason: Optional[str] = None

    # --- arbiter decision ---
    target_angle: float = 0.0
    dispatch: bool = False
    source: str = ""

    # --- dispatch outcome ---
    trackers_verified: int = 0
    trackers_total: int = 0
    dispatch_error: Optional[str] = None

    # --- energy accounting ---
    energy_cost_kwh: float = 0.0      # energy sacrificed by this slot's offset

    # --- budget tracking ---
    budget_spent_kwh: float = 0.0     # amount actually deducted from the budget
    budget_remaining_kwh: float = 0.0 # daily budget left after this slot

    # --- model routing ---
    model_route: str = ""             # "fvcb" or "ml" — model chosen for this tick

    # --- fleet overrides (per-tracker angles, when they differ from default) ---
    fleet_overrides: Optional[Dict[str, float]] = None

    # --- plan divergence tracking ---
    divergence_cumulative_kwh: float = 0.0
    divergence_consecutive: int = 0
    replan_triggered: bool = False

    # --- sensor snapshot ---
    air_temp_c: Optional[float] = None
    ghi_w_m2: Optional[float] = None
    wind_speed_ms: Optional[float] = None

    def to_dict(self) -> dict:
        """Serialise to a JSON-friendly dict (datetimes become ISO-8601 strings)."""
        serialised = {}
        for attr_name, attr_value in self.__dict__.items():
            if isinstance(attr_value, datetime):
                serialised[attr_name] = attr_value.isoformat()
            else:
                serialised[attr_name] = attr_value
        return serialised
105
+
106
+
107
+ # ---------------------------------------------------------------------------
108
+ # ControlLoop
109
+ # ---------------------------------------------------------------------------
110
+
111
+ class ControlLoop:
112
+ """15-minute agrivoltaic control loop.
113
+
114
+ Parameters
115
+ ----------
116
+ dry_run : bool
117
+ If True, compute decisions but don't send commands to trackers.
118
+ plan_path : Path
119
+ Path to the day-ahead plan JSON file.
120
+ log_path : Path
121
+ Path for simulation log output.
122
+ """
123
+
124
    def __init__(
        self,
        dry_run: bool = True,
        plan_path: Path = DAILY_PLAN_PATH,
        log_path: Path = SIMULATION_LOG_PATH,
    ):
        """Create a control loop.

        Parameters
        ----------
        dry_run : bool
            If True, decisions are computed but no tracker commands are sent.
        plan_path : Path
            Location of the day-ahead plan JSON file.
        log_path : Path
            Base path for the simulation/tick log output.
        """
        self.dry_run = dry_run
        self.plan_path = plan_path
        self.log_path = log_path

        # Heavy collaborators are created lazily on first property access,
        # keeping ControlLoop construction cheap and import-side-effect free.
        self._arbiter = None
        self._dispatcher = None
        self._astro = None
        self._hub = None
        self._modes = None
        self._fleet = None
        self._schedulers: Dict[str, object] = {}   # plan_id -> TrackerScheduler cache
        self._budget_planner = None
        self._router = None
        self._current_plan: Optional[dict] = None  # day-ahead plan currently in force
        self._tick_log: List[dict] = []            # TickResult dicts for this session

        # Daily budget state (recomputed when the calendar date changes)
        self._daily_budget_plan: Optional[dict] = None
        self._daily_budget_date: Optional[date] = None

        # Divergence tracking (reset on re-plan or new day)
        self._divergence_cumulative_kwh: float = 0.0
        self._divergence_consecutive: int = 0
        self._last_replan_slot: int = -99  # sentinel well before slot 0 so cooldown never blocks the first re-plan
        self._replan_count: int = 0
156
+
157
+ # ------------------------------------------------------------------
158
+ # Lazy component init
159
+ # ------------------------------------------------------------------
160
+
161
    @property
    def arbiter(self):
        """Lazily constructed CommandArbiter (priority stack + hysteresis)."""
        if self._arbiter is None:
            from src.command_arbiter import CommandArbiter
            self._arbiter = CommandArbiter()
        return self._arbiter

    @property
    def dispatcher(self):
        """Lazily constructed TrackerDispatcher; inherits this loop's dry_run flag."""
        if self._dispatcher is None:
            from src.tracker_dispatcher import TrackerDispatcher
            self._dispatcher = TrackerDispatcher(dry_run=self.dry_run)
        return self._dispatcher

    @property
    def astro(self):
        """Lazily constructed astronomical sun-tracking angle provider."""
        if self._astro is None:
            from src.command_arbiter import AstronomicalTracker
            self._astro = AstronomicalTracker()
        return self._astro

    @property
    def hub(self):
        """Lazily constructed DataHub (weather + sensor service registry)."""
        if self._hub is None:
            from src.data.data_providers import DataHub
            self._hub = DataHub.default()
        return self._hub

    @property
    def modes(self):
        """Lazily constructed operational-mode checker (wind stow, heat shield, …)."""
        if self._modes is None:
            from src.operational_modes import OperationalModeChecker
            self._modes = OperationalModeChecker()
        return self._modes

    @property
    def fleet(self):
        """Lazily constructed TrackerFleet (per-tracker plan assignments)."""
        if self._fleet is None:
            from src.tracker_fleet import TrackerFleet
            self._fleet = TrackerFleet()
        return self._fleet

    @property
    def budget_planner(self):
        """Lazily constructed EnergyBudgetPlanner (annual → weekly → daily budgets)."""
        if self._budget_planner is None:
            from src.energy_budget import EnergyBudgetPlanner
            self._budget_planner = EnergyBudgetPlanner()
        return self._budget_planner

    @property
    def router(self):
        """Lazily constructed RoutingAgent selecting the photosynthesis model (FvCB vs ML)."""
        if self._router is None:
            from src.chatbot.routing_agent import RoutingAgent
            self._router = RoutingAgent()
        return self._router
216
+
217
+ # ------------------------------------------------------------------
218
+ # Plan loading
219
+ # ------------------------------------------------------------------
220
+
221
+ def _build_persistence_forecast(self) -> tuple[list[float], list[float]]:
222
+ """Build 96-slot temp/GHI forecast from last available IMS day."""
223
+ ims_df = self.hub.weather.get_dataframe()
224
+ if ims_df.empty:
225
+ return [25.0] * 96, [0.0] * 96
226
+
227
+ df = ims_df.copy()
228
+ if "timestamp_utc" in df.columns:
229
+ df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
230
+ df = df.set_index("timestamp_utc")
231
+
232
+ last_day = df.index.max().normalize()
233
+ day_data = df[df.index.normalize() == last_day]
234
+ if len(day_data) < 10:
235
+ last_day -= pd.Timedelta(days=1)
236
+ day_data = df[df.index.normalize() == last_day]
237
+
238
+ temps = [25.0] * 96
239
+ ghis = [0.0] * 96
240
+ for _, row in day_data.iterrows():
241
+ slot = row.name.hour * 4 + row.name.minute // 15
242
+ if 0 <= slot < 96:
243
+ t = row.get("air_temperature_c")
244
+ if pd.notna(t):
245
+ temps[slot] = float(t)
246
+ g = row.get("ghi_w_m2")
247
+ if pd.notna(g):
248
+ ghis[slot] = float(g)
249
+ return temps, ghis
250
+
251
+ def _compute_daily_budget(self, target: date) -> float:
252
+ """Compute the daily energy budget from the annual/monthly hierarchy."""
253
+ annual = self.budget_planner.compute_annual_plan(target.year)
254
+ month_budget = annual["monthly_budgets"].get(target.month, 0.5)
255
+ weekly = self.budget_planner.compute_weekly_plan(target, month_budget)
256
+ dow = target.weekday()
257
+ return weekly["daily_budgets_kWh"][min(dow, 6)]
258
+
259
+ def load_plan(self, target_date: Optional[date] = None) -> Optional[dict]:
260
+ """Load the day-ahead plan for the given date."""
261
+ target = target_date or date.today()
262
+
263
+ # Try loading from file
264
+ if self.plan_path.exists():
265
+ try:
266
+ with open(self.plan_path) as f:
267
+ plan = json.load(f)
268
+ if plan.get("target_date") == str(target):
269
+ self._current_plan = plan
270
+ logger.info("Loaded plan for %s (%d slots)",
271
+ target, len(plan.get("slots", [])))
272
+ return plan
273
+ except Exception as exc:
274
+ logger.warning("Failed to load plan from %s: %s", self.plan_path, exc)
275
+
276
+ # No plan file or wrong date — compute on the fly
277
+ try:
278
+ from src.day_ahead_planner import DayAheadPlanner
279
+
280
+ temps, ghis = self._build_persistence_forecast()
281
+ daily_budget = self._compute_daily_budget(target)
282
+
283
+ planner = DayAheadPlanner()
284
+ plan_obj = planner.plan_day(target, temps, ghis, max(daily_budget, 0.1))
285
+ plan = plan_obj.to_dict()
286
+
287
+ # Save for reuse
288
+ self.plan_path.parent.mkdir(parents=True, exist_ok=True)
289
+ with open(self.plan_path, "w") as f:
290
+ json.dump(plan, f, indent=2)
291
+
292
+ self._current_plan = plan
293
+ return plan
294
+
295
+ except Exception as exc:
296
+ logger.error("Plan generation failed: %s", exc)
297
+ return None
298
+
299
+ def _get_slot_plan(self, slot_index: int) -> Optional[dict]:
300
+ """Look up the planned offset for a given slot."""
301
+ if not self._current_plan:
302
+ return None
303
+ slots = self._current_plan.get("slots", [])
304
+ for s in slots:
305
+ t = s.get("time", "")
306
+ try:
307
+ h, m = map(int, t.split(":"))
308
+ s_idx = h * 4 + m // 15
309
+ if s_idx == slot_index:
310
+ return s
311
+ except (ValueError, AttributeError):
312
+ continue
313
+ return None
314
+
315
+ # ------------------------------------------------------------------
316
+ # Energy budget
317
+ # ------------------------------------------------------------------
318
+
319
    def _ensure_daily_budget(self, today: date) -> Optional[dict]:
        """Load or reuse the slot-level budget plan for *today*.

        Resolution order: in-process cache → Redis snapshot (survives
        worker restarts) → fresh computation. Returns None only when the
        budget cannot be computed at all.
        """
        if self._daily_budget_plan and self._daily_budget_date == today:
            return self._daily_budget_plan

        # Try restoring from Redis (survives worker restarts)
        try:
            from src.data.redis_cache import get_redis
            redis = get_redis()
            if redis:
                cached = redis.get_json("control:budget")
                # Payload schema mirrors _persist_budget: {"date": ..., "plan": ...}
                if cached and cached.get("date") == str(today):
                    self._daily_budget_plan = cached["plan"]
                    self._daily_budget_date = today
                    logger.info("Restored daily budget from Redis for %s", today)
                    return self._daily_budget_plan
        except Exception:
            # Best-effort: a Redis outage must not block budget computation.
            pass

        try:
            daily_budget = self._compute_daily_budget(today)
            self._daily_budget_plan = self.budget_planner.compute_daily_plan(
                today, daily_budget,
            )
            self._daily_budget_date = today

            # Fresh day ⇒ divergence counters start from scratch.
            self._divergence_cumulative_kwh = 0.0
            self._divergence_consecutive = 0
            self._last_replan_slot = -99

            # Persist to Redis so sibling processes see the same budget.
            self._persist_budget(today)

            return self._daily_budget_plan
        except Exception as exc:
            logger.warning("Failed to compute daily budget: %s", exc)
            return None
357
+
358
+ def _persist_budget(self, today: date) -> None:
359
+ """Save daily budget state to Redis for cross-process access."""
360
+ try:
361
+ from src.data.redis_cache import get_redis
362
+ import json as _json
363
+ redis = get_redis()
364
+ if redis and self._daily_budget_plan:
365
+ payload = {
366
+ "date": str(today),
367
+ "plan": _json.loads(_json.dumps(self._daily_budget_plan, default=str)),
368
+ }
369
+ redis.set_json("control:budget", payload, ttl=86400)
370
+ except Exception as exc:
371
+ logger.debug("Budget Redis persist failed: %s", exc)
372
+
373
+ @staticmethod
374
+ def _slot_key(now: datetime) -> str:
375
+ """Format a datetime as a slot key like '10:15'."""
376
+ return f"{now.hour:02d}:{(now.minute // 15) * 15:02d}"
377
+
378
+ # ------------------------------------------------------------------
379
+ # Fleet overrides (Task 1)
380
+ # ------------------------------------------------------------------
381
+
382
    def _resolve_fleet_overrides(
        self, now: datetime, theta_astro: float,
    ) -> Dict[str, float]:
        """Resolve per-tracker angle overrides from TrackerFleet assignments.

        Returns an empty dict in the common case (all trackers follow the
        arbiter's angle). Only returns overrides for trackers that have
        an explicit non-tracking assignment active right now.

        Parameters
        ----------
        now : datetime
            Time at which assignments/events are evaluated.
        theta_astro : float
            Current astronomical tracking angle; anti-tracking offsets are
            applied relative to it.
        """
        from src.tracker_fleet import tracker_id_to_name
        from src.tracker_scheduler import TrackerScheduler, PLAN_LIBRARY

        overrides: Dict[str, float] = {}
        try:
            best = self.fleet.get_all_best_assignments(now)
        except Exception as exc:
            # Fleet lookup is optional — failure just means "no overrides".
            logger.debug("Fleet assignment lookup skipped: %s", exc)
            return overrides

        for tracker_id, assignment in best.items():
            if assignment is None:
                continue

            plan_id = assignment.plan_id
            # Get or create scheduler for this plan (cached in self._schedulers)
            if plan_id not in self._schedulers:
                if assignment.plan_file:
                    # File-backed plan takes precedence over the built-in library.
                    plan_path = Path(assignment.plan_file)
                    if plan_path.exists():
                        self._schedulers[plan_id] = TrackerScheduler(
                            plan_file=plan_path,
                        )
                    else:
                        logger.warning("Plan file not found: %s", plan_path)
                        continue
                elif plan_id in PLAN_LIBRARY:
                    self._schedulers[plan_id] = TrackerScheduler(
                        plan_data=PLAN_LIBRARY[plan_id],
                    )
                else:
                    logger.debug("Unknown plan_id %r, skipping", plan_id)
                    continue

            sched = self._schedulers[plan_id]
            event = sched.get_event(now)
            if event is None:
                # No event active at this time — tracker keeps default angle.
                continue

            mode = event.get("mode")
            event_angle = event.get("angle")

            if mode == "tracking" or mode is None:
                # Same as default astronomical tracking — no override needed
                continue
            elif mode == "antiTracking" and event_angle is not None:
                # Anti-tracking is expressed as an offset from the astro angle.
                overrides[tracker_id_to_name(tracker_id)] = theta_astro + event_angle
            elif mode == "fixed_angle" and event_angle is not None:
                # Fixed angle is absolute.
                overrides[tracker_id_to_name(tracker_id)] = event_angle

        return overrides
442
+
443
+ # ------------------------------------------------------------------
444
+ # Plan divergence (Task 3)
445
+ # ------------------------------------------------------------------
446
+
447
+ def _check_plan_divergence(
448
+ self,
449
+ slot_index: int,
450
+ planned_offset: float,
451
+ actual_offset: float,
452
+ planned_cost: float,
453
+ actual_cost: float,
454
+ ) -> bool:
455
+ """Track divergence between plan and execution. Return True if re-plan needed."""
456
+ cost_diff = abs(planned_cost - actual_cost)
457
+ offset_diverged = abs(planned_offset - actual_offset) > ANGLE_TOLERANCE_DEG
458
+
459
+ self._divergence_cumulative_kwh += cost_diff
460
+
461
+ if offset_diverged:
462
+ self._divergence_consecutive += 1
463
+ else:
464
+ self._divergence_consecutive = 0
465
+
466
+ # Check cooldown
467
+ if slot_index - self._last_replan_slot < PLAN_REPLAN_COOLDOWN_SLOTS:
468
+ return False
469
+
470
+ if self._divergence_cumulative_kwh >= PLAN_DIVERGENCE_THRESHOLD_KWH:
471
+ logger.warning(
472
+ "Cumulative divergence %.3f kWh >= %.3f threshold; triggering re-plan",
473
+ self._divergence_cumulative_kwh, PLAN_DIVERGENCE_THRESHOLD_KWH,
474
+ )
475
+ return True
476
+
477
+ if self._divergence_consecutive >= PLAN_DIVERGENCE_THRESHOLD_SLOTS:
478
+ logger.warning(
479
+ "%d consecutive divergent slots >= %d threshold; triggering re-plan",
480
+ self._divergence_consecutive, PLAN_DIVERGENCE_THRESHOLD_SLOTS,
481
+ )
482
+ return True
483
+
484
+ return False
485
+
486
    def _trigger_replan(self, now: datetime, slot_index: int) -> bool:
        """Re-generate the day-ahead plan from the current slot onward.

        Uses only the *remaining* daily budget so the new plan cannot
        overspend. Returns True when a new plan was produced and saved.
        """
        today = now.date()
        daily_bp = self._ensure_daily_budget(today)
        spent = daily_bp["cumulative_spent"] if daily_bp else 0.0
        remaining = (daily_bp["daily_total_kWh"] - spent) if daily_bp else 0.0

        if remaining <= 0:
            # Nothing left to allocate — re-planning would be pointless.
            logger.info("Re-plan skipped: no budget remaining")
            return False

        try:
            from src.day_ahead_planner import DayAheadPlanner

            temps, ghis = self._build_persistence_forecast()

            planner = DayAheadPlanner()
            plan_obj = planner.plan_day(today, temps, ghis, max(remaining, 0.01))
            plan = plan_obj.to_dict()

            # Save for reuse (same path the regular planner writes to)
            self.plan_path.parent.mkdir(parents=True, exist_ok=True)
            with open(self.plan_path, "w") as f:
                json.dump(plan, f, indent=2)

            self._current_plan = plan
            # Start the cooldown and reset divergence counters for the new plan.
            self._last_replan_slot = slot_index
            self._divergence_cumulative_kwh = 0.0
            self._divergence_consecutive = 0
            self._replan_count += 1

            n_slots = len(plan.get("slots", []))
            logger.info(
                "Re-plan #%d at slot %d: %d slots, %.4f kWh remaining budget",
                self._replan_count, slot_index, n_slots, remaining,
            )
            return True
        except Exception as exc:
            logger.error("Re-plan failed: %s", exc)
            return False
526
+
527
+ # ------------------------------------------------------------------
528
+ # Main tick
529
+ # ------------------------------------------------------------------
530
+
531
    def tick(self, timestamp: Optional[datetime] = None) -> TickResult:
        """Execute one control loop cycle.

        Steps (numbered below): load plan → fetch weather → route model →
        astro angle → plan lookup → live gate + budget guard → arbitrate →
        fleet overrides → dispatch → spend budget → divergence check → log.

        Parameters
        ----------
        timestamp : datetime, optional
            Override current time (for simulation/replay).

        Returns
        -------
        TickResult
            Full snapshot of what this cycle observed and decided.
        """
        now = timestamp or datetime.now(tz=timezone.utc)
        slot_index = now.hour * 4 + now.minute // 15

        result = TickResult(timestamp=now, slot_index=slot_index)

        # 1. Load plan if needed (date rollover or nothing loaded yet)
        today = now.date() if hasattr(now, 'date') else date.today()
        if (not self._current_plan or
                self._current_plan.get("target_date") != str(today)):
            self.load_plan(today)

        # 2. Fetch live weather (failures leave the sensor fields as None)
        try:
            wx = self.hub.weather.get_current()
            if "error" not in wx:
                result.air_temp_c = wx.get("air_temperature_c")
                result.ghi_w_m2 = wx.get("ghi_w_m2")
                result.wind_speed_ms = wx.get("wind_speed_ms")
        except Exception as exc:
            logger.warning("Weather fetch failed: %s", exc)

        # 2b. Route model selection (FvCB vs ML) based on live conditions;
        #     falls back to "fvcb" when routing itself fails.
        try:
            telemetry = {
                "temp_c": result.air_temp_c,
                "ghi_w_m2": result.ghi_w_m2,
                "hour": now.hour,
            }
            result.model_route = self.router.route(telemetry)
        except Exception as exc:
            logger.debug("Model routing failed: %s", exc)
            result.model_route = "fvcb"

        # 3. Get astronomical tracking angle (baseline all offsets add onto)
        theta_astro = self.astro.get_angle(now)

        # 4. Look up plan for this slot
        slot_plan = self._get_slot_plan(slot_index)
        if slot_plan:
            result.plan_offset_deg = slot_plan.get("offset_deg", 0.0)
            result.plan_gate_passed = slot_plan.get("gate_passed", False)
            result.energy_cost_kwh = slot_plan.get("energy_cost_kwh", 0.0)
            result.stage_id = self._current_plan.get("stage_id", "unknown")
        else:
            logger.debug("No plan slot for index %d — defaulting to astronomical", slot_index)

        # 5. Live gate check — override plan if conditions diverged
        # Intentionally simpler than DayAheadPlanner._check_gate():
        # the planner has forecast CWSI + FvCB shading_helps; the live gate
        # only checks real-time temp and GHI as hard constraints.
        planned_offset = result.plan_offset_deg
        live_offset = planned_offset  # default: follow the plan

        if result.air_temp_c is not None:
            from config.settings import (
                NO_SHADE_BEFORE_HOUR,
                SEMILLON_TRANSITION_TEMP_C,
                SHADE_ELIGIBLE_GHI_ABOVE,
            )

            if planned_offset > 0:
                blocked = False
                reason = ""

                # Gate conditions are checked in priority order; first hit wins.
                if now.hour < NO_SHADE_BEFORE_HOUR:
                    blocked, reason = True, "morning — no shading before 10:00"
                elif result.air_temp_c < SEMILLON_TRANSITION_TEMP_C:
                    blocked, reason = True, f"temp {result.air_temp_c:.0f}°C < {SEMILLON_TRANSITION_TEMP_C:.0f}°C"
                elif result.ghi_w_m2 is not None and result.ghi_w_m2 < SHADE_ELIGIBLE_GHI_ABOVE:
                    blocked, reason = True, f"GHI {result.ghi_w_m2:.0f} < {SHADE_ELIGIBLE_GHI_ABOVE:.0f}"

                if blocked:
                    live_offset = 0.0
                    result.live_override = True
                    result.override_reason = reason
                    logger.info("Live override: plan offset %.0f° → 0° (%s)",
                                planned_offset, reason)

        result.live_gate_passed = live_offset > 0

        # 5b. Budget guard — block intervention if daily budget exhausted
        if live_offset > 0:
            daily_bp = self._ensure_daily_budget(today)
            if daily_bp:
                sk = self._slot_key(now)
                slot_remaining = daily_bp["slot_budgets"].get(sk, 0.0)
                margin_remaining = daily_bp["daily_margin_remaining_kWh"]
                if slot_remaining + margin_remaining <= 0:
                    live_offset = 0.0
                    result.live_override = True
                    result.override_reason = "daily energy budget exhausted"
                    logger.info("Budget guard: forcing astronomical (budget depleted)")

        # 6. Build engine result for arbiter
        target_angle = theta_astro + live_offset
        engine_result = {
            "angle": target_angle,
            "action": f"plan_offset_{live_offset:.0f}deg",
        }

        # Check operational modes (wind stow, heat shield, harvest) —
        # these outrank the plan inside the arbiter's priority stack.
        mode_override = self.modes.check_all(
            wind_speed_ms=result.wind_speed_ms,
            air_temp_c=result.air_temp_c,
            theta_astro=theta_astro,
            current_date=today,
        )
        weather_override = mode_override.to_weather_override() if mode_override else None

        # 7. Arbitrate
        decision = self.arbiter.arbitrate(
            timestamp=now,
            engine_result=engine_result,
            theta_astro=theta_astro,
            weather_override=weather_override,
        )

        result.target_angle = decision.angle
        result.dispatch = decision.dispatch
        # decision.source may be an Enum or a plain string — normalise to str.
        result.source = decision.source.value if hasattr(decision.source, 'value') else str(decision.source)

        # 7b. Resolve per-tracker fleet overrides (rare; most ticks return {})
        fleet_overrides = self._resolve_fleet_overrides(now, theta_astro)
        if fleet_overrides:
            result.fleet_overrides = fleet_overrides
            logger.info("Fleet overrides active: %s", fleet_overrides)

        # 8. Dispatch to trackers (dispatcher honours self.dry_run)
        if decision.dispatch:
            try:
                dispatch_result = self.dispatcher.dispatch(
                    decision, angle_overrides=fleet_overrides or None,
                )
                result.trackers_verified = dispatch_result.n_success
                result.trackers_total = len(dispatch_result.trackers)
                if not dispatch_result.all_verified:
                    failed = [t.device_name for t in dispatch_result.trackers if not t.verified]
                    result.dispatch_error = f"failed: {', '.join(failed)}"
            except Exception as exc:
                result.dispatch_error = str(exc)
                logger.error("Dispatch failed: %s", exc)

        # 9. Spend energy budget (may spend less than requested — shortfall logged)
        if result.energy_cost_kwh > 0:
            daily_bp = self._ensure_daily_budget(today)
            if daily_bp:
                sk = self._slot_key(now)
                result.budget_spent_kwh = self.budget_planner.spend_slot(
                    daily_bp, sk, result.energy_cost_kwh,
                )
                result.budget_remaining_kwh = (
                    sum(daily_bp["slot_budgets"].values())
                    + daily_bp["daily_margin_remaining_kWh"]
                )
                # Persist updated budget to Redis
                self._persist_budget(today)

                if result.budget_spent_kwh < result.energy_cost_kwh:
                    logger.warning(
                        "Budget shortfall: requested %.4f kWh, spent %.4f kWh (slot %s)",
                        result.energy_cost_kwh, result.budget_spent_kwh, sk,
                    )

        # 10. Check plan divergence and trigger re-plan if needed
        if slot_plan:
            # When an override fired, live_offset is already 0; the
            # conditional keeps the intent explicit.
            actual_offset = live_offset if not result.live_override else 0.0
            needs_replan = self._check_plan_divergence(
                slot_index=slot_index,
                planned_offset=result.plan_offset_deg,
                actual_offset=actual_offset,
                planned_cost=slot_plan.get("energy_cost_kwh", 0.0),
                actual_cost=result.energy_cost_kwh,
            )
            result.divergence_cumulative_kwh = self._divergence_cumulative_kwh
            result.divergence_consecutive = self._divergence_consecutive
            if needs_replan:
                result.replan_triggered = self._trigger_replan(now, slot_index)

        # 11. Log
        self._tick_log.append(result.to_dict())
        logger.info(
            "Tick %02d:%02d slot=%d angle=%.1f° offset=%.0f° dispatch=%s source=%s"
            " budget_remaining=%.3f kWh%s",
            now.hour, now.minute, slot_index, decision.angle,
            live_offset, decision.dispatch, decision.source,
            result.budget_remaining_kwh,
            f" [OVERRIDE: {result.override_reason}]" if result.live_override else "",
        )

        return result
729
+
730
+ # ------------------------------------------------------------------
731
+ # Continuous run
732
+ # ------------------------------------------------------------------
733
+
734
+ def run(self, max_ticks: Optional[int] = None) -> None:
735
+ """Run the control loop continuously (blocking).
736
+
737
+ Parameters
738
+ ----------
739
+ max_ticks : int, optional
740
+ Stop after this many ticks (for testing). None = run forever.
741
+ """
742
+ logger.info("Control loop starting (dry_run=%s)", self.dry_run)
743
+ tick_count = 0
744
+
745
+ while max_ticks is None or tick_count < max_ticks:
746
+ try:
747
+ result = self.tick()
748
+ tick_count += 1
749
+ except Exception as exc:
750
+ logger.error("Tick failed: %s", exc)
751
+
752
+ # Sleep until next 15-min boundary
753
+ now = datetime.now(tz=timezone.utc)
754
+ next_slot = now.replace(
755
+ minute=(now.minute // DP_SLOT_DURATION_MIN + 1) * DP_SLOT_DURATION_MIN % 60,
756
+ second=0, microsecond=0,
757
+ )
758
+ if next_slot <= now:
759
+ next_slot += timedelta(hours=1)
760
+ sleep_sec = (next_slot - now).total_seconds()
761
+ logger.debug("Sleeping %.0f s until %s", sleep_sec, next_slot)
762
+ time.sleep(max(sleep_sec, 1.0))
763
+
764
+ # ------------------------------------------------------------------
765
+ # Log access
766
+ # ------------------------------------------------------------------
767
+
768
+ def get_log(self) -> List[dict]:
769
+ """Return all tick results from this session."""
770
+ return list(self._tick_log)
771
+
772
+ def save_log(self, path: Optional[Path] = None) -> Path:
773
+ """Save tick log to JSON file."""
774
+ out = path or self.log_path.with_suffix(".json")
775
+ out.parent.mkdir(parents=True, exist_ok=True)
776
+ with open(out, "w") as f:
777
+ json.dump(self._tick_log, f, indent=2, default=str)
778
+ logger.info("Saved %d tick results to %s", len(self._tick_log), out)
779
+ return out
src/data/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Data access: IMS, sensors, schema, ThingsBoard, data providers."""
src/data/data_providers.py ADDED
@@ -0,0 +1,1180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data provider layer for the VineyardChatbot.
3
+
4
+ Architecture
5
+ ------------
6
+ Each data domain gets a **Service** class that encapsulates:
7
+ - data fetching (IMS API, ThingsBoard API, model inference, ...)
8
+ - caching / TTL logic
9
+ - error handling (returns dict with "error" key on failure)
10
+ - serialisation to LLM-friendly dicts
11
+
12
+ Services are registered on a lightweight **DataHub** which is injected
13
+ into the chatbot. The chatbot's tool methods become thin one-liners
14
+ that delegate to ``self.hub.<service>.<method>()``.
15
+
16
+ ┌────────────────────┐
17
+ │ VineyardChatbot │
18
+ │ (tool dispatch) │
19
+ └────────┬───────────┘
20
+ │ self.hub
21
+ ┌────────▼───────────┐
22
+ │ DataHub │
23
+ │ (service registry) │
24
+ └────────┬───────────┘
25
+ ┌──────────┬────────┼────────┬──────────┐
26
+ ▼ ▼ ▼ ▼ ▼
27
+ WeatherSvc VineSensorSvc PSSvc EnergySvc BiologySvc
28
+ │ │ │ │ │
29
+ IMSClient TB Client Farquhar TB+Analytical rules dict
30
+ ML Pred
31
+
32
+ Loose coupling guarantees:
33
+ - The chatbot never imports IMS / TB / Farquhar / ML directly.
34
+ - Each service can be unit-tested in isolation (pass a mock client).
35
+ - Adding a new data source = write a new Service + register it.
36
+ - Services own their TTL caches — the chatbot is stateless w.r.t. data.
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import time
42
+ import traceback
43
+ from abc import ABC, abstractmethod
44
+ from dataclasses import dataclass, field
45
+ from datetime import date, datetime, timedelta, timezone
46
+ from typing import Any, Dict, List, Optional
47
+
48
+ import numpy as np
49
+ import pandas as pd
50
+
51
+
52
+ # ═══════════════════════════════════════════════════════════════════════
53
+ # TTL Cache helper
54
+ # ═══════════════════════════════════════════════════════════════════════
55
+
56
@dataclass
class _CacheEntry:
    # One in-memory cache record: the stored value and its expiry deadline.
    value: Any
    expires_at: float  # time.monotonic() deadline

class TTLCache:
    """TTL cache with an optional Redis backend.

    When a Redis client is available (``UPSTASH_REDIS_URL`` configured),
    values are mirrored there so multiple processes (API server, workers)
    share state. Without Redis everything stays in-memory, so single-process
    callers keep working unchanged.
    """

    def __init__(self, ttl_seconds: float = 300, redis_prefix: str = ""):
        self.ttl = ttl_seconds
        self._prefix = redis_prefix
        self._store: Dict[str, _CacheEntry] = {}
        # Redis client is resolved lazily on first use to avoid
        # import-time side effects.
        self._redis_checked = False
        self._redis = None

    def _get_redis(self):
        """Resolve the shared Redis client once; None when unavailable."""
        if self._redis_checked:
            return self._redis
        self._redis_checked = True
        try:
            from src.data.redis_cache import get_redis
            self._redis = get_redis()
        except Exception:
            self._redis = None
        return self._redis

    def _rkey(self, key: str) -> str:
        """Namespace *key* with the configured Redis prefix, if any."""
        if not self._prefix:
            return key
        return f"{self._prefix}{key}"

    def get(self, key: str) -> Any | None:
        """Return the cached value for *key*, or None when absent/expired.

        Redis is consulted first (shared state wins), then the local map.
        """
        backend = self._get_redis()
        if backend:
            shared = backend.get_json(self._rkey(key))
            if shared is not None:
                return shared
        local = self._store.get(key)
        if local and time.monotonic() < local.expires_at:
            return local.value
        return None

    def set(self, key: str, value: Any) -> None:
        """Store *value* under *key* in Redis (when available) and locally."""
        backend = self._get_redis()
        if backend:
            backend.set_json(self._rkey(key), value, ttl=int(self.ttl))
        # Always keep the in-memory copy as the fast local path.
        deadline = time.monotonic() + self.ttl
        self._store[key] = _CacheEntry(value=value, expires_at=deadline)

    def invalidate(self, key: str) -> None:
        """Drop *key* from both the Redis and in-memory backends."""
        backend = self._get_redis()
        if backend:
            backend.delete(self._rkey(key))
        self._store.pop(key, None)
+ self._store.pop(key, None)
118
+
119
+
120
+ # ═══════════════════════════════════════════════════════════════════════
121
+ # LLM-friendly summarisation
122
+ # ══��════════════════════════════════════════════════════════════════════
123
+
124
def summarise_dataframe(df: pd.DataFrame, max_rows: int = 48) -> Dict[str, Any]:
    """Compress a DataFrame to key stats when it exceeds *max_rows*.

    Small frames are returned verbatim as ``rows`` (a list of dicts with
    timestamps stringified and floats rounded to 2 decimals). Larger
    frames are reduced to per-column min/max/mean plus first/last values
    under ``summary``-style keys, so the payload stays LLM-friendly.
    """
    if df.empty:
        return {"rows": [], "note": "No data available."}

    if len(df) <= max_rows:
        # Small enough: ship every row, normalising cell types in place.
        rows = df.reset_index().to_dict(orient="records")
        for record in rows:
            for key in list(record):
                cell = record[key]
                if isinstance(cell, (pd.Timestamp, datetime)):
                    record[key] = str(cell)
                elif isinstance(cell, (float, np.floating)):
                    record[key] = round(float(cell), 2)
        return {"rows": rows, "row_count": len(rows)}

    # Too large — reduce each numeric column to a handful of stats.
    stats: Dict[str, Any] = {"row_count": len(df), "summarised": True, "columns": {}}
    for name, series in df.select_dtypes(include=[np.number]).items():
        clean = series.dropna()
        if clean.empty:
            continue
        stats["columns"][name] = {
            "min": round(float(clean.min()), 2),
            "max": round(float(clean.max()), 2),
            "mean": round(float(clean.mean()), 2),
            "first": round(float(clean.iloc[0]), 2),
            "last": round(float(clean.iloc[-1]), 2),
        }

    # Report the covered time span when the index is datetime-like.
    if isinstance(df.index, pd.DatetimeIndex):
        stats["time_range"] = {"start": str(df.index.min()), "end": str(df.index.max())}
    return stats
162
+
163
+
164
+ # ═══════════════════════════════════════════════════════════════════════
165
+ # Service base class
166
+ # ═══════════════════════════════════════════════════════════════════════
167
+
168
class BaseService(ABC):
    """Abstract base for all data-provider services.

    Subclasses must implement ``service_name`` (used as registry key).
    All public methods should return plain dicts (JSON-serialisable)
    so the chatbot can forward them to the LLM without conversion.
    """

    # NOTE: subclasses satisfy this with a plain class attribute
    # (e.g. ``service_name = "weather"``), which overrides the abstract
    # property and makes the subclass instantiable.
    @property
    @abstractmethod
    def service_name(self) -> str: ...
179
+
180
+
181
+ # ═══════════════════════════════════════════════════════════════════════
182
+ # 1. WeatherService (IMS station 43)
183
+ # ═══════════════════════════════════════════════════════════════════════
184
+
185
class WeatherService(BaseService):
    """IMS weather data — cached CSV for history, latest row for 'now'.

    Wraps the IMS client behind a TTL cache so repeated chatbot calls do
    not re-read the CSV. All public methods return JSON-serialisable
    dicts with an ``error`` key on failure.
    """

    service_name = "weather"

    def __init__(self, ims_client: Any = None, cache_ttl: float = 1800):
        self._ims = ims_client  # lazy — tests may inject a mock client here
        self._df_cache = TTLCache(ttl_seconds=cache_ttl, redis_prefix="weather:")

    # -- lazy client --

    def _client(self):
        """Create the IMS client on first use (avoids import-time I/O)."""
        if self._ims is None:
            from src.ims_client import IMSClient
            self._ims = IMSClient()
        return self._ims

    def _load_df(self) -> pd.DataFrame:
        """Load the cached IMS CSV, memoised for ``cache_ttl`` seconds."""
        cached = self._df_cache.get("ims")
        if cached is not None:
            return cached
        df = self._client().load_cached()
        if not df.empty:
            # NOTE(review): TTLCache may serialise this to Redis as JSON —
            # confirm DataFrame values are handled or Redis is disabled here.
            self._df_cache.set("ims", df)
        return df

    def get_dataframe(self) -> pd.DataFrame:
        """Public accessor for the cached IMS DataFrame."""
        return self._load_df()

    # -- public API --

    def _now_israel(self) -> Dict[str, str]:
        """Current time in Yeruham (Asia/Jerusalem) for context in API responses."""
        try:
            from zoneinfo import ZoneInfo
            tz = ZoneInfo("Asia/Jerusalem")
        except ImportError:
            # Fallback is fixed UTC+2 — note this ignores Israel DST (UTC+3
            # in summer), so times can be off by an hour without zoneinfo.
            tz = timezone(timedelta(hours=2))
        now = datetime.now(tz)
        return {
            "current_time_israel": now.strftime("%H:%M"),
            "current_date_israel": now.strftime("%Y-%m-%d"),
            "current_datetime_israel": now.isoformat(),
        }

    def get_current(self) -> Dict[str, Any]:
        """Latest IMS weather row with local time and staleness. Always includes current time (Yeruham) so callers can compare."""
        try:
            df = self._load_df()
            if df.empty:
                return {"error": "No cached IMS data available.", **self._now_israel()}
            last = df.iloc[-1]

            result: Dict[str, Any] = {
                "timezone": "Asia/Jerusalem (Israel local, Yeruham/Sde Boker)",
                **self._now_israel(),
            }
            try:
                ts_utc = pd.to_datetime(last.get("timestamp_utc"), utc=True)
                ts_local = ts_utc.tz_convert("Asia/Jerusalem")
                now_utc = pd.Timestamp.now(tz="UTC")
                result["timestamp_utc"] = ts_utc.isoformat()
                result["timestamp_local"] = ts_local.isoformat()
                result["age_minutes"] = round((now_utc - ts_utc).total_seconds() / 60, 1)
            except Exception:
                # Timestamp column missing or unparseable — report it raw.
                result["timestamp_utc"] = str(last.get("timestamp_utc", "unknown"))

            # Copy every other non-null sensor value, rounding numerics.
            # (isinstance check includes bool, which would become 0.0/1.0 —
            # acceptable for boolean telemetry flags.)
            for col in df.columns:
                if col != "timestamp_utc":
                    val = last[col]
                    if pd.notna(val):
                        result[col] = round(float(val), 2) if isinstance(val, (int, float, np.floating)) else str(val)
            return result
        except Exception as exc:
            return {"error": f"Could not load weather data: {exc}"}

    def get_history(self, start_date: str, end_date: str) -> Dict[str, Any]:
        """Hourly IMS summary for a date range (from cached CSV).

        Parameters
        ----------
        start_date, end_date : str
            ISO dates, interpreted as UTC; both days are included.
        """
        try:
            df = self._load_df()
            if df.empty:
                return {"error": "No cached IMS data."}
            if "timestamp_utc" in df.columns:
                df = df.set_index(pd.to_datetime(df["timestamp_utc"], utc=True))
            # Keep chronological order: summarise_dataframe reports
            # first/last values, which assume a sorted index.
            df = df.sort_index()
            start = pd.Timestamp(start_date, tz="UTC")
            end = pd.Timestamp(end_date, tz="UTC") + pd.Timedelta(days=1)
            # Fix: half-open [start, end) mask. The previous
            # ``df.loc[start:end]`` slice is label-INCLUSIVE of ``end``,
            # so it leaked the midnight sample of the day after
            # ``end_date`` into the result.
            subset = df.loc[(df.index >= start) & (df.index < end)]
            if subset.empty:
                return {"error": f"No data in range {start_date} to {end_date}."}
            hourly = subset.resample("1h").mean(numeric_only=True)
            return summarise_dataframe(hourly)
        except Exception as exc:
            return {"error": f"Weather history failed: {exc}"}
279
+
280
+
281
+ # ═══════════════════════════════════════════════════════════════════════
282
+ # 2. VineSensorService (ThingsBoard)
283
+ # ═══════════════════════════════════════════════════════════════════════
284
+
285
class VineSensorService(BaseService):
    """On-site vine sensors via ThingsBoard — snapshot + time-series."""

    service_name = "vine_sensors"

    def __init__(self, tb_client: Any = None, snapshot_ttl: float = 300):
        # Client is resolved lazily so tests can inject a mock.
        self._tb = tb_client
        self._snap_cache = TTLCache(ttl_seconds=snapshot_ttl, redis_prefix="vine:")

    def _client(self):
        """Return the ThingsBoard client, building it on first use."""
        if self._tb is None:
            from src.thingsboard_client import ThingsBoardClient
            self._tb = ThingsBoardClient()
        return self._tb

    # -- public API --

    def get_snapshot(self, light: bool = False,
                     mode: Optional[str] = None) -> Dict[str, Any]:
        """Latest vine state (treatment vs reference), 5-min TTL.

        Parameters
        ----------
        light : bool
            If True, fetch only ~6 key devices instead of all 21.
        mode : str, optional
            "dashboard" = 4 devices only (air + soil + irrigation).
        """
        if mode:
            cache_key = mode
        elif light:
            cache_key = "snap_light"
        else:
            cache_key = "snap"

        hit = self._snap_cache.get(cache_key)
        if hit is not None:
            return hit

        try:
            payload = self._client().get_vine_snapshot(light=light, mode=mode).to_dict()
        except Exception as exc:
            # Errors are deliberately NOT cached so the next call retries.
            return {
                "error": f"ThingsBoard unavailable: {exc}",
                "hint": "Check THINGSBOARD_USERNAME/PASSWORD in .env",
            }
        self._snap_cache.set(cache_key, payload)
        return payload

    def get_history(
        self,
        device_type: str = "crop",
        area: str = "treatment",
        hours_back: int = 24,
    ) -> Dict[str, Any]:
        """Hourly averages for a device group over the last N hours."""
        from src.thingsboard_client import (
            AIR_KEYS, CROP_KEYS, SOIL_KEYS, DEVICE_REGISTRY, VineArea,
        )

        # Resolve the telemetry keys for the requested device family.
        wanted_keys = {"air": AIR_KEYS, "crop": CROP_KEYS, "soil": SOIL_KEYS}.get(
            device_type.lower())
        if wanted_keys is None:
            return {"error": f"Unknown device_type '{device_type}'. Use air/crop/soil."}

        # Resolve the vineyard area enum.
        target_area = {
            "treatment": VineArea.TREATMENT,
            "reference": VineArea.REFERENCE,
            "ambient": VineArea.AMBIENT,
        }.get(area.lower())
        if target_area is None:
            return {"error": f"Unknown area '{area}'. Use treatment/reference/ambient."}

        # Devices whose registry area matches and whose name starts with
        # the device family prefix (e.g. "crop...").
        prefix = device_type.lower()
        matching = [
            name for name, info in DEVICE_REGISTRY.items()
            if info.area == target_area and name.lower().startswith(prefix)
        ]
        if not matching:
            return {"error": f"No {device_type} devices in {area} area."}

        window_end = datetime.now(tz=timezone.utc)
        window_start = window_end - timedelta(hours=hours_back)

        try:
            collected = []
            for device in matching:
                series = self._client().get_timeseries(device, wanted_keys,
                                                       window_start, window_end)
                if not series.empty:
                    # Prefix columns with the device name to keep them unique.
                    collected.append(series.add_prefix(f"{device}_"))
            if not collected:
                return {"error": "No time-series data returned from ThingsBoard."}
            combined = pd.concat(collected, axis=1).sort_index()
            return summarise_dataframe(combined.resample("1h").mean(numeric_only=True))
        except Exception as exc:
            return {"error": f"Sensor history failed: {exc}"}
377
+
378
+
379
+ # ═══════════════════════════════════════════════════════════════════════
380
+ # 3. PhotosynthesisService (FvCB + ML + forecast)
381
+ # ═══════════════════════════════════════════════════════════════════════
382
+
383
class PhotosynthesisService(BaseService):
    """Photosynthesis predictions — mechanistic, ML, and day-ahead.

    Three prediction paths are exposed:
    - ``predict_fvcb``      — single-point Farquhar (FvCB) biochemistry.
    - ``predict_ml``        — ML ensemble trained lazily from IMS weather.
    - ``forecast_day_ahead``— 24 h FvCB projection over IMS weather data.
    Heavy sub-models (Farquhar, shadow geometry, canopy, ML) are built on
    first use and cached on the instance.
    """

    service_name = "photosynthesis"

    def __init__(self):
        # All sub-models start unset; each is constructed lazily below.
        self._farquhar = None
        self._ml_predictor = None
        self._shadow = None
        self._canopy = None

    # -- lazy loaders --

    def _get_farquhar(self):
        # Mechanistic FvCB leaf photosynthesis model.
        if self._farquhar is None:
            from src.farquhar_model import FarquharModel
            self._farquhar = FarquharModel()
        return self._farquhar

    def _get_shadow(self):
        # Solar-geometry / tracker shadow model.
        if self._shadow is None:
            from src.solar_geometry import ShadowModel
            self._shadow = ShadowModel()
        return self._shadow

    def _get_canopy(self):
        # Canopy model composed from the shadow and Farquhar sub-models.
        if self._canopy is None:
            from src.canopy_photosynthesis import CanopyPhotosynthesisModel
            self._canopy = CanopyPhotosynthesisModel(
                shadow_model=self._get_shadow(),
                farquhar_model=self._get_farquhar(),
            )
        return self._canopy

    # -- public API --

    def predict_fvcb(
        self, PAR: float, Tleaf: float, CO2: float, VPD: float, Tair: float,
    ) -> Dict[str, Any]:
        """Single-point Farquhar model prediction with limiting factor.

        Parameters
        ----------
        PAR : float
            Photosynthetically active radiation (umol m-2 s-1).
        Tleaf, Tair : float
            Leaf / air temperature (deg C).
        CO2 : float
            Ambient CO2 (ppm).
        VPD : float
            Vapour pressure deficit (kPa).
        """
        model = self._get_farquhar()
        A = model.calc_photosynthesis(PAR=PAR, Tleaf=Tleaf, CO2=CO2, VPD=VPD, Tair=Tair)

        # Recompute the two FvCB limitation terms so we can report which
        # one binds at these conditions.
        Tk = Tleaf + 273.15
        Vcmax = model.calc_Vcmax(Tk)
        Jmax = model.calc_Jmax(Tk)
        gamma_star = model.calc_gamma_star(Tk)
        Kc = model.calc_Kc(Tk)
        Ko = model.calc_Ko(Tk)
        # NOTE(review): _ci_from_ca is a private FarquharModel helper;
        # consider exposing a public accessor on the model instead.
        ci = model._ci_from_ca(CO2, VPD)
        J = model.calc_electron_transport(PAR, Jmax)
        # Ac = Rubisco-limited rate, Aj = RuBP-regeneration-limited rate.
        # 210.0 presumably is the O2 term used by the model — confirm units.
        Ac = Vcmax * (ci - gamma_star) / (ci + Kc * (1.0 + 210.0 / Ko))
        Aj = J * (ci - gamma_star) / (4.0 * ci + 8.0 * gamma_star)

        limiting = ("Rubisco-limited (high temperature is the bottleneck)"
                    if Ac < Aj else
                    "RuBP-limited (light is the bottleneck)")
        # Heuristic used throughout this class: above 30 C leaf temperature,
        # shading can relieve heat stress.
        shading_helps = Tleaf > 30.0

        return {
            "A_net": round(A, 3),
            "units": "umol CO2 m-2 s-1",
            "limiting_factor": limiting,
            "Tleaf": Tleaf,
            "shading_would_help": shading_helps,
            "model": "fvcb",
            "note": ("Shading may help reduce heat stress" if shading_helps
                     else "Shading would reduce photosynthesis (vine needs light)"),
        }

    def predict_ml(self, features: Optional[Dict[str, float]] = None) -> Dict[str, Any]:
        """ML ensemble prediction. Auto-fills features from latest IMS if not provided.

        Trains the model once on first call (lazy), then caches it.
        """
        try:
            predictor, feature_cols, best_name = self._ensure_ml_predictor()
        except Exception as exc:
            return {"error": f"ML predictor unavailable: {exc}"}

        try:
            if features:
                # Missing features default to 0.0 — callers should supply
                # the full feature set for best accuracy.
                row = {col: features.get(col, 0.0) for col in feature_cols}
            else:
                row = self._auto_fill_features(feature_cols)
                if row is None:
                    return {"error": "No IMS data available to auto-fill features."}

            import pandas as _pd
            # Column order must match training order.
            X = _pd.DataFrame([row])[feature_cols]
            model = predictor.models[best_name]
            pred = float(model.predict(X)[0])
            metrics = predictor.results.get(best_name, {})

            return {
                "A_net_predicted": round(pred, 3),
                "units": "umol CO2 m-2 s-1",
                "model": best_name,
                "model_mae": round(metrics.get("mae", 0), 3),
                "model_r2": round(metrics.get("r2", 0), 3),
                "features_used": {k: round(v, 2) for k, v in row.items()},
                "note": "Prediction from ML ensemble trained on IMS weather features.",
            }
        except Exception as exc:
            return {"error": f"ML prediction failed: {exc}"}

    def _ensure_ml_predictor(self):
        """Train the ML predictor once and cache it. Returns (predictor, feature_cols, best_name)."""
        # Cached triple from a previous call — training is expensive.
        if self._ml_predictor is not None:
            return self._ml_predictor

        from src.ims_client import IMSClient
        from src.farquhar_model import FarquharModel
        from src.preprocessor import Preprocessor
        from src.predictor import PhotosynthesisPredictor

        ims = IMSClient()
        ims_df = ims.load_cached()
        if ims_df.empty:
            raise RuntimeError("No IMS cache data — cannot train ML predictor.")

        # Compute Stage 1 labels (A) from sensor data
        from src.sensor_data_loader import SensorDataLoader
        loader = SensorDataLoader()
        sensor_df = loader.load()
        fvcb = FarquharModel()
        labels = fvcb.compute_all(sensor_df)
        labels.name = "A"

        # Ensure labels have a datetime index for merge
        if "time" in sensor_df.columns:
            ts = pd.to_datetime(sensor_df["time"], utc=True)
            labels.index = ts

        # Preprocess: merge, time features, split
        prep = Preprocessor()
        merged = prep.merge_ims_with_labels(ims_df, labels)
        if merged.empty:
            raise RuntimeError("Merge of IMS + labels produced empty DataFrame.")
        merged = prep.create_time_features(merged)
        X_train, y_train, X_test, y_test = prep.temporal_split(merged)
        if X_train.empty:
            raise RuntimeError("Not enough data to train ML predictor.")

        predictor = PhotosynthesisPredictor()
        predictor.train(X_train, y_train)
        if not X_test.empty:
            predictor.evaluate(X_test, y_test)

        # Pick the model with the lowest MAE; default to GradientBoosting
        # when no evaluation results exist (e.g. empty test split).
        best_name = "GradientBoosting"
        if predictor.results:
            best_name = min(predictor.results, key=lambda n: predictor.results[n].get("mae", 999))

        feature_cols = list(X_train.columns)
        self._ml_predictor = (predictor, feature_cols, best_name)
        return self._ml_predictor

    def _auto_fill_features(self, feature_cols: List[str]) -> Optional[Dict[str, float]]:
        """Fill feature vector from the latest IMS cache row + time features."""
        try:
            from src.ims_client import IMSClient
            from src.time_features import add_cyclical_time_features
            ims = IMSClient()
            df = ims.load_cached()
            if df.empty:
                return None
            last_row_df = df.tail(1).copy()
            last_row_df = add_cyclical_time_features(last_row_df, timestamp_col="timestamp_utc")
            ts = pd.to_datetime(last_row_df["timestamp_utc"].iloc[0], utc=True)
            last_row_df["month"] = ts.month
            last_row_df["day_of_year"] = ts.day_of_year
            # Any feature the latest row cannot provide defaults to 0.0.
            row = {}
            for col in feature_cols:
                if col in last_row_df.columns:
                    val = last_row_df[col].iloc[0]
                    row[col] = float(val) if pd.notna(val) else 0.0
                else:
                    row[col] = 0.0
            return row
        except Exception:
            # Best-effort: caller treats None as "could not auto-fill".
            return None

    def forecast_day_ahead(self, target_date: Optional[str] = None) -> Dict[str, Any]:
        """24h A profile using FvCB model over IMS weather data.

        For each daytime hour, computes A from IMS temperature/GHI/humidity
        using typical vine conditions. Falls back to FvCB-based projection
        when Chronos or ML forecast is unavailable.
        """
        try:
            from src.ims_client import IMSClient
            ims = IMSClient()
            df = ims.load_cached()
            if df.empty:
                return {"error": "No IMS data cached for PS forecast."}

            if "timestamp_utc" in df.columns:
                df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
                df = df.set_index("timestamp_utc")

            target = target_date or str(date.today())
            try:
                day_start = pd.Timestamp(target, tz="UTC")
                day_end = day_start + pd.Timedelta(days=1)
                day_df = df.loc[day_start:day_end]
            except Exception:
                day_df = pd.DataFrame()

            # If target date not in cache, use last available day
            if day_df.empty:
                day_df = df.tail(96)  # ~24h of 15-min data
                if day_df.empty:
                    return {"error": "Not enough IMS data for forecast."}
                target = str(day_df.index[-1].date())

            hourly = day_df.resample("1h").mean(numeric_only=True)
            model = self._get_farquhar()

            # Map IMS columns (try exact settings names first, then fuzzy match)
            def _find_col(df_cols, exact_names, fuzzy_terms, exclude_terms=()):
                for name in exact_names:
                    if name in df_cols:
                        return name
                for c in df_cols:
                    cl = c.lower()
                    if any(t in cl for t in fuzzy_terms) and not any(t in cl for t in exclude_terms):
                        return c
                return None

            # exclude "dew"/"soil" so e.g. dew-point doesn't match "temp".
            temp_col = _find_col(hourly.columns, ["air_temperature_c"], ["temp"], ["dew", "soil"])
            ghi_col = _find_col(hourly.columns, ["ghi_w_m2"], ["ghi", "rad", "irrad"])
            rh_col = _find_col(hourly.columns, ["rh_percent"], ["rh", "humid"])

            hourly_results = []
            for idx, row in hourly.iterrows():
                hour = idx.hour if hasattr(idx, "hour") else 0
                # Skip night hours (outside 06:00–19:00, index timezone).
                if hour < 6 or hour > 19:
                    continue

                # Defaults when a column is missing: 25 C air, dark, 40% RH.
                Tair = float(row[temp_col]) if temp_col and pd.notna(row.get(temp_col)) else 25.0
                Tleaf = Tair + 2.0  # leaf typically ~2C above air
                ghi = float(row[ghi_col]) if ghi_col and pd.notna(row.get(ghi_col)) else 0.0
                PAR = ghi * 2.0  # approximate PAR from GHI (umol/m2/s ~ 2x W/m2)
                rh = float(row[rh_col]) if rh_col and pd.notna(row.get(rh_col)) else 40.0

                # Estimate VPD from T and RH (Tetens saturation pressure, kPa).
                es = 0.6108 * np.exp(17.27 * Tair / (Tair + 237.3))
                VPD = max(es * (1 - rh / 100), 0.1)

                if PAR < 50:
                    # Effectively dark: no net assimilation modelled.
                    A = 0.0
                    limiting = "dark"
                else:
                    A = model.calc_photosynthesis(PAR=PAR, Tleaf=Tleaf, CO2=400.0, VPD=VPD, Tair=Tair)
                    limiting = "rubisco" if Tleaf > 30 else "rubp"

                hourly_results.append({
                    "hour": hour,
                    "A_predicted": round(A, 2),
                    "Tair": round(Tair, 1),
                    "PAR": round(PAR, 0),
                    "VPD": round(VPD, 2),
                    "limiting": limiting,
                    "shading_helps": Tleaf > 30.0,
                })

            if not hourly_results:
                return {"error": "No daytime hours available in forecast range."}

            peak = max(hourly_results, key=lambda r: r["A_predicted"])
            total_A = sum(r["A_predicted"] for r in hourly_results)
            stress_hours = sum(1 for r in hourly_results if r["limiting"] == "rubisco")

            return {
                "date": target,
                "method": "fvcb_projection",
                "hourly": hourly_results,
                "peak_A": peak["A_predicted"],
                "peak_hour": peak["hour"],
                "daily_total_A": round(total_A, 1),
                "rubisco_limited_hours": stress_hours,
                "note": "FvCB-based projection from IMS weather data. "
                        "PAR estimated as 2x GHI. Leaf temp estimated as Tair+2C.",
            }
        except Exception as exc:
            return {"error": f"PS forecast failed: {exc}"}

    def simulate_shading(
        self,
        angle_offset: float,
        hour: int,
        date_str: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Compare A at astronomical tracking vs offset angle.

        Parameters
        ----------
        angle_offset : float
            Degrees added to the astronomical tracker tilt.
        hour : int
            Local hour (Asia/Jerusalem) to simulate.
        date_str : str, optional
            ISO date; defaults to today.
        """
        shadow = self._get_shadow()
        canopy = self._get_canopy()

        dt_str = date_str or str(date.today())
        try:
            dt = pd.Timestamp(f"{dt_str} {hour:02d}:00:00", tz="Asia/Jerusalem")
        except Exception:
            # Unparseable date — fall back to today at the requested hour.
            dt = pd.Timestamp(f"{date.today()} {hour:02d}:00:00", tz="Asia/Jerusalem")

        solar_pos = shadow.get_solar_position(pd.DatetimeIndex([dt]))
        elev = float(solar_pos["solar_elevation"].iloc[0])
        azim = float(solar_pos["solar_azimuth"].iloc[0])

        # Below ~2 degrees elevation the shadow geometry is meaningless.
        if elev <= 2.0:
            return {"error": f"Sun below horizon at hour {hour} (elevation {elev:.1f}\u00b0)."}

        tracker = shadow.compute_tracker_tilt(azim, elev)
        astro_tilt = tracker["tracker_theta"]

        # Fixed reference conditions for the comparison (hot midday vine).
        PAR, Tleaf, CO2, VPD, Tair = 1800.0, 32.0, 400.0, 2.5, 33.0

        # Baseline: tracker at its astronomical tilt.
        mask_un = shadow.project_shadow(elev, azim, astro_tilt)
        res_un = canopy.compute_vine_A(
            par=PAR, Tleaf=Tleaf, CO2=CO2, VPD=VPD, Tair=Tair,
            shadow_mask=mask_un, solar_elevation=elev,
            solar_azimuth=azim, tracker_tilt=astro_tilt,
        )

        # Variant: tracker offset to cast extra shade on the vine row.
        shaded_tilt = astro_tilt + angle_offset
        mask_sh = shadow.project_shadow(elev, azim, shaded_tilt)
        res_sh = canopy.compute_vine_A(
            par=PAR, Tleaf=Tleaf, CO2=CO2, VPD=VPD, Tair=Tair,
            shadow_mask=mask_sh, solar_elevation=elev,
            solar_azimuth=azim, tracker_tilt=shaded_tilt,
        )

        A_un = res_un["A_vine"]
        A_sh = res_sh["A_vine"]
        # Percent change; guarded against division by zero at A_un == 0.
        change = ((A_sh - A_un) / A_un * 100) if A_un > 0 else 0

        return {
            "hour": hour, "date": dt_str, "angle_offset": angle_offset,
            "solar_elevation": round(elev, 1),
            "A_unshaded": round(A_un, 3), "A_shaded": round(A_sh, 3),
            "A_change_pct": round(change, 1),
            "sunlit_fraction_unshaded": round(res_un["sunlit_fraction"], 3),
            "sunlit_fraction_shaded": round(res_sh["sunlit_fraction"], 3),
            "tracker_tilt_astronomical": round(astro_tilt, 1),
            "tracker_tilt_shaded": round(shaded_tilt, 1),
        }

    def compare_angles(self, angles: Optional[List[int]] = None) -> Dict[str, Any]:
        """Compare A and energy across tilt angle offsets.

        Delegates to the tracker optimizer; ``angles=None`` uses its
        default angle set.
        """
        try:
            from src.tracker_optimizer import simulate_tilt_angles, load_sensor_data
            df = load_sensor_data()
            result_df = simulate_tilt_angles(df, angles=angles)
            records = result_df.to_dict(orient="records")
            # Round floats so the payload stays compact for the LLM.
            for r in records:
                for k, v in r.items():
                    if isinstance(v, (float, np.floating)):
                        r[k] = round(float(v), 2)
            return {"angles": records}
        except Exception as exc:
            return {"error": f"Angle comparison failed: {exc}"}

    def daily_schedule(
        self, stress_threshold: float = 2.0, shade_angle: int = 20,
    ) -> Dict[str, Any]:
        """Hourly shading schedule based on leaf-air temperature stress.

        Parameters
        ----------
        stress_threshold : float
            Leaf-minus-air temperature (deg C) above which to shade.
        shade_angle : int
            Tilt offset (degrees) applied during shading hours.
        """
        try:
            from src.tracker_optimizer import compute_daily_schedule, load_sensor_data
            df = load_sensor_data()
            # Use the most recent day available in the sensor data.
            last_date = df["date"].max()
            day_df = df[df["date"] == last_date].copy()
            if day_df.empty:
                return {"error": "No sensor data available for schedule."}
            result_df = compute_daily_schedule(
                day_df, stress_threshold=stress_threshold, shade_angle=shade_angle,
            )
            records = result_df.to_dict(orient="records")
            # Normalise cell types for JSON serialisation.
            for r in records:
                for k, v in list(r.items()):
                    if isinstance(v, (float, np.floating)):
                        r[k] = round(float(v), 2)
                    elif isinstance(v, (pd.Timestamp, datetime)):
                        r[k] = str(v)
            return {"date": str(last_date), "schedule": records}
        except Exception as exc:
            return {"error": f"Schedule failed: {exc}"}

    def get_photosynthesis_3d_scene(
        self,
        hour: Optional[int] = None,
        date_str: Optional[str] = None,
        height_px: int = 480,
    ) -> Dict[str, Any]:
        """Build 3D scene data and HTML for vine, tracker, sun and photosynthesis.

        Returns dict with scene_3d (data), scene_3d_html (full HTML string),
        A_vine, sunlit_fraction, and optional error.
        """
        try:
            from src.vine_3d_scene import build_scene_data, build_scene_html
        except Exception as exc:
            return {"error": f"3D scene module unavailable: {exc}"}

        try:
            # Local import shadows the module-level ``datetime`` class on
            # purpose; only ``now().hour`` is needed here.
            from datetime import datetime
            h = hour if hour is not None else datetime.now().hour
            scene_data = build_scene_data(hour=h, date_str=date_str)
            html = build_scene_html(scene_data, height_px=height_px)
            return {
                "scene_3d": scene_data,
                "scene_3d_html": html,
                "A_vine": scene_data["A_vine"],
                "sunlit_fraction": scene_data["sunlit_fraction"],
                "hour": scene_data["hour"],
                "date": scene_data["date"],
            }
        except Exception as exc:
            return {"error": f"3D scene build failed: {exc}"}
799
+
800
+
801
+ # ═══════════════════════════════════════════════════════════════════════
802
+ # 4. EnergyService (TB generation + analytical prediction)
803
+ # ═══════════════════════════════════════════════════════════════════════
804
+
805
+ class EnergyService(BaseService):
806
+ """Energy generation data from ThingsBoard Plant asset.
807
+
808
+ The 'Yeruham Vineyard' asset (type=Plant) provides:
809
+ - ``power``: instantaneous power in W
810
+ - ``production``: energy produced per 5-min interval in Wh
811
+
812
+ Daily kWh = sum(production) / 1000 over the day.
813
+ """
814
+
815
+ service_name = "energy"
816
+
817
    def __init__(self, tb_client: Any = None):
        # Optional injected ThingsBoard client (for tests); otherwise the
        # real client is created lazily in _client().
        self._tb = tb_client
819
+
820
    def _client(self):
        """Return the ThingsBoard client, creating it lazily on first use."""
        if self._tb is None:
            # NOTE(review): this imports src.data.thingsboard_client while
            # VineSensorService imports src.thingsboard_client — presumably
            # one path is a shim for the other; confirm both resolve to the
            # same client implementation.
            from src.data.thingsboard_client import ThingsBoardClient
            self._tb = ThingsBoardClient()
        return self._tb
825
+
826
+ # ------------------------------------------------------------------
827
+ # Public API
828
+ # ------------------------------------------------------------------
829
+
830
+ def get_current(self) -> Dict[str, Any]:
831
+ """Latest power reading from the Plant asset."""
832
+ try:
833
+ vals = self._client().get_asset_latest("Plant", ["power", "production"])
834
+ power_w = vals.get("power")
835
+ return {
836
+ "power_kw": round(power_w / 1000, 1) if power_w else None,
837
+ "source": "ThingsBoard Plant asset",
838
+ }
839
+ except Exception as exc:
840
+ return {"error": f"Energy current failed: {exc}"}
841
+
842
    def get_daily_production(self, target_date: Optional[str] = None) -> Dict[str, Any]:
        """Accumulated energy production for a single day (real TB data).

        Returns dict with daily_kwh, peak_hour, hourly_profile.

        Parameters
        ----------
        target_date : str, optional
            ISO date; defaults to today. NOTE(review): the day window is
            [midnight, midnight) in UTC, not Israel local time — confirm
            this matches how "a day" is reported elsewhere.
        """
        try:
            target = target_date or str(date.today())
            day_start = pd.Timestamp(target, tz="UTC")
            day_end = day_start + pd.Timedelta(days=1)

            # Hourly SUM buckets of the 5-min "production" counter.
            df = self._client().get_asset_timeseries(
                "Plant", ["production"],
                start=day_start.to_pydatetime(),
                end=day_end.to_pydatetime(),
                limit=500,
                interval_ms=3_600_000,  # 1 hour
                agg="SUM",
            )
            if df.empty or "production" not in df.columns:
                return {"date": target, "daily_kwh": None, "error": "No production data"}

            # production is in Wh per interval; hourly SUM = Wh per hour
            df["kwh"] = df["production"].fillna(0) / 1000
            total_kwh = df["kwh"].sum()

            # Convert UTC → Israel local time for display
            try:
                import zoneinfo
                tz_il = zoneinfo.ZoneInfo("Asia/Jerusalem")
            except Exception:
                # No tz database — fall back to labelling hours in UTC.
                tz_il = None

            # Build the per-hour profile and track the peak hour as we go.
            hourly_profile = []
            peak_hour = 12
            peak_kwh = 0.0
            for ts, row in df.iterrows():
                local_ts = ts.astimezone(tz_il) if tz_il else ts
                h = local_ts.hour if hasattr(local_ts, "hour") else 0
                kwh = row["kwh"]
                hourly_profile.append({"hour": h, "energy_kwh": round(kwh, 2)})
                if kwh > peak_kwh:
                    peak_kwh = kwh
                    peak_hour = h

            return {
                "date": target,
                "daily_kwh": round(total_kwh, 1),
                "peak_hour": peak_hour,
                "peak_hour_kwh": round(peak_kwh, 2),
                "hourly_profile": hourly_profile,
                "source": "ThingsBoard Plant asset",
            }
        except Exception as exc:
            # NOTE(review): on failure this echoes the raw target_date
            # argument (possibly None), not the resolved `target`.
            return {"date": target_date, "daily_kwh": None, "error": f"Energy fetch failed: {exc}"}
896
+
897
    def get_history(self, hours_back: int = 24) -> Dict[str, Any]:
        """Hourly power time-series from TB Plant asset.

        Parameters
        ----------
        hours_back : int
            Size of the look-back window ending now (UTC).
        """
        try:
            end = datetime.now(tz=timezone.utc)
            start = end - timedelta(hours=hours_back)
            df = self._client().get_asset_timeseries(
                "Plant", ["power", "production"],
                start=start, end=end,
                limit=500,
                interval_ms=3_600_000,  # 1-hour AVG buckets
                agg="AVG",
            )
            if df.empty:
                return {"error": f"No energy data in last {hours_back} hours."}
            # NOTE(review): assumes a "power" column exists whenever df is
            # non-empty; a missing column raises KeyError, which surfaces
            # through the generic error path below.
            df["power_kw"] = df["power"].fillna(0) / 1000
            return summarise_dataframe(df[["power_kw"]])
        except Exception as exc:
            return {"error": f"Energy history failed: {exc}"}
915
+
916
+ def predict(self, target_date: Optional[str] = None,
917
+ *, ims_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
918
+ """For future dates: analytical estimate. For past/today: real TB data."""
919
+ target = target_date or str(date.today())
920
+ target_d = date.fromisoformat(target)
921
+ today = date.today()
922
+
923
+ # Past or today → use real TB data
924
+ if target_d <= today:
925
+ return self.get_daily_production(target)
926
+
927
+ # Future → analytical estimate from IMS GHI
928
+ return self._predict_analytical(target, ims_df=ims_df)
929
+
930
+ def _predict_analytical(self, target_date: str,
931
+ *, ims_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
932
+ """Energy estimate for future dates.
933
+
934
+ Strategy (in priority order):
935
+ 1. ML predictor (XGBoost) with ThingsBoard Air1 weather persistence
936
+ 2. ML predictor with IMS weather persistence
937
+ 3. Analytical fallback (GHI × system capacity)
938
+ """
939
+ # --- Try ML predictor with on-site weather first ---
940
+ try:
941
+ result = self._predict_ml(target_date)
942
+ if result and result.get("daily_kwh") is not None:
943
+ return result
944
+ except Exception:
945
+ pass # fall through to IMS / analytical
946
+
947
+ # --- Fallback: analytical from IMS GHI ---
948
+ try:
949
+ if ims_df is not None:
950
+ df = ims_df
951
+ else:
952
+ from src.ims_client import IMSClient
953
+ df = IMSClient().load_cached()
954
+ if df.empty:
955
+ return {"date": target_date, "daily_kwh": None, "error": "No weather data"}
956
+
957
+ if "timestamp_utc" in df.columns:
958
+ df = df.copy()
959
+ df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
960
+ df = df.set_index("timestamp_utc")
961
+
962
+ # Try ML predictor with IMS data
963
+ try:
964
+ from src.energy_predictor import EnergyPredictor
965
+ ep = EnergyPredictor()
966
+ return ep.predict_day_from_weather_df(target_date, df.tail(96))
967
+ except Exception:
968
+ pass # fall through to raw analytical
969
+
970
+ # Raw analytical: GHI × capacity / STC
971
+ day_df = df.tail(96).copy()
972
+ if day_df.empty:
973
+ return {"date": target_date, "daily_kwh": None, "error": "Not enough IMS data"}
974
+
975
+ ghi_col = next(
976
+ (c for c in day_df.columns if "ghi" in c.lower() or "rad" in c.lower()), None)
977
+ if ghi_col is None:
978
+ return {"date": target_date, "daily_kwh": None, "error": "No GHI column"}
979
+
980
+ from config.settings import SYSTEM_CAPACITY_KW, STC_IRRADIANCE_W_M2
981
+ system_kw = SYSTEM_CAPACITY_KW
982
+ stc_ghi = STC_IRRADIANCE_W_M2
983
+ slot_hours = 0.25
984
+
985
+ total_kwh = 0.0
986
+ hourly_kwh: Dict[int, float] = {}
987
+ for idx, row in day_df.iterrows():
988
+ ghi = float(row[ghi_col]) if pd.notna(row.get(ghi_col)) else 0.0
989
+ if ghi <= 0:
990
+ continue
991
+ energy = system_kw * (ghi / stc_ghi) * slot_hours
992
+ total_kwh += energy
993
+ h = idx.hour if hasattr(idx, "hour") else 0
994
+ hourly_kwh[h] = hourly_kwh.get(h, 0) + energy
995
+
996
+ peak_hour = max(hourly_kwh, key=hourly_kwh.get) if hourly_kwh else 12
997
+ hourly_profile = [
998
+ {"hour": h, "energy_kwh": round(e, 2)}
999
+ for h, e in sorted(hourly_kwh.items())
1000
+ ]
1001
+ return {
1002
+ "date": target_date,
1003
+ "daily_kwh": round(total_kwh, 1),
1004
+ "peak_hour": peak_hour,
1005
+ "peak_hour_kwh": round(hourly_kwh.get(peak_hour, 0), 2),
1006
+ "hourly_profile": hourly_profile,
1007
+ "source": f"Analytical estimate (persistence forecast × {system_kw:.0f} kW system)",
1008
+ }
1009
+ except Exception as exc:
1010
+ return {"date": target_date, "daily_kwh": None, "error": f"Prediction failed: {exc}"}
1011
+
1012
+ def _predict_ml(self, target_date: str) -> Optional[Dict[str, Any]]:
1013
+ """ML energy prediction using latest ThingsBoard Air1 weather as persistence forecast."""
1014
+ from src.energy_predictor import EnergyPredictor
1015
+
1016
+ ep = EnergyPredictor()
1017
+
1018
+ # Fetch last 24h of on-site weather (Air1) as persistence forecast
1019
+ end = datetime.now(tz=timezone.utc)
1020
+ start = end - timedelta(hours=24)
1021
+
1022
+ df = self._client().get_timeseries(
1023
+ "Air1",
1024
+ keys=["GSR", "airTemperature", "windSpeed"],
1025
+ start=start, end=end,
1026
+ limit=500,
1027
+ interval_ms=3_600_000,
1028
+ agg="AVG",
1029
+ )
1030
+ if df.empty or len(df) < 8:
1031
+ return None
1032
+
1033
+ return ep.predict_day_from_weather_df(target_date, df)
1034
+
1035
+
1036
+ # ═══════════════════════════════════════════════════════════════════════
1037
+ # 5. AdvisoryService (Gemini day-ahead advisor)
1038
+ # ═══════════════════════════════════════════════════════════════════════
1039
+
1040
+ class AdvisoryService(BaseService):
1041
+ """Gemini-powered day-ahead stress advisory."""
1042
+
1043
+ service_name = "advisory"
1044
+
1045
+ def __init__(self, vine_sensor_svc: Optional[VineSensorService] = None, verbose: bool = False):
1046
+ self._vine_svc = vine_sensor_svc
1047
+ self._verbose = verbose
1048
+
1049
+ def run_advisory(self, target_date: Optional[str] = None) -> Dict[str, Any]:
1050
+ """Full DayAheadAdvisor report, enriched with vine snapshot if available."""
1051
+ try:
1052
+ from src.day_ahead_advisor import DayAheadAdvisor
1053
+ from src.ims_client import IMSClient
1054
+
1055
+ advisor = DayAheadAdvisor(verbose=self._verbose)
1056
+ weather_df = IMSClient().load_cached()
1057
+ if weather_df.empty:
1058
+ return {"error": "No IMS weather data cached. Cannot run advisory."}
1059
+
1060
+ vine_snapshot = None
1061
+ if self._vine_svc:
1062
+ snap_dict = self._vine_svc.get_snapshot()
1063
+ if "error" not in snap_dict:
1064
+ # Reconstruct a VineSnapshot-like object for to_advisor_text()
1065
+ try:
1066
+ from src.thingsboard_client import ThingsBoardClient
1067
+ tb = self._vine_svc._client()
1068
+ vine_snapshot = tb.get_vine_snapshot()
1069
+ except Exception:
1070
+ pass
1071
+
1072
+ report = advisor.advise(
1073
+ date=target_date or str(date.today()),
1074
+ weather_forecast=weather_df,
1075
+ phenological_stage="vegetative",
1076
+ vine_snapshot=vine_snapshot,
1077
+ )
1078
+ return DayAheadAdvisor.report_to_dict(report)
1079
+ except Exception as exc:
1080
+ return {"error": f"Advisory failed: {exc}"}
1081
+
1082
+
1083
+ # ═══════════════════════════════════════════════════════════════════════
1084
+ # 6. BiologyService (rule lookup — no external deps)
1085
+ # ═══════════════════════════════════════════════════════════════════════
1086
+
1087
+ class BiologyService(BaseService):
1088
+ """Biology rules lookup — pure in-memory, no API calls."""
1089
+
1090
+ service_name = "biology"
1091
+
1092
+ def __init__(self, rules: Optional[Dict[str, str]] = None):
1093
+ if rules is None:
1094
+ from src.vineyard_chatbot import BIOLOGY_RULES
1095
+ rules = BIOLOGY_RULES
1096
+ self._rules = rules
1097
+
1098
+ def explain_rule(self, rule_name: str) -> Dict[str, Any]:
1099
+ key = rule_name.lower().strip()
1100
+ if key in self._rules:
1101
+ return {"rule": key, "explanation": self._rules[key]}
1102
+ return {"error": f"Unknown rule '{key}'", "available_rules": list(self._rules.keys())}
1103
+
1104
+ def list_rules(self) -> Dict[str, Any]:
1105
+ return {"rules": list(self._rules.keys())}
1106
+
1107
+
1108
+ # ═══════════════════════════════════════════════════════════════════════
1109
+ # DataHub (service registry)
1110
+ # ═══════════════════════════════════════════════════════════════════════
1111
+
1112
+ class DataHub:
1113
+ """Lightweight registry of data-provider services.
1114
+
1115
+ Usage
1116
+ -----
1117
+ hub = DataHub.default()
1118
+ hub.weather.get_current()
1119
+ hub.vine_sensors.get_snapshot()
1120
+ hub.photosynthesis.predict_fvcb(PAR=1500, ...)
1121
+ hub.energy.get_current()
1122
+
1123
+ The chatbot receives a hub at init and delegates all data access
1124
+ through it — never importing data clients directly.
1125
+ """
1126
+
1127
+ def __init__(self) -> None:
1128
+ self._services: Dict[str, BaseService] = {}
1129
+
1130
+ # -- registration --
1131
+
1132
+ def register(self, service: BaseService) -> None:
1133
+ self._services[service.service_name] = service
1134
+
1135
+ def get(self, name: str) -> BaseService:
1136
+ if name not in self._services:
1137
+ raise KeyError(f"No service registered as '{name}'. "
1138
+ f"Available: {list(self._services)}")
1139
+ return self._services[name]
1140
+
1141
+ # -- typed accessors (convenience, avoids casts everywhere) --
1142
+
1143
+ @property
1144
+ def weather(self) -> WeatherService:
1145
+ return self._services["weather"] # type: ignore[return-value]
1146
+
1147
+ @property
1148
+ def vine_sensors(self) -> VineSensorService:
1149
+ return self._services["vine_sensors"] # type: ignore[return-value]
1150
+
1151
+ @property
1152
+ def photosynthesis(self) -> PhotosynthesisService:
1153
+ return self._services["photosynthesis"] # type: ignore[return-value]
1154
+
1155
+ @property
1156
+ def energy(self) -> EnergyService:
1157
+ return self._services["energy"] # type: ignore[return-value]
1158
+
1159
+ @property
1160
+ def advisory(self) -> AdvisoryService:
1161
+ return self._services["advisory"] # type: ignore[return-value]
1162
+
1163
+ @property
1164
+ def biology(self) -> BiologyService:
1165
+ return self._services["biology"] # type: ignore[return-value]
1166
+
1167
+ # -- factory --
1168
+
1169
+ @classmethod
1170
+ def default(cls, verbose: bool = False) -> "DataHub":
1171
+ """Create a hub with all default services (lazy clients)."""
1172
+ hub = cls()
1173
+ vine_svc = VineSensorService()
1174
+ hub.register(WeatherService())
1175
+ hub.register(vine_svc)
1176
+ hub.register(PhotosynthesisService())
1177
+ hub.register(EnergyService())
1178
+ hub.register(AdvisoryService(vine_sensor_svc=vine_svc, verbose=verbose))
1179
+ hub.register(BiologyService())
1180
+ return hub
src/data/data_schema.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SolarWine 2.0 — Data Schema
3
+ ============================
4
+ Canonical dataclasses for the four telemetry tables that flow through
5
+ the 15-minute control loop.
6
+
7
+ SensorRaw — one-slot snapshot of all on-site + IMS inputs
8
+ BiologicalState — photosynthesis model outputs + phenological state
9
+ TrackerKinematics — tracker position, commands, operational mode
10
+ SimulationLog — complete audit record for one 15-min slot
11
+
12
+ Storage
13
+ -------
14
+ CSV/Parquet backend via to_dict() / from_dict() helpers. Schema is forward-
15
+ compatible with a future TimescaleDB migration (all timestamps are UTC,
16
+ numeric fields are SI units).
17
+
18
+ Unit conventions
19
+ ----------------
20
+ Temperatures : °C
21
+ PAR : µmol m⁻² s⁻¹
22
+ DLI : mol m⁻² day⁻¹
23
+ Irradiance (GHI) : W m⁻²
24
+ VPD : kPa
25
+ CO₂ : ppm
26
+ Angles : degrees (tilt: + = east-facing, 0 = horizontal, - = west-facing)
27
+ Energy : kWh
28
+ Soil moisture : %
29
+ Wind speed : m s⁻¹
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ from dataclasses import asdict, dataclass, field
35
+ from datetime import datetime, timezone
36
+ from typing import Any, Dict, List, Optional
37
+
38
+ from src.utils import cwsi_from_delta_t
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # SensorRaw — single 15-min slot of all sensor inputs
43
+ # ---------------------------------------------------------------------------
44
+
45
+ @dataclass
46
+ class SensorRaw:
47
+ """
48
+ Canonical sensor snapshot for one 15-min control slot.
49
+
50
+ Populated from ThingsBoard (TB) via VineSnapshot for real-time control,
51
+ or from CSV/Parquet for historical replay. IMS fields are always from
52
+ the IMS station 43 (Sde Boker) cache.
53
+ """
54
+
55
+ ts: datetime # UTC timestamp of the slot start
56
+
57
+ # --- TB microclimate (treatment area Air2/3/4 average) ---
58
+ air_temp_c: Optional[float] = None
59
+ leaf_temp_c: Optional[float] = None
60
+ vpd_kpa: Optional[float] = None
61
+ co2_ppm: Optional[float] = None
62
+ air_leaf_delta_t: Optional[float] = None # proxy for CWSI
63
+ humidity_pct: Optional[float] = None
64
+ dew_temp_c: Optional[float] = None
65
+
66
+ # --- PAR / irradiance ---
67
+ par_umol: Optional[float] = None # above-canopy ambient PAR (Air devices)
68
+ fruiting_zone_par_umol: Optional[float] = None # mid-canopy PAR (Crop3/5/6/7 avg)
69
+ ghi_w_m2: Optional[float] = None # IMS global horizontal irradiance
70
+
71
+ # --- Daily light / spectral indices ---
72
+ dli_mol_m2: Optional[float] = None # daily light integral so far
73
+ ndvi: Optional[float] = None
74
+ pri: Optional[float] = None
75
+
76
+ # --- Wind & rain ---
77
+ wind_speed_ms: Optional[float] = None
78
+ wind_angle_deg: Optional[float] = None
79
+ rain_mm: Optional[float] = None
80
+ air_pressure_hpa: Optional[float] = None
81
+
82
+ # --- TB soil (treatment area Soil1/3/5/6 average) ---
83
+ soil_moisture_pct: Optional[float] = None
84
+ soil_temp_c: Optional[float] = None
85
+ soil_ec_ds_m: Optional[float] = None
86
+ soil_ph: Optional[float] = None
87
+
88
+ # --- TB reference area (Crop1/2/4 avg, open sky) ---
89
+ reference_crop_par_umol: Optional[float] = None
90
+ reference_crop_leaf_temp_c: Optional[float] = None
91
+ reference_soil_moisture_pct: Optional[float] = None
92
+
93
+ # --- Shading effectiveness ---
94
+ par_shading_ratio: Optional[float] = None # treatment / reference PAR (<1 = shaded)
95
+
96
+ # --- Derived stress index ---
97
+ cwsi: Optional[float] = None # explicit CWSI if available from TB
98
+
99
+ # --- Data provenance ---
100
+ source: str = "unknown" # "thingsboard" | "ims" | "csv" | "mixed"
101
+ quality_flags: List[str] = field(default_factory=list)
102
+ # e.g. ["soil5_temp_outlier_excluded", "air3_stale"]
103
+
104
+ # ------------------------------------------------------------------
105
+ # Factory: build from a VineSnapshot
106
+ # ------------------------------------------------------------------
107
+
108
+ @classmethod
109
+ def from_vine_snapshot(cls, snapshot: Any) -> "SensorRaw":
110
+ """
111
+ Construct SensorRaw from a ThingsBoardClient.VineSnapshot.
112
+
113
+ The snapshot already contains treatment-vs-reference aggregations
114
+ and bounded averages; this method simply re-maps them to the
115
+ canonical SensorRaw field names.
116
+ """
117
+ flags: List[str] = []
118
+ if hasattr(snapshot, "staleness_minutes") and snapshot.staleness_minutes > 20:
119
+ flags.append(f"stale_{snapshot.staleness_minutes:.0f}min")
120
+
121
+ # CWSI proxy from air-leaf temperature delta (see src.utils.cwsi_from_delta_t)
122
+ cwsi_proxy: Optional[float] = None
123
+ delta_t = getattr(snapshot, "treatment_air_leaf_delta_t", None)
124
+ if delta_t is not None:
125
+ cwsi_proxy = cwsi_from_delta_t(delta_t=delta_t)
126
+
127
+ return cls(
128
+ ts=getattr(snapshot, "snapshot_ts", datetime.now(tz=timezone.utc)),
129
+
130
+ # Microclimate
131
+ air_temp_c=getattr(snapshot, "treatment_air_temp_c", None),
132
+ leaf_temp_c=getattr(snapshot, "treatment_leaf_temp_c", None)
133
+ or getattr(snapshot, "treatment_crop_leaf_temp_c", None),
134
+ vpd_kpa=getattr(snapshot, "treatment_vpd_kpa", None),
135
+ co2_ppm=getattr(snapshot, "treatment_co2_ppm", None),
136
+ air_leaf_delta_t=delta_t,
137
+ humidity_pct=getattr(snapshot, "ambient_humidity_pct", None),
138
+
139
+ # PAR
140
+ par_umol=getattr(snapshot, "treatment_par_umol", None),
141
+ fruiting_zone_par_umol=getattr(snapshot, "treatment_crop_par_umol", None),
142
+ dli_mol_m2=getattr(snapshot, "treatment_crop_dli_mol_m2", None),
143
+ ndvi=getattr(snapshot, "treatment_crop_ndvi", None),
144
+ pri=getattr(snapshot, "treatment_pri", None),
145
+
146
+ # Wind / weather
147
+ wind_speed_ms=getattr(snapshot, "ambient_wind_speed_ms", None),
148
+ wind_angle_deg=getattr(snapshot, "ambient_wind_angle_deg", None),
149
+ rain_mm=getattr(snapshot, "ambient_rain_mm", None),
150
+
151
+ # Soil
152
+ soil_moisture_pct=getattr(snapshot, "treatment_soil_moisture_pct", None),
153
+ soil_temp_c=getattr(snapshot, "treatment_soil_temp_c", None),
154
+ soil_ec_ds_m=getattr(snapshot, "treatment_soil_ec_ds_m", None),
155
+ soil_ph=getattr(snapshot, "treatment_soil_ph", None),
156
+
157
+ # Reference
158
+ reference_crop_par_umol=getattr(snapshot, "reference_crop_par_umol", None),
159
+ reference_crop_leaf_temp_c=getattr(snapshot, "reference_crop_leaf_temp_c", None),
160
+ reference_soil_moisture_pct=getattr(snapshot, "reference_soil_moisture_pct", None),
161
+
162
+ # Shading ratio
163
+ par_shading_ratio=getattr(snapshot, "par_shading_ratio", None),
164
+
165
+ cwsi=cwsi_proxy,
166
+ source="thingsboard",
167
+ quality_flags=flags,
168
+ )
169
+
170
+ # ------------------------------------------------------------------
171
+ # Serialization
172
+ # ------------------------------------------------------------------
173
+
174
+ def to_dict(self) -> Dict[str, Any]:
175
+ d = asdict(self)
176
+ d["ts"] = self.ts.isoformat() if self.ts else None
177
+ return d
178
+
179
+ @classmethod
180
+ def from_dict(cls, d: Dict[str, Any]) -> "SensorRaw":
181
+ d = d.copy()
182
+ if isinstance(d.get("ts"), str):
183
+ d["ts"] = datetime.fromisoformat(d["ts"])
184
+ return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})
185
+
186
+
187
+ # ---------------------------------------------------------------------------
188
+ # BiologicalState — photosynthesis model outputs + phenology
189
+ # ---------------------------------------------------------------------------
190
+
191
+ @dataclass
192
+ class BiologicalState:
193
+ """
194
+ Computed vine physiological state for one control slot.
195
+
196
+ Produced by the FarquharModel (or ML ensemble via RoutingAgent) and
197
+ the phenology tracker. Drives the InterventionGate and TradeoffEngine.
198
+ """
199
+
200
+ ts: datetime
201
+
202
+ # --- Photosynthesis model outputs ---
203
+ a_net_umol: Optional[float] = None # net carbon assimilation (µmol CO₂ m⁻² s⁻¹)
204
+ limiting_state: Optional[str] = None # "rubp" | "rubisco" | "tpu" | "transition"
205
+ shading_helps: Optional[bool] = None # True only when Rubisco-limited AND heat is bottleneck
206
+
207
+ # --- Model provenance ---
208
+ model_used: str = "unknown" # "fvcb" | "fvcb_semillon" | "ml" | "ml_ensemble"
209
+ model_confidence: Optional[float] = None # 0–1 (1 = high confidence in routing choice)
210
+
211
+ # --- Raw inputs echoed for auditing ---
212
+ par_input: Optional[float] = None
213
+ tleaf_input: Optional[float] = None
214
+ vpd_input: Optional[float] = None
215
+ co2_input: Optional[float] = None
216
+
217
+ # --- Phenological state ---
218
+ phenological_stage: str = "vegetative" # vegetative | flowering | veraison | harvest
219
+ gdd_cumulative: Optional[float] = None # growing degree days since budburst
220
+ crop_value_weight: float = 1.0 # seasonal multiplier (1.5× at veraison, 0.5× post-harvest)
221
+
222
+ # --- Stress levels ---
223
+ heat_stress_level: str = "none" # none | low | moderate | high | extreme
224
+ water_stress_level: str = "none"
225
+ sunburn_risk: bool = False # True when Tleaf > BERRY_SUNBURN_TEMP_C
226
+
227
+ # --- Fruiting-zone specific ---
228
+ fruiting_zone_a_net: Optional[float] = None # A at mid-canopy zone (zone index 1)
229
+ fruiting_zone_par: Optional[float] = None # PAR at mid-canopy
230
+ top_canopy_a_net: Optional[float] = None # A at top-canopy zone (zone index 2)
231
+
232
+ # ------------------------------------------------------------------
233
+
234
+ def to_dict(self) -> Dict[str, Any]:
235
+ d = asdict(self)
236
+ d["ts"] = self.ts.isoformat() if self.ts else None
237
+ return d
238
+
239
+ @classmethod
240
+ def from_dict(cls, d: Dict[str, Any]) -> "BiologicalState":
241
+ d = d.copy()
242
+ if isinstance(d.get("ts"), str):
243
+ d["ts"] = datetime.fromisoformat(d["ts"])
244
+ return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})
245
+
246
+
247
+ # ---------------------------------------------------------------------------
248
+ # TrackerKinematics — tracker position and operational mode
249
+ # ---------------------------------------------------------------------------
250
+
251
+ @dataclass
252
+ class TrackerKinematics:
253
+ """
254
+ Single-axis tracker state for one control slot.
255
+
256
+ astronomical_tilt_deg is always the sun-following position (full-energy).
257
+ shade_offset_deg is the deliberate deviation for vine protection.
258
+ effective_tilt_deg = astronomical_tilt_deg + shade_offset_deg.
259
+
260
+ Angle convention: 0° = horizontal, positive = tilted toward east,
261
+ negative = tilted toward west (consistent with pvlib single-axis sign convention).
262
+ """
263
+
264
+ ts: datetime
265
+
266
+ # --- Astronomical tracking (default / full-energy position) ---
267
+ astronomical_tilt_deg: float = 0.0
268
+ solar_azimuth_deg: Optional[float] = None
269
+ solar_elevation_deg: Optional[float] = None
270
+
271
+ # --- Shading offset (deliberate protection deviation) ---
272
+ shade_offset_deg: float = 0.0 # 0 = no protection, positive values = shade intervention
273
+ effective_tilt_deg: float = 0.0 # astronomical + shade_offset
274
+
275
+ # --- Previous slot (for hysteresis) ---
276
+ previous_tilt_deg: Optional[float] = None
277
+ tilt_change_deg: float = 0.0 # effective_tilt - previous_tilt
278
+ motion_triggered: bool = False # True if |change| > ANGLE_TOLERANCE_DEG
279
+
280
+ # --- Operational mode ---
281
+ operational_mode: str = "tracking" # tracking | wind_stow | heat_shield | harvest_park
282
+ mode_override_reason: Optional[str] = None
283
+
284
+ # --- Panel surface temperatures ---
285
+ panel_temp_treatment_c: Optional[float] = None # Thermocouples1 avg
286
+ panel_temp_reference_c: Optional[float] = None # Thermocouples2 avg
287
+
288
+ # ------------------------------------------------------------------
289
+
290
+ def to_dict(self) -> Dict[str, Any]:
291
+ d = asdict(self)
292
+ d["ts"] = self.ts.isoformat() if self.ts else None
293
+ return d
294
+
295
+ @classmethod
296
+ def from_dict(cls, d: Dict[str, Any]) -> "TrackerKinematics":
297
+ d = d.copy()
298
+ if isinstance(d.get("ts"), str):
299
+ d["ts"] = datetime.fromisoformat(d["ts"])
300
+ return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})
301
+
302
+
303
+ # ---------------------------------------------------------------------------
304
+ # SimulationLog — complete audit record for one 15-min slot
305
+ # ---------------------------------------------------------------------------
306
+
307
+ @dataclass
308
+ class SimulationLog:
309
+ """
310
+ Full audit record for one 15-minute control loop execution.
311
+
312
+ Written to `data/simulation_log.parquet` (or CSV) after every slot.
313
+ Used for replay, validation, ROI reporting, and Phase 7 integration tests.
314
+ """
315
+
316
+ ts: datetime
317
+ slot_index: int # 0–95 for a 24-hour day (96 × 15-min slots)
318
+ date_str: str = "" # YYYY-MM-DD local date for partitioning
319
+
320
+ # --- Nested state objects ---
321
+ sensor: Optional[SensorRaw] = None
322
+ bio: Optional[BiologicalState] = None
323
+ kinematics: Optional[TrackerKinematics] = None
324
+
325
+ # --- InterventionGate outcome ---
326
+ intervention_gate_passed: bool = False
327
+ gate_rejection_reason: Optional[str] = None
328
+ # Rejection categories: "no_shade_window:morning" | "no_shade_window:may" |
329
+ # "overcast" | "below_temp_threshold" | "below_cwsi_threshold" | "budget_exhausted"
330
+
331
+ # --- TradeoffEngine outcome ---
332
+ candidate_offsets_tested: List[float] = field(default_factory=list)
333
+ chosen_offset_deg: float = 0.0
334
+ minimum_dose_rationale: Optional[str] = None
335
+ # e.g. "offset 5° sufficient: fruiting PAR reduced below 400 µmol/m²/s"
336
+
337
+ # --- Safety rails ---
338
+ fvcb_a: Optional[float] = None
339
+ ml_a: Optional[float] = None
340
+ model_divergence_pct: Optional[float] = None # |fvcb_a - ml_a| / max * 100
341
+ safety_fallback_triggered: bool = False
342
+ routing_decision: Optional[str] = None # "fvcb" | "ml" — which model was used
343
+
344
+ # --- Energy budget accounting ---
345
+ energy_fraction_this_slot: float = 0.0 # fraction of max generation sacrificed
346
+ budget_remaining_daily_kwh: Optional[float] = None
347
+ budget_remaining_weekly_kwh: Optional[float] = None
348
+ budget_remaining_monthly_kwh: Optional[float] = None
349
+
350
+ # --- Feedback (filled in the following slot) ---
351
+ a_net_actual: Optional[float] = None # measured A in next slot (for validation)
352
+ a_net_improvement_pct: Optional[float] = None # vs unshaded counterfactual
353
+
354
+ # --- Explainability tags ---
355
+ decision_tags: List[str] = field(default_factory=list)
356
+ # e.g. ["rubisco_limited", "dose:5deg", "veraison_1.5x", "budget_ok:32%_remaining"]
357
+
358
+ # ------------------------------------------------------------------
359
+ # Serialization
360
+ # ------------------------------------------------------------------
361
+
362
+ def to_dict(self) -> Dict[str, Any]:
363
+ """Deep-serialize to a plain dict (JSON-serializable)."""
364
+ d: Dict[str, Any] = {
365
+ "ts": self.ts.isoformat() if self.ts else None,
366
+ "slot_index": self.slot_index,
367
+ "date_str": self.date_str,
368
+ "sensor": self.sensor.to_dict() if self.sensor else None,
369
+ "bio": self.bio.to_dict() if self.bio else None,
370
+ "kinematics": self.kinematics.to_dict() if self.kinematics else None,
371
+ "intervention_gate_passed": self.intervention_gate_passed,
372
+ "gate_rejection_reason": self.gate_rejection_reason,
373
+ "candidate_offsets_tested": self.candidate_offsets_tested,
374
+ "chosen_offset_deg": self.chosen_offset_deg,
375
+ "minimum_dose_rationale": self.minimum_dose_rationale,
376
+ "fvcb_a": self.fvcb_a,
377
+ "ml_a": self.ml_a,
378
+ "model_divergence_pct": self.model_divergence_pct,
379
+ "safety_fallback_triggered": self.safety_fallback_triggered,
380
+ "routing_decision": self.routing_decision,
381
+ "energy_fraction_this_slot": self.energy_fraction_this_slot,
382
+ "budget_remaining_daily_kwh": self.budget_remaining_daily_kwh,
383
+ "budget_remaining_weekly_kwh": self.budget_remaining_weekly_kwh,
384
+ "budget_remaining_monthly_kwh": self.budget_remaining_monthly_kwh,
385
+ "a_net_actual": self.a_net_actual,
386
+ "a_net_improvement_pct": self.a_net_improvement_pct,
387
+ "decision_tags": self.decision_tags,
388
+ }
389
+ return d
390
+
391
+ def to_flat_row(self) -> Dict[str, Any]:
392
+ """
393
+ Flatten all nested objects into a single dict row suitable for
394
+ appending to a Parquet or CSV log file.
395
+
396
+ Nested field names are prefixed: sensor__*, bio__*, kinematics__*.
397
+ """
398
+ row: Dict[str, Any] = {
399
+ "ts": self.ts.isoformat() if self.ts else None,
400
+ "slot_index": self.slot_index,
401
+ "date_str": self.date_str,
402
+ "gate_passed": self.intervention_gate_passed,
403
+ "gate_reason": self.gate_rejection_reason,
404
+ "chosen_offset_deg": self.chosen_offset_deg,
405
+ "fvcb_a": self.fvcb_a,
406
+ "ml_a": self.ml_a,
407
+ "divergence_pct": self.model_divergence_pct,
408
+ "fallback": self.safety_fallback_triggered,
409
+ "routing": self.routing_decision,
410
+ "energy_fraction": self.energy_fraction_this_slot,
411
+ "budget_daily_kwh": self.budget_remaining_daily_kwh,
412
+ "budget_monthly_kwh": self.budget_remaining_monthly_kwh,
413
+ "a_net_actual": self.a_net_actual,
414
+ "a_net_improvement_pct": self.a_net_improvement_pct,
415
+ "tags": "|".join(self.decision_tags),
416
+ }
417
+ if self.sensor:
418
+ for k, v in self.sensor.to_dict().items():
419
+ if k not in ("ts", "quality_flags", "source"):
420
+ row[f"sensor__{k}"] = v
421
+ if self.bio:
422
+ for k, v in self.bio.to_dict().items():
423
+ if k != "ts":
424
+ row[f"bio__{k}"] = v
425
+ if self.kinematics:
426
+ for k, v in self.kinematics.to_dict().items():
427
+ if k != "ts":
428
+ row[f"kin__{k}"] = v
429
+ return row
430
+
431
+
432
+ # ---------------------------------------------------------------------------
433
+ # Public convenience re-exports from VineSnapshot
434
+ # ---------------------------------------------------------------------------
435
+
436
+ def sensor_raw_from_vine_snapshot(snapshot: Any) -> SensorRaw:
437
+ """Module-level alias for SensorRaw.from_vine_snapshot()."""
438
+ return SensorRaw.from_vine_snapshot(snapshot)
439
+
440
+
441
+ # ---------------------------------------------------------------------------
442
+ # Quick self-test
443
+ # ---------------------------------------------------------------------------
444
+
445
+ if __name__ == "__main__":
446
+ import json
447
+ from datetime import timezone
448
+
449
+ now = datetime.now(tz=timezone.utc)
450
+
451
+ sensor = SensorRaw(
452
+ ts=now,
453
+ air_temp_c=33.5,
454
+ leaf_temp_c=35.1,
455
+ vpd_kpa=2.9,
456
+ co2_ppm=410.0,
457
+ fruiting_zone_par_umol=820.0,
458
+ soil_moisture_pct=31.2,
459
+ reference_crop_par_umol=1150.0,
460
+ par_shading_ratio=0.71,
461
+ source="thingsboard",
462
+ )
463
+ bio = BiologicalState(
464
+ ts=now,
465
+ a_net_umol=14.3,
466
+ limiting_state="rubisco",
467
+ shading_helps=True,
468
+ model_used="fvcb_semillon",
469
+ phenological_stage="veraison",
470
+ crop_value_weight=1.5,
471
+ heat_stress_level="moderate",
472
+ sunburn_risk=True,
473
+ )
474
+ kin = TrackerKinematics(
475
+ ts=now,
476
+ astronomical_tilt_deg=42.0,
477
+ shade_offset_deg=5.0,
478
+ effective_tilt_deg=47.0,
479
+ previous_tilt_deg=42.0,
480
+ tilt_change_deg=5.0,
481
+ motion_triggered=True,
482
+ operational_mode="tracking",
483
+ panel_temp_treatment_c=58.3,
484
+ )
485
+ log = SimulationLog(
486
+ ts=now,
487
+ slot_index=52,
488
+ date_str="2025-07-15",
489
+ sensor=sensor,
490
+ bio=bio,
491
+ kinematics=kin,
492
+ intervention_gate_passed=True,
493
+ candidate_offsets_tested=[3.0, 5.0],
494
+ chosen_offset_deg=5.0,
495
+ minimum_dose_rationale="5° sufficient to reduce fruiting-zone PAR below 400",
496
+ fvcb_a=14.3,
497
+ ml_a=14.8,
498
+ model_divergence_pct=3.4,
499
+ routing_decision="fvcb_semillon",
500
+ energy_fraction_this_slot=0.042,
501
+ budget_remaining_daily_kwh=8.1,
502
+ decision_tags=["rubisco_limited", "dose:5deg", "veraison_1.5x", "budget_ok"],
503
+ )
504
+
505
+ print("SensorRaw:")
506
+ print(json.dumps(sensor.to_dict(), indent=2, default=str))
507
+ print("\nBiologicalState:")
508
+ print(json.dumps(bio.to_dict(), indent=2, default=str))
509
+ print("\nTrackerKinematics:")
510
+ print(json.dumps(kin.to_dict(), indent=2, default=str))
511
+ print("\nSimulationLog flat row keys:")
512
+ row = log.to_flat_row()
513
+ print(f" {len(row)} columns")
514
+ print(" First 10:", list(row.keys())[:10])
515
+ print("\nSensorRaw round-trip:")
516
+ s2 = SensorRaw.from_dict(sensor.to_dict())
517
+ assert s2.air_temp_c == sensor.air_temp_c
518
+ assert isinstance(s2.ts, datetime)
519
+ print(" OK")
src/data/ims_client.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ IMSClient: fetch and cache IMS weather data from station 43 (Sde Boker).
3
+ Resamples 10min data to 15min for alignment with sensor data.
4
+ """
5
+
6
+ import os
7
+ import time
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ import pandas as pd
12
+ import requests
13
+
14
+ try:
15
+ from dotenv import load_dotenv
16
+ load_dotenv()
17
+ except ImportError:
18
+ pass
19
+
20
+
21
+ def _parse_ims_date(d: str) -> str:
22
+ """Convert YYYY-MM-DD to IMS format YYYY/MM/DD."""
23
+ return d.replace("-", "/")
24
+
25
+
26
class IMSClient:
    """Fetch IMS API data for a station and cache to Data/ims/.

    Defaults (station id, cache dir, channel map, base URL) come from
    ``config.settings``; the API token is read from the IMS_API_TOKEN
    environment variable unless passed explicitly.
    """

    def __init__(
        self,
        token: Optional[str] = None,
        station_id: Optional[int] = None,
        cache_dir: Optional[Path] = None,
        channel_map: Optional[dict[int, str]] = None,
    ):
        # Lazy import: keeps this module importable before config is on sys.path.
        from config import settings

        self.token = (token or os.environ.get("IMS_API_TOKEN", "")).strip()
        if not self.token:
            raise ValueError(
                "IMS API token is required. Set IMS_API_TOKEN in .env, "
                "in Streamlit Secrets, or pass token= to IMSClient."
            )
        self.station_id = station_id or settings.IMS_STATION_ID
        self.cache_dir = cache_dir or settings.IMS_CACHE_DIR
        self.channel_map = channel_map or settings.IMS_CHANNEL_MAP.copy()
        self._base = f"{settings.IMS_BASE_URL}/{self.station_id}/data"
        self._stations_url = settings.IMS_BASE_URL

    def get_station_metadata(self, station_id: Optional[int] = None) -> dict:
        """
        Fetch station metadata from IMS API (name, location, monitors/channels).

        Returns dict with 'stationId', 'name', 'monitors' (list of
        {channelId, name, units, ...}). Raises requests.HTTPError on a
        non-2xx response.
        """
        sid = station_id or self.station_id
        url = f"{self._stations_url}/{sid}"
        headers = {"Authorization": f"ApiToken {self.token}"}
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        return r.json()

    def list_channels(self, station_id: Optional[int] = None) -> list[dict]:
        """Return list of channel descriptors for the station (channelId, name, units, active)."""
        meta = self.get_station_metadata(station_id)
        monitors = meta.get("monitors", meta.get("channelGroups", []))
        # Normalize: IMS responses use varying key names ('id' vs 'channelId', etc.)
        out = []
        for m in monitors:
            if isinstance(m, dict):
                out.append({
                    "channelId": m.get("channelId", m.get("id")),
                    "name": m.get("name", m.get("channelName", "")),
                    "units": m.get("units", ""),
                    "active": m.get("active", True),
                })
        return out

    def fetch_channel(
        self,
        channel_id: int,
        from_date: str,
        to_date: str,
    ) -> pd.DataFrame:
        """
        Fetch one channel for date range. Dates as YYYY-MM-DD.

        Returns a DataFrame indexed by timestamp_utc with one value column
        (named from the channel map), or an empty DataFrame for empty or
        unparseable responses.
        """
        from_f = _parse_ims_date(from_date)
        to_f = _parse_ims_date(to_date)
        url = f"{self._base}/{channel_id}?from={from_f}&to={to_f}"
        headers = {"Authorization": f"ApiToken {self.token}"}
        r = requests.get(url, headers=headers, timeout=120)
        r.raise_for_status()
        if not r.text or not r.text.strip():
            return pd.DataFrame()
        try:
            raw = r.json()
        except Exception:
            # Non-JSON body (e.g. an HTML error page): treat as no data.
            return pd.DataFrame()
        data = raw.get("data", raw) if isinstance(raw, dict) else raw
        if not isinstance(data, list):
            data = []
        col_name = self.channel_map.get(channel_id, f"channel_{channel_id}")
        rows = []
        for item in data:
            dt = item.get("datetime")
            # IMS returns Israel time (Asia/Jerusalem); parse and convert to UTC
            if isinstance(dt, str):
                ts = pd.to_datetime(dt)
                if ts.tzinfo is None:
                    ts = ts.tz_localize("Asia/Jerusalem").tz_convert("UTC")
                else:
                    ts = ts.tz_convert("UTC")
            else:
                continue
            ch_list = item.get("channels", [])
            val = None
            for ch in ch_list:
                # status == 1 marks a valid reading for this channel.
                if ch.get("id") == channel_id and ch.get("status") == 1:
                    val = ch.get("value")
                    break
            rows.append({"timestamp_utc": ts, col_name: val})
        df = pd.DataFrame(rows)
        if not df.empty:
            df = df.dropna(subset=[col_name])
            df = df.set_index("timestamp_utc").sort_index()
        return df

    def fetch_all_channels(
        self,
        from_date: str,
        to_date: str,
        delay_seconds: float = 0.5,
    ) -> pd.DataFrame:
        """
        Fetch all configured channels and merge on timestamp_utc.

        A short delay is inserted between consecutive API requests — even
        when a channel returns no data — so empty responses do not cause
        back-to-back requests that defeat the rate-limit throttle.
        """
        out = None
        channel_ids = list(self.channel_map)
        last = len(channel_ids) - 1
        for i, ch_id in enumerate(channel_ids):
            df = self.fetch_channel(ch_id, from_date, to_date)
            if i < last:
                time.sleep(delay_seconds)  # throttle between requests
            if df.empty:
                continue
            out = df if out is None else out.join(df, how="outer")
        if out is None:
            return pd.DataFrame()
        return out.reset_index()

    def resample_to_15min(self, df: pd.DataFrame) -> pd.DataFrame:
        """Resample 10min IMS data to 15min (mean). Expects timestamp_utc column."""
        if df.empty or "timestamp_utc" not in df.columns:
            return df
        d = df.set_index("timestamp_utc")
        # Drop slots where every channel is NaN (e.g. station downtime).
        d = d.resample("15min").mean().dropna(how="all")
        return d.reset_index()

    def load_cached(self, cache_path: Optional[Path] = None) -> pd.DataFrame:
        """Load merged IMS data from cache file if it exists; empty DataFrame otherwise."""
        path = cache_path or (self.cache_dir / "ims_merged_15min.csv")
        if not path.exists():
            return pd.DataFrame()
        df = pd.read_csv(path)
        if "timestamp_utc" in df.columns:
            df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
        return df

    def fetch_and_cache(
        self,
        from_date: str,
        to_date: str,
        cache_path: Optional[Path] = None,
        chunk_days: Optional[int] = 60,
    ) -> pd.DataFrame:
        """
        Fetch all channels for the date range, resample to 15min, save to cache.

        If chunk_days is set, split the range into chunks to avoid API empty
        responses. Returns the resampled DataFrame (empty if nothing fetched).
        """
        path = cache_path or (self.cache_dir / "ims_merged_15min.csv")
        path.parent.mkdir(parents=True, exist_ok=True)

        from datetime import datetime, timedelta

        start = datetime.strptime(from_date, "%Y-%m-%d").date()
        end = datetime.strptime(to_date, "%Y-%m-%d").date()
        if start > end:
            start, end = end, start

        if chunk_days is None or (end - start).days <= chunk_days:
            df = self.fetch_all_channels(from_date, to_date)
        else:
            chunks = []
            d = start
            while d < end:
                chunk_end = min(d + timedelta(days=chunk_days), end)
                from_s = d.strftime("%Y-%m-%d")
                to_s = chunk_end.strftime("%Y-%m-%d")
                try:
                    df_chunk = self.fetch_all_channels(from_s, to_s)
                    if not df_chunk.empty:
                        chunks.append(df_chunk)
                except Exception:
                    pass  # best-effort: skip failed chunk, continue with the rest
                d = chunk_end
            df = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
            # Chunk boundaries share the boundary day; de-duplicate on timestamp.
            if not df.empty and "timestamp_utc" in df.columns:
                df = df.drop_duplicates(subset=["timestamp_utc"]).sort_values("timestamp_utc")

        if df.empty:
            return df
        df = self.resample_to_15min(df)
        df.to_csv(path, index=False)
        return df
src/data/redis_cache.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Thin Redis wrapper for cross-process caching (Upstash Redis REST API).
3
+
4
+ Falls back gracefully to ``None`` returns when Redis is unavailable,
5
+ so callers can use in-memory TTLCache as a fallback.
6
+
7
+ Usage::
8
+
9
+ from src.data.redis_cache import get_redis
10
+
11
+ redis = get_redis() # None if no UPSTASH_REDIS_URL
12
+ if redis:
13
+ redis.set_json("weather:current", data, ttl=1800)
14
+ cached = redis.get_json("weather:current")
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
import json
import logging
import os
import threading
from typing import Any, Optional
from urllib.parse import quote
24
+
25
log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Singleton (thread-safe)
# ---------------------------------------------------------------------------

_instance: Optional["RedisCache"] = None
_lock = threading.Lock()


def get_redis() -> Optional["RedisCache"]:
    """Return the process-wide RedisCache singleton, or *None* if not configured.

    Lazily constructs the cache on first use, guarded by a lock so concurrent
    callers cannot race the initialisation (classic double-checked locking).
    Returns None — without caching the failure — when the env vars are missing
    or the connection attempt raises.
    """
    global _instance

    if _instance is not None:  # fast path: already initialised, no lock needed
        return _instance

    url = os.environ.get("UPSTASH_REDIS_URL")
    token = os.environ.get("UPSTASH_REDIS_TOKEN")
    if not (url and token):
        log.debug("Redis not configured (UPSTASH_REDIS_URL / UPSTASH_REDIS_TOKEN missing)")
        return None

    with _lock:
        if _instance is None:  # re-check: another thread may have won the race
            try:
                _instance = RedisCache(url=url, token=token)
                log.info("Redis connected: %s", url.split("@")[-1] if "@" in url else url[:40])
            except Exception as exc:
                log.error("Redis init failed: %s", exc)
                return None
        return _instance
60
+
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # RedisCache (Upstash REST)
64
+ # ---------------------------------------------------------------------------
65
+
66
class RedisCache:
    """Minimal Redis cache using the Upstash REST API (no native driver needed).

    Keys are URL-quoted before being placed in the REST path, so keys that
    contain '/', '?', '#', or spaces address the intended Redis key instead
    of mangling the request URL.
    """

    def __init__(self, url: str, token: str):
        """Store connection details and verify connectivity with a PING.

        Raises (via raise_for_status) if the endpoint rejects the token or
        is unreachable — get_redis() catches this and falls back to None.
        """
        self._url = url.rstrip("/")
        self._headers = {"Authorization": f"Bearer {token}"}
        # Lazy import — requests is already a project dependency
        import requests as _req
        self._req = _req
        # Connectivity check
        resp = self._req.get(f"{self._url}/ping", headers=self._headers, timeout=5)
        resp.raise_for_status()

    # -- JSON helpers -------------------------------------------------------

    def get_json(self, key: str) -> Optional[Any]:
        """Retrieve and JSON-decode a key. Returns None on miss or error."""
        try:
            resp = self._req.get(
                f"{self._url}/get/{quote(key, safe='')}",
                headers=self._headers,
                timeout=5,
            )
            resp.raise_for_status()
            result = resp.json().get("result")
            if result is None:
                return None
            return json.loads(result)
        except Exception as exc:
            log.debug("Redis GET %s failed: %s", key, exc)
            return None

    def set_json(self, key: str, value: Any, ttl: int = 300) -> bool:
        """JSON-encode and store *value* with a TTL in seconds.

        The key travels in the POST body (pipeline format), so no URL
        quoting is required here.
        """
        try:
            payload = json.dumps(value, default=str)
            # Upstash REST API: POST pipeline format
            resp = self._req.post(
                f"{self._url}/pipeline",
                headers={**self._headers, "Content-Type": "application/json"},
                json=[["SET", key, payload, "EX", str(ttl)]],
                timeout=5,
            )
            resp.raise_for_status()
            return True
        except Exception as exc:
            log.debug("Redis SET %s failed: %s", key, exc)
            return False

    def delete(self, key: str) -> bool:
        """Delete a key. Returns True on success, False on any error."""
        try:
            resp = self._req.get(
                f"{self._url}/del/{quote(key, safe='')}",
                headers=self._headers,
                timeout=5,
            )
            resp.raise_for_status()
            return True
        except Exception as exc:
            log.debug("Redis DEL %s failed: %s", key, exc)
            return False

    def exists(self, key: str) -> bool:
        """Check if a key exists (False on any error)."""
        try:
            resp = self._req.get(
                f"{self._url}/exists/{quote(key, safe='')}",
                headers=self._headers,
                timeout=5,
            )
            resp.raise_for_status()
            return resp.json().get("result", 0) == 1
        except Exception:
            return False

    def ping(self) -> bool:
        """Health check: True iff the REST endpoint answers /ping with HTTP 200."""
        try:
            resp = self._req.get(
                f"{self._url}/ping",
                headers=self._headers,
                timeout=5,
            )
            return resp.status_code == 200
        except Exception:
            return False
src/data/sensor_data_loader.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SensorDataLoader: load and filter sensors_wide.csv for Stage 1 (Farquhar model).
3
+ Uses only on-site sensor data from the sensor data directory.
4
+ """
5
+
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ import pandas as pd
10
+
11
+
12
# Stage 1 columns (Farquhar + CWSI) per context/2_plan.md
STAGE1_COLUMNS = [
    "Air1_PAR_ref",
    "Air1_leafTemperature_ref",
    "Air1_airTemperature_ref",
    "Air1_CO2_ref",
    "Air1_VPD_ref",
    "Air1_airHumidity_ref",
]
# Optional spectral indices (Crop sensors); include if present
STAGE1_OPTIONAL = ["Air1_NDVI_ref", "Air1_PRI_ref", "Air1_rNDVI_ref", "Air1_RENDVI_ref"]

# Default timestamp column name in wide CSV
DEFAULT_TIMESTAMP_COL = "time"


class SensorDataLoader:
    """Load sensors_wide.csv and provide Stage 1 columns and daytime filter."""

    # Multiplicative correction for Air1_CO2_ref — the raw sensor reads ~30% high.
    # Class-level so a deployment with a recalibrated sensor can override it.
    CO2_CORRECTION_FACTOR = 0.7

    def __init__(
        self,
        data_path: Optional[Path] = None,
        metadata_path: Optional[Path] = None,
    ):
        # Lazy import keeps the module importable before config is on sys.path.
        from config import settings

        _default = settings.SENSORS_WIDE_PATH
        # Fall back to the bundled sample file when the full dataset is absent.
        if not _default.exists() and settings.SENSORS_WIDE_SAMPLE_PATH.exists():
            _default = settings.SENSORS_WIDE_SAMPLE_PATH
        self.data_path = data_path or _default
        self.metadata_path = metadata_path or settings.SENSORS_WIDE_METADATA_PATH

    def get_stage1_columns(self) -> list[str]:
        """Return list of column names required for Stage 1 (Farquhar + CWSI)."""
        return list(STAGE1_COLUMNS)

    def load(
        self,
        columns: Optional[list[str]] = None,
        timestamp_col: Optional[str] = None,
    ) -> pd.DataFrame:
        """
        Load sensors_wide.csv. If columns is None, load all Stage 1 columns
        plus timestamp. Rows come back sorted by timestamp (UTC) and
        Air1_CO2_ref is corrected by CO2_CORRECTION_FACTOR.

        Raises:
            ValueError: if any requested column (or the timestamp column)
                is missing from the CSV.
        """
        ts_col = timestamp_col or DEFAULT_TIMESTAMP_COL
        use_cols = columns if columns is not None else self.get_stage1_columns()
        use_cols = [c for c in use_cols if c != ts_col]
        if ts_col not in use_cols:
            use_cols = [ts_col] + use_cols

        # The usecols callable silently tolerates missing names, so validate
        # explicitly below and fail loudly instead of returning partial data.
        df = pd.read_csv(self.data_path, usecols=lambda c: c in use_cols)
        missing = [c for c in use_cols if c not in df.columns]
        if missing:
            raise ValueError(
                f"Sensor data missing required columns: {missing}. "
                f"Available: {list(df.columns)[:20]}{'...' if len(df.columns) > 20 else ''}"
            )
        if ts_col in df.columns:
            df[ts_col] = pd.to_datetime(df[ts_col], utc=True)
            df = df.sort_values(ts_col).reset_index(drop=True)
        # Correct Air1_CO2_ref — raw sensor reads ≈ 30% too high
        if "Air1_CO2_ref" in df.columns:
            df["Air1_CO2_ref"] = df["Air1_CO2_ref"] * self.CO2_CORRECTION_FACTOR
        return df

    def filter_daytime(
        self,
        df: pd.DataFrame,
        par_threshold: float = 50.0,
        par_column: str = "Air1_PAR_ref",
    ) -> pd.DataFrame:
        """Keep only rows where PAR > par_threshold (daytime, umol m-2 s-1).

        Returns df unchanged when the PAR column is absent.
        """
        if par_column not in df.columns:
            return df
        return df.loc[df[par_column] > par_threshold].copy()
src/data/thingsboard_client.py ADDED
@@ -0,0 +1,1058 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ThingsBoardClient: live telemetry client for the Seymour vineyard at
3
+ web.seymouragri.com.
4
+
5
+ Device layout
6
+ -------------
7
+ TREATMENT area (rows 501–502, under solar panels):
8
+ Air2, Air3, Air4 — microclimate sensors under the panels
9
+ Crop3, Crop5, Crop6, Crop7 — fruiting-zone crop sensors (per panel position)
10
+ Soil1, Soil3, Soil5, Soil6 — root-zone soil probes
11
+ Irrigation1 — irrigation flow/volume/quality logger
12
+ Thermocouples-1 — panel surface temperature (4 positions)
13
+
14
+ REFERENCE area (rows 503–504, open sky, no panels):
15
+ Crop1, Crop2, Crop4 — fruiting-zone crop sensors (no shading)
16
+ Soil2, Soil4, Soil7, Soil9 — root-zone soil probes
17
+ Thermocouples-2 — structural/ambient thermocouple reference
18
+
19
+ AMBIENT (site-level outdoor baseline):
20
+ Air1 — outdoor climate station (above canopy, no panel)
21
+
22
+ Credentials (env vars or .env):
23
+ THINGSBOARD_HOST — default https://web.seymouragri.com
24
+ THINGSBOARD_USERNAME — tenant login email
25
+ THINGSBOARD_PASSWORD — tenant login password
26
+ THINGSBOARD_TOKEN — pre-generated JWT (takes priority over user/pass)
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import math
32
+ import os
33
+ import time
34
+ from concurrent.futures import ThreadPoolExecutor, as_completed
35
+ from dataclasses import dataclass, field
36
+ from datetime import datetime, timezone
37
+ from enum import Enum
38
+ from typing import Any, Dict, List, Optional, Tuple
39
+
40
+ import pandas as pd
41
+ import requests
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Enumerations
46
+ # ---------------------------------------------------------------------------
47
+
48
class VineArea(str, Enum):
    """Experimental area a device belongs to.

    Inherits from ``str`` so members compare equal to — and serialize as —
    their plain string values.
    """

    TREATMENT = "treatment"  # under solar panels (rows 501-502)
    REFERENCE = "reference"  # open sky, no panels (rows 503-504)
    AMBIENT = "ambient"  # site-level outdoor baseline (Air1 station)
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Device registry
56
+ # ---------------------------------------------------------------------------
57
+
58
@dataclass(frozen=True)
class DeviceInfo:
    """Static metadata for one ThingsBoard device (immutable registry entry)."""

    uuid: str  # ThingsBoard entity UUID
    device_id: int  # numeric device id (tracker entries carry 0 in the registry)
    area: VineArea  # treatment / reference / ambient
    row: Optional[int]  # vineyard row number; None for site-level devices
    label: str  # human-readable description
+
66
+
67
#: Full device registry mapping short name → DeviceInfo.
#: UUIDs are from devices.csv in the Research/PV_Vine_Tradeoff repository.
DEVICE_REGISTRY: Dict[str, DeviceInfo] = {
    # Air* — microclimate stations (ambient baseline + under-panel)
    "Air1": DeviceInfo(
        uuid="373041f0-089a-11ef-9126-b746c27d34bd", device_id=4,
        area=VineArea.AMBIENT, row=None, label="Outdoor Climate (ambient baseline)",
    ),
    "Air2": DeviceInfo(
        uuid="37bf89a0-089a-11ef-9126-b746c27d34bd", device_id=5,
        area=VineArea.TREATMENT, row=501, label="Indoor Climate Row 501 (under panels)",
    ),
    "Air3": DeviceInfo(
        uuid="3860aba0-089a-11ef-9126-b746c27d34bd", device_id=6,
        area=VineArea.TREATMENT, row=502, label="Indoor Climate Row 502 (under panels)",
    ),
    "Air4": DeviceInfo(
        uuid="04452660-7114-11ef-9360-f1ed9d9dc643", device_id=7,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 North (under panels)",
    ),
    # Crop* — fruiting-zone crop sensors
    "Crop1": DeviceInfo(
        uuid="39224df0-089a-11ef-9126-b746c27d34bd", device_id=8,
        area=VineArea.REFERENCE, row=503, label="Reference crop Row 503",
    ),
    "Crop2": DeviceInfo(
        uuid="aa0d9970-7113-11ef-9360-f1ed9d9dc643", device_id=9,
        area=VineArea.REFERENCE, row=503, label="Control crop Row 503",
    ),
    "Crop3": DeviceInfo(
        uuid="859b3ce0-29dd-11f0-96bc-55874793181d", device_id=10,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – West Bottom",
    ),
    "Crop4": DeviceInfo(
        uuid="889765e0-29dd-11f0-96bc-55874793181d", device_id=11,
        area=VineArea.REFERENCE, row=502, label="Control crop Row 502 (reference vine)",
    ),
    "Crop5": DeviceInfo(
        uuid="8b092930-29dd-11f0-96bc-55874793181d", device_id=12,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – East Upper",
    ),
    "Crop6": DeviceInfo(
        uuid="8cce31c0-29dd-11f0-96bc-55874793181d", device_id=13,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – East Bottom",
    ),
    "Crop7": DeviceInfo(
        uuid="8e7440a0-29dd-11f0-96bc-55874793181d", device_id=14,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – West Upper",
    ),
    # Soil* — root-zone soil probes
    "Soil1": DeviceInfo(
        uuid="3586b0a0-089a-11ef-9126-b746c27d34bd", device_id=16,
        area=VineArea.TREATMENT, row=502, label="Soil Row 502 (treatment)",
    ),
    "Soil2": DeviceInfo(
        uuid="35cda4b0-089a-11ef-9126-b746c27d34bd", device_id=17,
        area=VineArea.REFERENCE, row=503, label="Soil Row 503 (reference)",
    ),
    "Soil3": DeviceInfo(
        uuid="3634caf0-089a-11ef-9126-b746c27d34bd", device_id=18,
        area=VineArea.TREATMENT, row=501, label="Soil Row 501 (treatment)",
    ),
    "Soil4": DeviceInfo(
        uuid="36a4cad0-089a-11ef-9126-b746c27d34bd", device_id=19,
        area=VineArea.REFERENCE, row=504, label="Soil Row 504 Control",
    ),
    "Soil5": DeviceInfo(
        uuid="77d55280-70e7-11ef-9360-f1ed9d9dc643", device_id=20,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 South",
    ),
    "Soil6": DeviceInfo(
        uuid="7e4e4630-70e7-11ef-9360-f1ed9d9dc643", device_id=21,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 North",
    ),
    "Soil7": DeviceInfo(
        uuid="842e5540-70e7-11ef-9360-f1ed9d9dc643", device_id=22,
        area=VineArea.REFERENCE, row=504, label="Control 504 South",
    ),
    "Soil9": DeviceInfo(
        uuid="91e44ff0-70e7-11ef-9360-f1ed9d9dc643", device_id=23,
        area=VineArea.REFERENCE, row=504, label="Control 504 South (2nd probe)",
    ),
    # Irrigation / thermocouples
    "Irrigation1": DeviceInfo(
        uuid="3a066c60-089a-11ef-9126-b746c27d34bd", device_id=15,
        area=VineArea.TREATMENT, row=502, label="Irrigation Row 502",
    ),
    "Thermocouples1": DeviceInfo(
        uuid="72ce88f0-c548-11ef-8bc2-fdab9f3349b7", device_id=2,
        area=VineArea.TREATMENT, row=502, label="Panel surface temps Treatment 502",
    ),
    "Thermocouples2": DeviceInfo(
        uuid="03e40ba0-cc0e-11ef-a2e9-55874793181d", device_id=3,
        area=VineArea.REFERENCE, row=None, label="Panel/structure surface temps Reference",
    ),
    # Tracker controllers (panel angle + mode)
    # NOTE(review): tracker entries carry device_id=0 — presumably no numeric
    # id is assigned for trackers; confirm against devices.csv.
    "Tracker501": DeviceInfo(
        uuid="aac06e50-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=501, label="Tracker row 501",
    ),
    "Tracker502": DeviceInfo(
        uuid="b99bd630-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=502, label="Tracker row 502",
    ),
    "Tracker503": DeviceInfo(
        uuid="caffe4c0-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=503, label="Tracker row 503",
    ),
    "Tracker509": DeviceInfo(
        uuid="bacf7c50-fcdc-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=509, label="Tracker row 509",
    ),
}
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # Asset registry (non-device entities — e.g. the plant-level energy asset)
179
+ # ---------------------------------------------------------------------------
180
+
181
@dataclass(frozen=True)
class AssetInfo:
    """Static metadata for one ThingsBoard asset (a non-device entity)."""

    uuid: str  # ThingsBoard asset UUID
    label: str  # human-readable description

ASSET_REGISTRY: Dict[str, AssetInfo] = {
    "Plant": AssetInfo(
        uuid="dc94ddb0-dbe6-11f0-9352-a53ca0b6a212",
        label="Yeruham Vineyard — plant-level energy",
    ),
}

# Telemetry keys for the plant-level energy asset ("Plant").
ENERGY_KEYS: List[str] = ["power", "production"]
# Telemetry keys for Tracker* devices (panel angle + mode — see registry comment).
TRACKER_KEYS: List[str] = ["angle", "manualMode", "setAngle", "setMode"]
195
+
196
# ---------------------------------------------------------------------------
# Telemetry key sets per device type
# ---------------------------------------------------------------------------

# Air* stations: full microclimate set (temperature, humidity, light, wind, ...).
AIR_KEYS: List[str] = [
    "airTemperature", "leafTemperature", "VPD", "CO2", "PAR", "DLI",
    "airHumidity", "windSpeed", "windAngle", "rain", "airPressure",
    "dewTemperature", "NDVI", "PRI", "airLeafDeltaT",
]

# Crop* sensors: fruiting-zone light and canopy indices.
CROP_KEYS: List[str] = [
    "PAR", "leafTemperature", "NDVI", "PRI", "DLI", "PARAvg1H", "PARAvg24H",
]

# Soil* probes: moisture/temperature (two channels each) plus bulk EC and pH.
SOIL_KEYS: List[str] = [
    "soilMoisture", "soilMoisture2",
    "soilTemperature", "soilTemperature2",
    "soilBulkEC", "soilpH",
]

# Irrigation1 logger: flow/volume/quality readings.
IRRIGATION_KEYS: List[str] = [
    "irrigationVolume", "irrigationMinutes", "irrigationFlowRate",
    "irrigationEC", "irrigationPH", "waterTemperature",
    "irrigationCycleVolume", "irrigationCycleMinutes",
]

# Thermocouples*: four surface-temperature positions per device.
THERMOCOUPLE_KEYS: List[str] = [
    "thermocoupleTemperature_1", "thermocoupleTemperature_2",
    "thermocoupleTemperature_3", "thermocoupleTemperature_4",
]
226
+
227
+
228
+ # ---------------------------------------------------------------------------
229
+ # VineSnapshot dataclass
230
+ # ---------------------------------------------------------------------------
231
+
232
+ @dataclass
233
+ class VineSnapshot:
234
+ """
235
+ Aggregated real-time vine state from all ThingsBoard sensors.
236
+
237
+ Fields are grouped by area:
238
+ - ambient : Air1 (outdoor climate, site-level baseline)
239
+ - treatment : under solar panels (rows 501–502)
240
+ - reference : open sky / no panels (rows 503–504)
241
+
242
+ None means the sensor did not return a value.
243
+ """
244
+
245
+ snapshot_ts: datetime
246
+ staleness_minutes: float
247
+
248
+ # --- Ambient (Air1, outdoor baseline) ---
249
+ ambient_temp_c: Optional[float] = None
250
+ ambient_humidity_pct: Optional[float] = None
251
+ ambient_wind_speed_ms: Optional[float] = None
252
+ ambient_wind_angle_deg: Optional[float] = None
253
+ ambient_rain_mm: Optional[float] = None
254
+
255
+ # --- Treatment microclimate (avg of Air2 / Air3 / Air4) ---
256
+ treatment_air_temp_c: Optional[float] = None
257
+ treatment_leaf_temp_c: Optional[float] = None
258
+ treatment_vpd_kpa: Optional[float] = None
259
+ treatment_co2_ppm: Optional[float] = None
260
+ treatment_par_umol: Optional[float] = None
261
+ treatment_dli_mol_m2: Optional[float] = None
262
+ treatment_ndvi: Optional[float] = None
263
+ treatment_pri: Optional[float] = None
264
+ treatment_air_leaf_delta_t: Optional[float] = None
265
+
266
+ # --- Treatment crop (avg of Crop3 / Crop5 / Crop6 / Crop7) ---
267
+ treatment_crop_par_umol: Optional[float] = None
268
+ treatment_crop_leaf_temp_c: Optional[float] = None
269
+ treatment_crop_ndvi: Optional[float] = None
270
+ treatment_crop_dli_mol_m2: Optional[float] = None
271
+ treatment_crop_par_avg1h: Optional[float] = None
272
+ # Per-panel-position readings {position_label: {par, leaf_temp, ndvi}}
273
+ treatment_crop_by_position: Dict[str, Dict[str, Optional[float]]] = field(default_factory=dict)
274
+
275
+ # --- Reference crop (avg of Crop1 / Crop2 / Crop4) ---
276
+ reference_crop_par_umol: Optional[float] = None
277
+ reference_crop_leaf_temp_c: Optional[float] = None
278
+ reference_crop_ndvi: Optional[float] = None
279
+ reference_crop_dli_mol_m2: Optional[float] = None
280
+ reference_crop_by_position: Dict[str, Dict[str, Optional[float]]] = field(default_factory=dict)
281
+
282
+ # --- PAR shading ratio: treatment_crop_par / reference_crop_par ---
283
+ par_shading_ratio: Optional[float] = None # <1 = panels are shading
284
+
285
+ # --- Treatment soil (avg of Soil1 / Soil3 / Soil5 / Soil6) ---
286
+ treatment_soil_moisture_pct: Optional[float] = None
287
+ treatment_soil_temp_c: Optional[float] = None
288
+ treatment_soil_ec_ds_m: Optional[float] = None
289
+ treatment_soil_ph: Optional[float] = None
290
+
291
+ # --- Reference soil (avg of Soil2 / Soil4 / Soil7 / Soil9) ---
292
+ reference_soil_moisture_pct: Optional[float] = None
293
+ reference_soil_temp_c: Optional[float] = None
294
+
295
+ # --- Irrigation (Irrigation1, row 502 treatment) ---
296
+ irrigation_last_volume_l: Optional[float] = None
297
+ irrigation_last_minutes: Optional[float] = None
298
+ irrigation_ec: Optional[float] = None
299
+ irrigation_ph: Optional[float] = None
300
+ water_temp_c: Optional[float] = None
301
+
302
+ # --- Panel surface temperatures ---
303
+ treatment_panel_temp_c: Optional[float] = None # avg Thermocouples1 positions 1-4
304
+ reference_panel_temp_c: Optional[float] = None # avg Thermocouples2 positions 1-4
305
+
306
    def to_advisor_text(self) -> str:
        """Format snapshot for inclusion in an AI advisory prompt.

        Builds a plain-text report with one section per area (treatment,
        reference, ambient, irrigation). Fields that are None are simply
        omitted, so the report never shows placeholder values.

        Returns
        -------
        str
            Multi-line report joined with newlines.
        """
        # Staleness display caps at two hours; beyond that show ">N".
        # NOTE(review): if staleness_minutes is NaN (no telemetry at all)
        # this prints ">nan" — confirm whether an "unknown" label is wanted.
        age = f"{self.staleness_minutes:.0f}" if self.staleness_minutes < 120 else ">{:.0f}".format(self.staleness_minutes)
        lines = [f"VINE STATE (ThingsBoard sensors, ~{age} min ago):"]

        lines.append(" TREATMENT area (rows 501-502, under solar panels):")
        if self.treatment_air_temp_c is not None:
            lines.append(f" Air temperature: {self.treatment_air_temp_c:.1f} C")
        if self.treatment_leaf_temp_c is not None:
            lines.append(f" Leaf temperature: {self.treatment_leaf_temp_c:.1f} C")
        if self.treatment_air_leaf_delta_t is not None:
            lines.append(f" Air-leaf delta-T: {self.treatment_air_leaf_delta_t:+.1f} C (proxy for heat stress)")
        if self.treatment_vpd_kpa is not None:
            lines.append(f" VPD: {self.treatment_vpd_kpa:.2f} kPa")
        if self.treatment_co2_ppm is not None:
            lines.append(f" CO2: {self.treatment_co2_ppm:.0f} ppm")
        if self.treatment_crop_par_umol is not None:
            lines.append(f" Fruiting-zone PAR: {self.treatment_crop_par_umol:.0f} umol/m2/s (avg of Crop3/5/6/7)")
        if self.treatment_crop_dli_mol_m2 is not None:
            lines.append(f" DLI today so far: {self.treatment_crop_dli_mol_m2:.1f} mol/m2/day")
        if self.treatment_crop_ndvi is not None:
            lines.append(f" Canopy NDVI: {self.treatment_crop_ndvi:.3f}")
        if self.treatment_soil_moisture_pct is not None:
            lines.append(f" Soil moisture: {self.treatment_soil_moisture_pct:.1f}% (avg Soil1/3/5/6)")
        if self.treatment_soil_temp_c is not None:
            lines.append(f" Soil temperature: {self.treatment_soil_temp_c:.1f} C")
        if self.treatment_panel_temp_c is not None:
            lines.append(f" Panel surface temp: {self.treatment_panel_temp_c:.1f} C")

        # Per-sensor detail only when the mapping was populated by the client.
        if self.treatment_crop_by_position:
            lines.append(" Per-position PAR (Crop sensors):")
            for pos, vals in self.treatment_crop_by_position.items():
                par = vals.get("par")
                lt = vals.get("leaf_temp")
                par_str = f"{par:.0f} umol/m2/s" if par is not None else "N/A"
                lt_str = f" | leaf {lt:.1f} C" if lt is not None else ""
                lines.append(f" {pos}: PAR {par_str}{lt_str}")

        lines.append("")
        lines.append(" REFERENCE area (rows 503-504, open sky, no panels):")
        if self.reference_crop_par_umol is not None:
            lines.append(f" Fruiting-zone PAR: {self.reference_crop_par_umol:.0f} umol/m2/s (avg of Crop1/2/4)")
        if self.reference_crop_leaf_temp_c is not None:
            lines.append(f" Leaf temperature: {self.reference_crop_leaf_temp_c:.1f} C")
        if self.reference_crop_ndvi is not None:
            lines.append(f" Canopy NDVI: {self.reference_crop_ndvi:.3f}")
        if self.reference_soil_moisture_pct is not None:
            lines.append(f" Soil moisture: {self.reference_soil_moisture_pct:.1f}% (avg Soil2/4/7/9)")
        if self.reference_crop_by_position:
            lines.append(" Per-position PAR (Crop sensors):")
            for pos, vals in self.reference_crop_by_position.items():
                par = vals.get("par")
                par_str = f"{par:.0f} umol/m2/s" if par is not None else "N/A"
                lines.append(f" {pos}: PAR {par_str}")

        if self.par_shading_ratio is not None:
            # Ratio < 1 means the panels removed light; express as % reduction.
            reduction_pct = (1 - self.par_shading_ratio) * 100
            lines.append("")
            lines.append(f" PAR shading ratio (treatment/reference): {self.par_shading_ratio:.2f}"
                         f" ({reduction_pct:.0f}% reduction by panels)")

        if self.ambient_temp_c is not None:
            lines.append("")
            lines.append(" AMBIENT (outdoor baseline, Air1):")
            lines.append(f" Air temperature: {self.ambient_temp_c:.1f} C")
            if self.ambient_wind_speed_ms is not None:
                lines.append(f" Wind speed: {self.ambient_wind_speed_ms:.1f} m/s")
            # Rain is only reported when non-zero to keep the prompt short.
            if self.ambient_rain_mm is not None and self.ambient_rain_mm > 0:
                lines.append(f" Rain: {self.ambient_rain_mm:.1f} mm")

        # Emit the irrigation section only when at least one reading exists.
        any_irrigation = any(v is not None for v in [
            self.irrigation_last_volume_l, self.irrigation_last_minutes,
            self.irrigation_ec, self.irrigation_ph,
        ])
        if any_irrigation:
            lines.append("")
            lines.append(" IRRIGATION (Irrigation1, row 502):")
            if self.irrigation_last_volume_l is not None:
                lines.append(f" Last cycle volume: {self.irrigation_last_volume_l:.0f} L")
            if self.irrigation_last_minutes is not None:
                lines.append(f" Duration: {self.irrigation_last_minutes:.0f} min")
            if self.irrigation_ec is not None:
                lines.append(f" EC: {self.irrigation_ec:.2f} dS/m")
            if self.irrigation_ph is not None:
                lines.append(f" pH: {self.irrigation_ph:.1f}")
            if self.water_temp_c is not None:
                lines.append(f" Water temperature: {self.water_temp_c:.1f} C")

        return "\n".join(lines)
395
+
396
    def to_dict(self) -> Dict[str, Any]:
        """Return a flat dict suitable for JSON serialization (e.g., chatbot tool result).

        Scalar sensor fields are rounded to 3 decimals; None is preserved so
        consumers can distinguish "no data" from a literal 0. The two
        per-position mappings are passed through unrounded.
        """
        out: Dict[str, Any] = {
            "snapshot_ts": self.snapshot_ts.isoformat(),
            "staleness_minutes": round(self.staleness_minutes, 1),
        }
        # All flat scalar fields, grouped ambient → treatment → reference →
        # irrigation → panels (same order as the dataclass declaration).
        for attr in (
            "ambient_temp_c", "ambient_humidity_pct", "ambient_wind_speed_ms",
            "ambient_wind_angle_deg", "ambient_rain_mm",
            "treatment_air_temp_c", "treatment_leaf_temp_c", "treatment_vpd_kpa",
            "treatment_co2_ppm", "treatment_par_umol", "treatment_dli_mol_m2",
            "treatment_ndvi", "treatment_pri", "treatment_air_leaf_delta_t",
            "treatment_crop_par_umol", "treatment_crop_leaf_temp_c",
            "treatment_crop_ndvi", "treatment_crop_dli_mol_m2", "treatment_crop_par_avg1h",
            "reference_crop_par_umol", "reference_crop_leaf_temp_c",
            "reference_crop_ndvi", "reference_crop_dli_mol_m2",
            "par_shading_ratio",
            "treatment_soil_moisture_pct", "treatment_soil_temp_c",
            "treatment_soil_ec_ds_m", "treatment_soil_ph",
            "reference_soil_moisture_pct", "reference_soil_temp_c",
            "irrigation_last_volume_l", "irrigation_last_minutes",
            "irrigation_ec", "irrigation_ph", "water_temp_c",
            "treatment_panel_temp_c", "reference_panel_temp_c",
        ):
            val = getattr(self, attr)
            out[attr] = round(val, 3) if val is not None else None
        out["treatment_crop_by_position"] = self.treatment_crop_by_position
        out["reference_crop_by_position"] = self.reference_crop_by_position
        return out
425
+
426
+
427
+ # ---------------------------------------------------------------------------
428
+ # Configuration
429
+ # ---------------------------------------------------------------------------
430
+
431
@dataclass
class ThingsBoardConfig:
    """ThingsBoard connection settings. Data retrieval always uses prod (Seymour).

    NOTE(review): defaults are evaluated once at import time; environment
    variables set afterwards (e.g. by a late ``load_dotenv``) are not picked
    up by later instantiations — confirm this is intended.
    """
    # Prod only — test (eu.thingsboard.cloud) is for deploying apps, not data
    host: str = os.environ.get("THINGSBOARD_HOST", "https://web.seymouragri.com/")
    # Login credentials; TB_* accepted as shorter aliases.
    username: Optional[str] = (
        os.environ.get("THINGSBOARD_USERNAME") or os.environ.get("TB_USERNAME")
    )
    password: Optional[str] = (
        os.environ.get("THINGSBOARD_PASSWORD") or os.environ.get("TB_PASSWORD")
    )
    # Pre-generated JWT; when set it is used as-is (see _ensure_jwt).
    token: Optional[str] = os.environ.get("THINGSBOARD_TOKEN")
443
+
444
+
445
+ # ---------------------------------------------------------------------------
446
+ # Client
447
+ # ---------------------------------------------------------------------------
448
+
449
class ThingsBoardClient:
    """
    Minimal ThingsBoard client for the Seymour vineyard.

    Authentication
    --------------
    Provide THINGSBOARD_TOKEN for a pre-generated JWT, or
    THINGSBOARD_USERNAME + THINGSBOARD_PASSWORD for login-based auth.
    Tokens are cached and refreshed automatically before they expire.

    Usage
    -----
    client = ThingsBoardClient()
    snapshot = client.get_vine_snapshot()
    print(snapshot.to_advisor_text())
    """

    # Seconds a login JWT is trusted before re-authenticating.
    _TOKEN_TTL_SECONDS = 8_000  # ThingsBoard default is 9000 s; be conservative
467
+
468
+ def __init__(self, config: Optional[ThingsBoardConfig] = None) -> None:
469
+ self.config = config or ThingsBoardConfig()
470
+ self._session = requests.Session()
471
+ self._session.headers.update({"Content-Type": "application/json"})
472
+ self._jwt: Optional[str] = None
473
+ self._jwt_expires_at: float = 0.0
474
+
475
+ # ------------------------------------------------------------------
476
+ # Authentication
477
+ # ------------------------------------------------------------------
478
+
479
+ def _ensure_jwt(self) -> str:
480
+ """Return a valid JWT, obtaining or refreshing as needed."""
481
+ if self.config.token:
482
+ if "X-Authorization" not in self._session.headers:
483
+ self._session.headers["X-Authorization"] = f"Bearer {self.config.token}"
484
+ return self.config.token
485
+
486
+ if self._jwt and time.monotonic() < self._jwt_expires_at:
487
+ return self._jwt
488
+
489
+ if not self.config.username or not self.config.password:
490
+ raise RuntimeError(
491
+ "ThingsBoard authentication requires THINGSBOARD_TOKEN "
492
+ "or both THINGSBOARD_USERNAME and THINGSBOARD_PASSWORD."
493
+ )
494
+
495
+ url = f"{self.config.host.rstrip('/')}/api/auth/login"
496
+ resp = self._session.post(
497
+ url,
498
+ json={"username": self.config.username, "password": self.config.password},
499
+ timeout=10,
500
+ )
501
+ resp.raise_for_status()
502
+ token = resp.json()["token"]
503
+ self._jwt = token
504
+ self._jwt_expires_at = time.monotonic() + self._TOKEN_TTL_SECONDS
505
+ self._session.headers["X-Authorization"] = f"Bearer {token}"
506
+ return token
507
+
508
+ # ------------------------------------------------------------------
509
+ # Low-level API calls
510
+ # ------------------------------------------------------------------
511
+
512
+ # ------------------------------------------------------------------
513
+ # Shared low-level helpers (DEVICE and ASSET use the same REST API,
514
+ # differing only in the entity-type path segment).
515
+ # ------------------------------------------------------------------
516
+
517
+ def _fetch_latest_raw(
518
+ self,
519
+ entity_type: str,
520
+ uuid: str,
521
+ keys: List[str],
522
+ ) -> Tuple[Dict[str, Optional[float]], Optional[datetime]]:
523
+ """Fetch most-recent telemetry for any entity type (DEVICE or ASSET)."""
524
+ self._ensure_jwt()
525
+ url = (
526
+ f"{self.config.host.rstrip('/')}/api/plugins/telemetry/{entity_type}"
527
+ f"/{uuid}/values/timeseries"
528
+ )
529
+ resp = self._session.get(url, params={"keys": ",".join(keys)}, timeout=15)
530
+ resp.raise_for_status()
531
+ raw: Dict[str, List[Dict]] = resp.json()
532
+
533
+ values: Dict[str, Optional[float]] = {}
534
+ newest_ts_ms: Optional[int] = None
535
+ for key in keys:
536
+ entries = raw.get(key, [])
537
+ if entries:
538
+ values[key] = _safe_float(entries[0]["value"])
539
+ ts_ms = entries[0].get("ts")
540
+ if ts_ms and (newest_ts_ms is None or ts_ms > newest_ts_ms):
541
+ newest_ts_ms = ts_ms
542
+ else:
543
+ values[key] = None
544
+
545
+ newest_ts = (
546
+ datetime.fromtimestamp(newest_ts_ms / 1000, tz=timezone.utc)
547
+ if newest_ts_ms else None
548
+ )
549
+ return values, newest_ts
550
+
551
+ def _fetch_timeseries_raw(
552
+ self,
553
+ entity_type: str,
554
+ uuid: str,
555
+ keys: List[str],
556
+ start: datetime,
557
+ end: datetime,
558
+ limit: int = 1000,
559
+ interval_ms: int = 900_000,
560
+ agg: str = "NONE",
561
+ ) -> pd.DataFrame:
562
+ """Fetch time-series telemetry for any entity type (DEVICE or ASSET)."""
563
+ self._ensure_jwt()
564
+ start_ms = int(start.timestamp() * 1000)
565
+ end_ms = int(end.timestamp() * 1000)
566
+ url = (
567
+ f"{self.config.host.rstrip('/')}/api/plugins/telemetry/{entity_type}"
568
+ f"/{uuid}/values/timeseries"
569
+ )
570
+ params: Dict[str, Any] = {
571
+ "keys": ",".join(keys),
572
+ "startTs": start_ms,
573
+ "endTs": end_ms,
574
+ "limit": limit,
575
+ "agg": agg,
576
+ }
577
+ if agg != "NONE":
578
+ params["interval"] = interval_ms
579
+
580
+ resp = self._session.get(url, params=params, timeout=30)
581
+ resp.raise_for_status()
582
+ raw: Dict[str, List[Dict]] = resp.json()
583
+
584
+ frames: Dict[str, pd.Series] = {}
585
+ for key, entries in raw.items():
586
+ if key in keys and entries:
587
+ ts = pd.to_datetime([e["ts"] for e in entries], unit="ms", utc=True)
588
+ vals = [_safe_float(e["value"]) for e in entries]
589
+ frames[key] = pd.Series(vals, index=ts)
590
+
591
+ if not frames:
592
+ return pd.DataFrame()
593
+ return pd.DataFrame(frames).sort_index()
594
+
595
+ # ------------------------------------------------------------------
596
+ # Device API (public)
597
+ # ------------------------------------------------------------------
598
+
599
+ def _fetch_latest(
600
+ self,
601
+ device_name: str,
602
+ keys: List[str],
603
+ ) -> Tuple[Dict[str, Optional[float]], Optional[datetime]]:
604
+ """Fetch most-recent values for a named device."""
605
+ info = DEVICE_REGISTRY[device_name]
606
+ return self._fetch_latest_raw("DEVICE", info.uuid, keys)
607
+
608
+ def get_latest_telemetry(
609
+ self,
610
+ device_name: str,
611
+ keys: List[str],
612
+ ) -> Dict[str, Optional[float]]:
613
+ """Return the most recent value for each key. Missing keys return None."""
614
+ if device_name not in DEVICE_REGISTRY:
615
+ raise KeyError(
616
+ f"Unknown device: {device_name!r}. "
617
+ f"Valid names: {sorted(DEVICE_REGISTRY)}"
618
+ )
619
+ values, _ = self._fetch_latest(device_name, keys)
620
+ return values
621
+
622
+ def get_timeseries(
623
+ self,
624
+ device_name: str,
625
+ keys: List[str],
626
+ start: datetime,
627
+ end: datetime,
628
+ limit: int = 1000,
629
+ interval_ms: int = 900_000, # 15 minutes
630
+ agg: str = "NONE",
631
+ ) -> pd.DataFrame:
632
+ """Fetch time-series telemetry for a named device."""
633
+ if device_name not in DEVICE_REGISTRY:
634
+ raise KeyError(f"Unknown device: {device_name!r}")
635
+ info = DEVICE_REGISTRY[device_name]
636
+ return self._fetch_timeseries_raw(
637
+ "DEVICE", info.uuid, keys, start, end, limit, interval_ms, agg,
638
+ )
639
+
640
+ # ------------------------------------------------------------------
641
+ # Asset API (public)
642
+ # ------------------------------------------------------------------
643
+
644
+ def get_asset_timeseries(
645
+ self,
646
+ asset_name: str,
647
+ keys: List[str],
648
+ start: datetime,
649
+ end: datetime,
650
+ limit: int = 1000,
651
+ interval_ms: int = 3_600_000, # 1 hour
652
+ agg: str = "SUM",
653
+ ) -> pd.DataFrame:
654
+ """Fetch time-series from a ThingsBoard ASSET (e.g. Plant energy)."""
655
+ if asset_name not in ASSET_REGISTRY:
656
+ raise KeyError(f"Unknown asset: {asset_name!r}. Valid: {sorted(ASSET_REGISTRY)}")
657
+ info = ASSET_REGISTRY[asset_name]
658
+ return self._fetch_timeseries_raw(
659
+ "ASSET", info.uuid, keys, start, end, limit, interval_ms, agg,
660
+ )
661
+
662
+ def get_asset_latest(
663
+ self,
664
+ asset_name: str,
665
+ keys: List[str],
666
+ ) -> Dict[str, Optional[float]]:
667
+ """Fetch latest telemetry from a ThingsBoard ASSET."""
668
+ if asset_name not in ASSET_REGISTRY:
669
+ raise KeyError(f"Unknown asset: {asset_name!r}")
670
+ info = ASSET_REGISTRY[asset_name]
671
+ values, _ = self._fetch_latest_raw("ASSET", info.uuid, keys)
672
+ return values
673
+
674
+ # ------------------------------------------------------------------
675
+ # Device commands (RPC + attribute writes)
676
+ # ------------------------------------------------------------------
677
+
678
+ def send_rpc_command(
679
+ self,
680
+ device_name: str,
681
+ method: str,
682
+ params: Any = None,
683
+ timeout: float = 10.0,
684
+ ) -> Dict[str, Any]:
685
+ """Send a two-way RPC command to a device.
686
+
687
+ Uses POST /api/plugins/rpc/twoway/{deviceId}.
688
+ Falls back to one-way if two-way returns 404.
689
+ """
690
+ if device_name not in DEVICE_REGISTRY:
691
+ raise KeyError(f"Unknown device: {device_name!r}")
692
+ info = DEVICE_REGISTRY[device_name]
693
+ self._ensure_jwt()
694
+
695
+ payload = {"method": method, "params": params if params is not None else {}}
696
+
697
+ # Try two-way RPC first
698
+ url = (
699
+ f"{self.config.host.rstrip('/')}/api/plugins/rpc/twoway"
700
+ f"/{info.uuid}"
701
+ )
702
+ resp = self._session.post(url, json=payload, timeout=timeout)
703
+ if resp.status_code in (404, 405):
704
+ # Fallback to one-way RPC
705
+ url = (
706
+ f"{self.config.host.rstrip('/')}/api/plugins/rpc/oneway"
707
+ f"/{info.uuid}"
708
+ )
709
+ resp = self._session.post(url, json=payload, timeout=timeout)
710
+ resp.raise_for_status()
711
+ try:
712
+ return resp.json()
713
+ except Exception:
714
+ return {"status": "ok", "status_code": resp.status_code}
715
+
716
+ def set_device_attributes(
717
+ self,
718
+ device_name: str,
719
+ attributes: Dict[str, Any],
720
+ scope: str = "SHARED_SCOPE",
721
+ ) -> None:
722
+ """Write server-side attributes to a device.
723
+
724
+ Uses POST /api/plugins/telemetry/DEVICE/{id}/attributes/{scope}.
725
+ This is an alternative to RPC for setting tracker targets.
726
+ """
727
+ if device_name not in DEVICE_REGISTRY:
728
+ raise KeyError(f"Unknown device: {device_name!r}")
729
+ info = DEVICE_REGISTRY[device_name]
730
+ self._ensure_jwt()
731
+
732
+ url = (
733
+ f"{self.config.host.rstrip('/')}/api/plugins/telemetry/DEVICE"
734
+ f"/{info.uuid}/attributes/{scope}"
735
+ )
736
+ resp = self._session.post(url, json=attributes, timeout=10)
737
+ resp.raise_for_status()
738
+
739
+ # ------------------------------------------------------------------
740
+ # High-level vine snapshot
741
+ # ------------------------------------------------------------------
742
+
743
    # Dashboard-only: 4 devices for farmer view (temp, soil, irrigation)
    _DASHBOARD_FETCH_PLAN: Dict[str, List[str]] = {
        "Air1": AIR_KEYS,  # ambient weather
        "Air2": AIR_KEYS,  # treatment air
        "Soil1": SOIL_KEYS,  # treatment soil
        "Irrigation1": IRRIGATION_KEYS,
    }

    # Light mode: 6 devices (adds crop PAR for chatbot/detailed view)
    _LIGHT_FETCH_PLAN: Dict[str, List[str]] = {
        "Air1": AIR_KEYS,  # ambient
        "Air2": AIR_KEYS,  # treatment air (one representative)
        "Crop1": CROP_KEYS,  # reference crop
        "Crop3": CROP_KEYS,  # treatment crop
        "Soil1": SOIL_KEYS,  # treatment soil
        "Irrigation1": IRRIGATION_KEYS,
    }

    # Full mode: every sensor used by get_vine_snapshot's aggregation
    # (air, crop, soil, irrigation, panel thermocouples).
    _FULL_FETCH_PLAN: Dict[str, List[str]] = {
        "Air1": AIR_KEYS,
        "Air2": AIR_KEYS,
        "Air3": AIR_KEYS,
        "Air4": AIR_KEYS,
        "Crop1": CROP_KEYS,
        "Crop2": CROP_KEYS,
        "Crop3": CROP_KEYS,
        "Crop4": CROP_KEYS,
        "Crop5": CROP_KEYS,
        "Crop6": CROP_KEYS,
        "Crop7": CROP_KEYS,
        "Soil1": SOIL_KEYS,
        "Soil2": SOIL_KEYS,
        "Soil3": SOIL_KEYS,
        "Soil4": SOIL_KEYS,
        "Soil5": SOIL_KEYS,
        "Soil6": SOIL_KEYS,
        "Soil7": SOIL_KEYS,
        "Soil9": SOIL_KEYS,
        "Irrigation1": IRRIGATION_KEYS,
        "Thermocouples1": THERMOCOUPLE_KEYS,
        "Thermocouples2": THERMOCOUPLE_KEYS,
    }
785
+
786
+ def get_vine_snapshot(self, light: bool = False,
787
+ mode: Optional[str] = None) -> VineSnapshot:
788
+ """
789
+ Fetch latest telemetry from all relevant devices and return an
790
+ aggregated VineSnapshot distinguishing treatment vs reference areas.
791
+
792
+ Uses a thread pool to parallelise HTTP requests.
793
+ Individual device failures are silently skipped (returns None fields).
794
+
795
+ Parameters
796
+ ----------
797
+ light : bool
798
+ If True, fetch only ~6 key devices instead of all 21.
799
+ mode : str, optional
800
+ "dashboard" = 4 devices only (air + soil + irrigation).
801
+ Overrides `light` when set.
802
+ """
803
+ if mode == "dashboard":
804
+ fetch_plan = self._DASHBOARD_FETCH_PLAN
805
+ elif light:
806
+ fetch_plan = self._LIGHT_FETCH_PLAN
807
+ else:
808
+ fetch_plan = self._FULL_FETCH_PLAN
809
+
810
+ # Ensure auth token before spawning threads (avoid race on login)
811
+ self._ensure_jwt()
812
+
813
+ raw_results: Dict[str, Dict[str, Optional[float]]] = {}
814
+ newest_ts_overall: Optional[datetime] = None
815
+
816
+ with ThreadPoolExecutor(max_workers=8) as pool:
817
+ future_map = {
818
+ pool.submit(self._fetch_latest, name, keys): name
819
+ for name, keys in fetch_plan.items()
820
+ }
821
+ for future in as_completed(future_map, timeout=25):
822
+ name = future_map[future]
823
+ try:
824
+ values, ts = future.result()
825
+ raw_results[name] = values
826
+ if ts and (newest_ts_overall is None or ts > newest_ts_overall):
827
+ newest_ts_overall = ts
828
+ except Exception:
829
+ raw_results[name] = {}
830
+
831
+ now = datetime.now(tz=timezone.utc)
832
+ staleness = (
833
+ (now - newest_ts_overall).total_seconds() / 60
834
+ if newest_ts_overall else float("nan")
835
+ )
836
+
837
+ # ---------- Ambient (Air1) ----------
838
+ air1 = raw_results.get("Air1", {})
839
+
840
+ # ---------- Treatment microclimate (Air2/3/4) ----------
841
+ treatment_air = [raw_results.get(d, {}) for d in ("Air2", "Air3", "Air4")]
842
+
843
+ # ---------- Treatment crop by position ----------
844
+ position_labels = {
845
+ "Crop3": "502-west-bottom",
846
+ "Crop5": "502-east-upper",
847
+ "Crop6": "502-east-bottom",
848
+ "Crop7": "502-west-upper",
849
+ }
850
+ treatment_crop_devs = {
851
+ label: raw_results.get(dev, {})
852
+ for dev, label in position_labels.items()
853
+ }
854
+ treatment_crop_by_pos: Dict[str, Dict[str, Optional[float]]] = {
855
+ label: {
856
+ "par": v.get("PAR"),
857
+ "leaf_temp": v.get("leafTemperature"),
858
+ "ndvi": v.get("NDVI"),
859
+ "dli": v.get("DLI"),
860
+ }
861
+ for label, v in treatment_crop_devs.items()
862
+ }
863
+
864
+ # ---------- Reference crop by position ----------
865
+ ref_position_labels = {
866
+ "Crop1": "503-ref",
867
+ "Crop2": "503-control",
868
+ "Crop4": "502-control",
869
+ }
870
+ reference_crop_devs = {
871
+ label: raw_results.get(dev, {})
872
+ for dev, label in ref_position_labels.items()
873
+ }
874
+ reference_crop_by_pos: Dict[str, Dict[str, Optional[float]]] = {
875
+ label: {
876
+ "par": v.get("PAR"),
877
+ "leaf_temp": v.get("leafTemperature"),
878
+ "ndvi": v.get("NDVI"),
879
+ "dli": v.get("DLI"),
880
+ }
881
+ for label, v in reference_crop_devs.items()
882
+ }
883
+
884
+ # ---------- Soil averages ----------
885
+ treatment_soil_devs = [raw_results.get(d, {}) for d in ("Soil1", "Soil3", "Soil5", "Soil6")]
886
+ reference_soil_devs = [raw_results.get(d, {}) for d in ("Soil2", "Soil4", "Soil7", "Soil9")]
887
+
888
+ def _avg_soil_moisture(devs: List[Dict]) -> Optional[float]:
889
+ all_vals = []
890
+ for d in devs:
891
+ for k in ("soilMoisture", "soilMoisture2"):
892
+ if d.get(k) is not None:
893
+ all_vals.append(d[k])
894
+ lo, hi = _BOUNDS["soil_moisture"]
895
+ return _bounded_avg(lo, hi, *all_vals) if all_vals else None
896
+
897
+ def _avg_soil_temp(devs: List[Dict]) -> Optional[float]:
898
+ all_vals = []
899
+ for d in devs:
900
+ for k in ("soilTemperature", "soilTemperature2"):
901
+ if d.get(k) is not None:
902
+ all_vals.append(d[k])
903
+ lo, hi = _BOUNDS["soil_temp"]
904
+ return _bounded_avg(lo, hi, *all_vals) if all_vals else None
905
+
906
+ # ---------- Panel temps ----------
907
+ tc1 = raw_results.get("Thermocouples1", {})
908
+ tc2 = raw_results.get("Thermocouples2", {})
909
+
910
+ irr = raw_results.get("Irrigation1", {})
911
+
912
+ # ---------- PAR shading ratio (bounded to reject sensor faults) ----------
913
+ t_par = _bounded_avg(*_BOUNDS["par"], *[v.get("PAR") for v in treatment_crop_devs.values()])
914
+ r_par = _bounded_avg(*_BOUNDS["par"], *[v.get("PAR") for v in reference_crop_devs.values()])
915
+ par_ratio: Optional[float] = None
916
+ if t_par is not None and r_par is not None and r_par > 0:
917
+ par_ratio = t_par / r_par
918
+
919
+ snapshot = VineSnapshot(
920
+ snapshot_ts=now,
921
+ staleness_minutes=staleness,
922
+
923
+ # Ambient — apply bounds to catch single-device faults too
924
+ ambient_temp_c=_bounded_avg(*_BOUNDS["air_temp"], air1.get("airTemperature")),
925
+ ambient_humidity_pct=_bounded_avg(0, 100, air1.get("airHumidity")),
926
+ ambient_wind_speed_ms=_bounded_avg(0, 60, air1.get("windSpeed")),
927
+ ambient_wind_angle_deg=_bounded_avg(0, 360, air1.get("windAngle")),
928
+ ambient_rain_mm=_bounded_avg(0, 500, air1.get("rain")),
929
+
930
+ # Treatment climate — bounded to reject sensor faults
931
+ treatment_air_temp_c=_bounded_avg(*_BOUNDS["air_temp"], *[d.get("airTemperature") for d in treatment_air]),
932
+ treatment_leaf_temp_c=_bounded_avg(*_BOUNDS["leaf_temp"], *[d.get("leafTemperature") for d in treatment_air]),
933
+ treatment_vpd_kpa=_bounded_avg(*_BOUNDS["vpd"], *[d.get("VPD") for d in treatment_air]),
934
+ treatment_co2_ppm=_bounded_avg(*_BOUNDS["co2"], *[d.get("CO2") for d in treatment_air]),
935
+ treatment_par_umol=_bounded_avg(*_BOUNDS["par"], *[d.get("PAR") for d in treatment_air]),
936
+ treatment_dli_mol_m2=_bounded_avg(*_BOUNDS["dli"], *[d.get("DLI") for d in treatment_air]),
937
+ treatment_ndvi=_bounded_avg(*_BOUNDS["ndvi"], *[d.get("NDVI") for d in treatment_air]),
938
+ treatment_pri=_bounded_avg(*_BOUNDS["pri"], *[d.get("PRI") for d in treatment_air]),
939
+ treatment_air_leaf_delta_t=_bounded_avg(-20, 20, *[d.get("airLeafDeltaT") for d in treatment_air]),
940
+
941
+ # Treatment crop
942
+ treatment_crop_par_umol=t_par,
943
+ treatment_crop_leaf_temp_c=_bounded_avg(
944
+ *_BOUNDS["leaf_temp"], *[v.get("leafTemperature") for v in treatment_crop_devs.values()]
945
+ ),
946
+ treatment_crop_ndvi=_bounded_avg(
947
+ *_BOUNDS["ndvi"], *[v.get("NDVI") for v in treatment_crop_devs.values()]
948
+ ),
949
+ treatment_crop_dli_mol_m2=_bounded_avg(
950
+ *_BOUNDS["dli"], *[v.get("DLI") for v in treatment_crop_devs.values()]
951
+ ),
952
+ treatment_crop_par_avg1h=_bounded_avg(
953
+ *_BOUNDS["par"], *[v.get("PARAvg1H") for v in treatment_crop_devs.values()]
954
+ ),
955
+ treatment_crop_by_position=treatment_crop_by_pos,
956
+
957
+ # Reference crop
958
+ reference_crop_par_umol=r_par,
959
+ reference_crop_leaf_temp_c=_bounded_avg(
960
+ *_BOUNDS["leaf_temp"], *[v.get("leafTemperature") for v in reference_crop_devs.values()]
961
+ ),
962
+ reference_crop_ndvi=_bounded_avg(
963
+ *_BOUNDS["ndvi"], *[v.get("NDVI") for v in reference_crop_devs.values()]
964
+ ),
965
+ reference_crop_dli_mol_m2=_bounded_avg(
966
+ *_BOUNDS["dli"], *[v.get("DLI") for v in reference_crop_devs.values()]
967
+ ),
968
+ reference_crop_by_position=reference_crop_by_pos,
969
+
970
+ par_shading_ratio=par_ratio,
971
+
972
+ # Treatment soil
973
+ treatment_soil_moisture_pct=_avg_soil_moisture(treatment_soil_devs),
974
+ treatment_soil_temp_c=_avg_soil_temp(treatment_soil_devs),
975
+ treatment_soil_ec_ds_m=_safe_avg(*[d.get("soilBulkEC") for d in treatment_soil_devs]),
976
+ treatment_soil_ph=_safe_avg(*[d.get("soilpH") for d in treatment_soil_devs]),
977
+
978
+ # Reference soil
979
+ reference_soil_moisture_pct=_avg_soil_moisture(reference_soil_devs),
980
+ reference_soil_temp_c=_avg_soil_temp(reference_soil_devs),
981
+
982
+ # Irrigation
983
+ irrigation_last_volume_l=irr.get("irrigationCycleVolume") or irr.get("irrigationVolume"),
984
+ irrigation_last_minutes=irr.get("irrigationCycleMinutes") or irr.get("irrigationMinutes"),
985
+ irrigation_ec=irr.get("irrigationEC"),
986
+ irrigation_ph=irr.get("irrigationPH"),
987
+ water_temp_c=irr.get("waterTemperature"),
988
+
989
+ # Panel temps
990
+ treatment_panel_temp_c=_bounded_avg(
991
+ *_BOUNDS["panel_temp"], *[tc1.get(k) for k in THERMOCOUPLE_KEYS]
992
+ ),
993
+ reference_panel_temp_c=_bounded_avg(
994
+ *_BOUNDS["panel_temp"], *[tc2.get(k) for k in THERMOCOUPLE_KEYS]
995
+ ),
996
+ )
997
+ return snapshot
998
+
999
+
1000
+ # ---------------------------------------------------------------------------
1001
+ # Helpers (module-level so threads can share without self)
1002
+ # ---------------------------------------------------------------------------
1003
+
1004
def _safe_float(val: Any) -> Optional[float]:
    """Convert a TB telemetry value string/number to float, or None on failure."""
    if val is None:
        return None
    try:
        converted = float(val)
    except (TypeError, ValueError):
        return None
    # Reject NaN/inf — they propagate silently through averages otherwise.
    return converted if math.isfinite(converted) else None
1013
+
1014
+
1015
def _safe_avg(*vals: Any) -> Optional[float]:
    """Return the mean of non-None, finite values, or None if none available."""
    usable = [
        v for v in vals
        if isinstance(v, (int, float)) and math.isfinite(v)
    ]
    if not usable:
        return None
    return sum(usable) / len(usable)
1020
+
1021
+
1022
def _bounded_avg(lo: float, hi: float, *vals: Any) -> Optional[float]:
    """Return the mean of values within [lo, hi], rejecting sensor faults outside that range."""
    in_range = [
        v for v in vals
        if isinstance(v, (int, float)) and math.isfinite(v) and lo <= v <= hi
    ]
    return sum(in_range) / len(in_range) if in_range else None
1027
+
1028
+
1029
# Physical plausibility bounds for Negev site.
# Used by _bounded_avg to discard faulty sensor readings before averaging.
_BOUNDS = {
    "air_temp": (-5.0, 55.0),       # °C — extreme Negev range
    "leaf_temp": (-5.0, 60.0),      # °C — leaves can exceed air under direct sun
    "soil_temp": (-2.0, 45.0),      # °C — soil in Negev
    "soil_moisture": (0.0, 100.0),  # %
    "par": (0.0, 3000.0),           # µmol m⁻² s⁻¹
    "vpd": (0.0, 10.0),             # kPa
    "co2": (300.0, 2000.0),         # ppm
    "ndvi": (-1.0, 1.0),
    "pri": (-1.0, 1.0),
    "dli": (0.0, 80.0),             # mol m⁻² day⁻¹
    "panel_temp": (-10.0, 100.0),   # °C — panel surface
}
1043
+
1044
+
1045
+ # ---------------------------------------------------------------------------
1046
+ # CLI smoke test
1047
+ # ---------------------------------------------------------------------------
1048
+
1049
if __name__ == "__main__":
    # Manual smoke test: requires live ThingsBoard credentials in the env.
    tb_client = ThingsBoardClient()
    print("Fetching vine snapshot from ThingsBoard...")
    try:
        snapshot = tb_client.get_vine_snapshot()
        print(snapshot.to_advisor_text())
        print(f"\nSnapshot age: {snapshot.staleness_minutes:.1f} min")
    except Exception as exc:
        print(f"Error: {exc}")
        print("Make sure THINGSBOARD_USERNAME/PASSWORD or THINGSBOARD_TOKEN are set in your .env")
src/data_providers.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.data.data_providers."""
2
+ from src.data.data_providers import * # noqa: F401, F403
src/data_schema.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.data.data_schema."""
2
+ from src.data.data_schema import * # noqa: F401, F403
src/day_ahead_advisor.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Backward-compatible re-export from src.advisor.day_ahead_advisor."""
2
+ from src.advisor.day_ahead_advisor import * # noqa: F401, F403
src/day_ahead_planner.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DayAheadPlanner: dynamic-programming trajectory optimizer for agrivoltaic control.
3
+
4
+ Given a day-ahead weather forecast (temperature, GHI) and the current energy
5
+ budget, finds the optimal tilt-offset trajectory for the next day that
6
+ maximises a combined utility of crop protection and energy generation.
7
+
8
+ Algorithm
9
+ ---------
10
+ For each 15-min slot t from sunrise to sunset:
11
+ 1. Predict vine state: Tleaf ≈ Tair (proxy), GHI from forecast, CWSI from
12
+ temperature heuristic, shading_helps from FvCB Rubisco transition.
13
+ 2. Run InterventionGate — if blocked, slot must stay at θ_astro (offset=0).
14
+ 3. For each candidate offset θ ∈ CANDIDATE_OFFSETS:
15
+ U_t(θ) = Price_energy · E_t(θ) + Price_crop · A_t(θ) − MovementCost(θ, θ_{t-1})
16
+ where E_t is energy generated and A_t is agronomic value (weighted by
17
+ phenological stage and zone).
18
+ 4. DP recurrence: V_t(θ) = U_t(θ) + max_{θ'} V_{t-1}(θ')
19
+ with cumulative energy sacrifice ≤ daily budget constraint.
20
+
21
+ The result is a DayAheadPlan: a list of SlotPlan objects, one per 15-min slot,
22
+ each containing the chosen offset, expected energy cost, and explainability tags.
23
+
24
+ References
25
+ ----------
26
+ - config/settings.py §Day-Ahead DP Planner
27
+ - context/2_plan.md §3.3
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import math
33
+ from dataclasses import dataclass, field
34
+ from datetime import date, datetime, timedelta
35
+ from typing import List, Optional
36
+
37
+ import numpy as np
38
+ import pandas as pd
39
+
40
+ from config.settings import (
41
+ CANDIDATE_OFFSETS,
42
+ DP_BASE_CROP_VALUE,
43
+ DP_FLAT_ENERGY_PRICE_ILS_KWH,
44
+ DP_MOVEMENT_COST,
45
+ DP_SLOT_DURATION_MIN,
46
+ NO_SHADE_BEFORE_HOUR,
47
+ SEMILLON_TRANSITION_TEMP_C,
48
+ SHADE_ELIGIBLE_CWSI_ABOVE,
49
+ SHADE_ELIGIBLE_GHI_ABOVE,
50
+ SHADE_ELIGIBLE_TLEAF_ABOVE,
51
+ STAGE_CROP_MULTIPLIER,
52
+ ZONE_CROP_WEIGHTS,
53
+ )
54
+
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Data containers
58
+ # ---------------------------------------------------------------------------
59
+
60
@dataclass
class SlotPlan:
    """Planned tilt offset for a single 15-min slot.

    One instance per daylight slot in a :class:`DayAheadPlan`. The offset is
    expressed relative to pure astronomical tracking, so ``offset_deg == 0``
    means the tracker follows the sun exactly (maximum energy, no shading).
    """

    time: str  # slot start, "HH:MM" UTC
    offset_deg: float  # degrees off astronomical tracking (0 = full tracking)
    energy_cost_kwh: float  # estimated energy sacrifice for this slot (kWh)
    gate_passed: bool  # whether InterventionGate allowed intervention
    tags: List[str] = field(default_factory=list)  # explainability tags (e.g. "full_tracking")
69
+
70
+
71
@dataclass
class DayAheadPlan:
    """Complete day-ahead tilt trajectory plan.

    Aggregates one :class:`SlotPlan` per daylight 15-min slot together with
    budget bookkeeping for the whole day.
    """

    target_date: str  # ISO date string
    slots: List[SlotPlan]  # one per daylight 15-min slot
    total_energy_cost_kwh: float  # sum of all slot costs
    daily_budget_kwh: float  # available daily budget
    budget_utilisation_pct: float  # total_cost / budget × 100
    stage_id: str  # phenological stage used
    n_intervention_slots: int  # slots where offset > 0

    def to_dict(self) -> dict:
        """Serialise the plan (including every slot) to a JSON-friendly dict."""
        slot_entries = []
        for slot in self.slots:
            slot_entries.append({
                "time": slot.time,
                "offset_deg": slot.offset_deg,
                "energy_cost_kwh": round(slot.energy_cost_kwh, 6),
                "gate_passed": slot.gate_passed,
                "tags": slot.tags,
            })
        return {
            "target_date": self.target_date,
            "stage_id": self.stage_id,
            "daily_budget_kwh": round(self.daily_budget_kwh, 4),
            "total_energy_cost_kwh": round(self.total_energy_cost_kwh, 4),
            "budget_utilisation_pct": round(self.budget_utilisation_pct, 1),
            "n_intervention_slots": self.n_intervention_slots,
            "slots": slot_entries,
        }
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # DayAheadPlanner
106
+ # ---------------------------------------------------------------------------
107
+
108
class DayAheadPlanner:
    """DP-based day-ahead trajectory optimizer.

    Parameters
    ----------
    shadow_model : object, optional
        ShadowModel instance for solar position and tracker geometry.
        Constructed lazily on first use if not supplied.
    baseline_predictor : BaselinePredictor, optional
        Hybrid FvCB+ML predictor for per-slot photosynthesis baseline.
        If provided, ``plan_day()`` uses predicted A for crop value instead
        of the temperature-only heuristic.
    energy_price : float
        Energy price (ILS/kWh) for the utility function.
    crop_value : float
        Base crop value (ILS per µmol CO₂ m⁻² s⁻¹ per slot).
    movement_cost : float
        Penalty per degree of tilt change between consecutive slots (ILS-equivalent).
    """

    def __init__(
        self,
        shadow_model=None,
        baseline_predictor=None,
        energy_price: float = DP_FLAT_ENERGY_PRICE_ILS_KWH,
        crop_value: float = DP_BASE_CROP_VALUE,
        movement_cost: float = DP_MOVEMENT_COST,
    ):
        self._shadow_model = shadow_model
        self._baseline_predictor = baseline_predictor
        self.energy_price = energy_price
        self.crop_value = crop_value
        self.movement_cost = movement_cost

    @property
    def shadow_model(self):
        """ShadowModel, constructed lazily so import cost is paid only when needed."""
        if self._shadow_model is None:
            from src.shading.solar_geometry import ShadowModel
            self._shadow_model = ShadowModel()
        return self._shadow_model

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------

    def plan_day(
        self,
        target_date: date,
        forecast_temps: List[float],
        forecast_ghi: List[float],
        daily_budget_kwh: float,
        stage_id: Optional[str] = None,
    ) -> DayAheadPlan:
        """Generate an optimal tilt trajectory for the given day.

        Parameters
        ----------
        target_date : date
            The day to plan for.
        forecast_temps : list of float
            Forecast air temperature (°C) for each 15-min slot (96 values).
            Only daylight slots are used; nighttime values are ignored.
        forecast_ghi : list of float
            Forecast GHI (W/m²) for each 15-min slot (96 values).
        daily_budget_kwh : float
            Available energy sacrifice budget for the day (kWh). A value
            of zero (or negative) means no shading interventions at all.
        stage_id : str, optional
            Phenological stage identifier. If None, estimated from date.

        Returns
        -------
        DayAheadPlan
        """
        if stage_id is None:
            from src.models.phenology import estimate_stage_for_date
            stage_id = estimate_stage_for_date(target_date).id

        # Crop value multiplier for this phenological stage
        crop_multiplier = self._get_crop_multiplier(stage_id)

        # Compute baseline A predictions if predictor is available; fall back
        # to the temperature heuristic (inside _slot_utility) on any failure.
        baseline_a: Optional[List[float]] = None
        if self._baseline_predictor is not None:
            try:
                baseline_a = self._baseline_predictor.predict_day(
                    forecast_temps, forecast_ghi,
                )
            except Exception as exc:
                import logging
                logging.getLogger(__name__).warning(
                    "Baseline predictor failed, using temperature heuristic: %s", exc,
                )

        # Build slot timeline (sunrise to sunset only)
        slots_info = self._build_slot_info(
            target_date, forecast_temps, forecast_ghi, crop_multiplier,
            baseline_a=baseline_a,
        )

        if not slots_info:
            # No daylight slots with usable irradiance — empty plan.
            return DayAheadPlan(
                target_date=str(target_date),
                slots=[],
                total_energy_cost_kwh=0.0,
                daily_budget_kwh=daily_budget_kwh,
                budget_utilisation_pct=0.0,
                stage_id=stage_id,
                n_intervention_slots=0,
            )

        # Run DP optimization (offset 0 is always a candidate so the planner
        # can fall back to full tracking in any slot).
        offsets = [0] + [o for o in CANDIDATE_OFFSETS if o > 0]
        planned_slots = self._dp_optimize(
            slots_info, offsets, daily_budget_kwh,
        )

        total_cost = sum(s.energy_cost_kwh for s in planned_slots)
        n_interventions = sum(1 for s in planned_slots if s.offset_deg > 0)
        utilisation = (total_cost / daily_budget_kwh * 100) if daily_budget_kwh > 0 else 0.0

        return DayAheadPlan(
            target_date=str(target_date),
            slots=planned_slots,
            total_energy_cost_kwh=total_cost,
            daily_budget_kwh=daily_budget_kwh,
            budget_utilisation_pct=utilisation,
            stage_id=stage_id,
            n_intervention_slots=n_interventions,
        )

    # ------------------------------------------------------------------
    # Slot info builder
    # ------------------------------------------------------------------

    def _build_slot_info(
        self,
        target_date: date,
        forecast_temps: List[float],
        forecast_ghi: List[float],
        crop_multiplier: float,
        baseline_a: Optional[List[float]] = None,
    ) -> List[dict]:
        """Build per-slot metadata for daylight hours.

        Returns list of dicts with keys: time_str, hour, temp_c, ghi,
        solar_elevation, solar_azimuth, astro_tilt, gate_passed,
        gate_reason, energy_astro_kwh, crop_multiplier, and optionally
        baseline_a.
        """
        day_start = pd.Timestamp(target_date, tz="UTC")
        times = pd.date_range(day_start, periods=96, freq="15min")

        # Solar positions for the whole day (one vectorised call)
        solar_pos = self.shadow_model.get_solar_position(times)

        slots = []
        for i, ts in enumerate(times):
            hour = ts.hour + ts.minute / 60.0
            elev = float(solar_pos.iloc[i]["solar_elevation"])

            # Skip nighttime (sun below ~2° elevation)
            if elev <= 2:
                continue

            # Missing forecast values fall back to benign defaults.
            temp_c = forecast_temps[i] if i < len(forecast_temps) else 25.0
            ghi = forecast_ghi[i] if i < len(forecast_ghi) else 0.0

            # Skip slots with no meaningful irradiance
            if ghi < 50:
                continue

            azim = float(solar_pos.iloc[i]["solar_azimuth"])
            tracker = self.shadow_model.compute_tracker_tilt(azim, elev)
            astro_tilt = float(tracker["tracker_theta"])

            # Gate check (simplified — uses forecast data as proxy)
            gate_passed, gate_reason = self._check_gate(
                temp_c, ghi, hour,
            )

            # Energy at astronomical tracking (kWh per kWp for this 0.25 h slot)
            aoi = float(tracker["aoi"])
            energy_astro = max(0.0, math.cos(math.radians(aoi))) * 0.25

            slot_dict = {
                "time_str": ts.strftime("%H:%M"),
                "hour": hour,
                "temp_c": temp_c,
                "ghi": ghi,
                "solar_elevation": elev,
                "solar_azimuth": azim,
                "astro_tilt": astro_tilt,
                "gate_passed": gate_passed,
                "gate_reason": gate_reason,
                "energy_astro_kwh": energy_astro,
                "crop_multiplier": crop_multiplier,
            }
            # Attach baseline A if available (from BaselinePredictor); indexed
            # by full-day slot index, so the predictor must return 96 values.
            if baseline_a is not None and i < len(baseline_a):
                slot_dict["baseline_a"] = baseline_a[i]
            slots.append(slot_dict)

        return slots

    def _check_gate(
        self,
        temp_c: float,
        ghi: float,
        hour: float,
    ) -> tuple[bool, str]:
        """Simplified gate check using forecast data.

        Uses the same thresholds as InterventionGate but without sensor data.
        CWSI is estimated from temperature (proxy).

        Returns ``(passed, reason)`` where *reason* explains a block.
        """
        # No shade before configured hour
        if hour < NO_SHADE_BEFORE_HOUR:
            return False, f"before_{NO_SHADE_BEFORE_HOUR}:00"

        # Temperature below Rubisco transition
        if temp_c < SHADE_ELIGIBLE_TLEAF_ABOVE:
            return False, f"temp_{temp_c:.0f}C_below_threshold"

        # GHI below meaningful radiation
        if ghi < SHADE_ELIGIBLE_GHI_ABOVE:
            return False, f"ghi_{ghi:.0f}_below_threshold"

        # CWSI proxy from temperature: linear ramp 30°C→0.0 up to 40°C→1.0
        cwsi_proxy = max(0.0, min(1.0, (temp_c - 30.0) / 10.0))
        if cwsi_proxy < SHADE_ELIGIBLE_CWSI_ABOVE:
            return False, f"cwsi_proxy_{cwsi_proxy:.2f}_below_threshold"

        # FvCB shading_helps: above transition temp + high GHI = Rubisco-limited
        shading_helps = temp_c >= SEMILLON_TRANSITION_TEMP_C and ghi >= 400
        if not shading_helps:
            return False, "fvcb_shading_not_helpful"

        return True, "gate_passed"

    # ------------------------------------------------------------------
    # DP optimizer
    # ------------------------------------------------------------------

    def _dp_optimize(
        self,
        slots_info: List[dict],
        offsets: List[float],
        daily_budget_kwh: float,
    ) -> List[SlotPlan]:
        """Dynamic programming over slots × offsets with budget constraint.

        State: (slot_index, offset_index, budget_steps_consumed)
        Constraint: cumulative energy cost ≤ daily_budget_kwh
        Objective: maximise total utility (energy revenue + crop protection − movement cost)
        """
        # Guard: a non-positive budget means no sacrifice is allowed at all.
        # (Previously a fallback step size of 0.001 kWh combined with 100
        # budget steps let up to 0.1 kWh of interventions through even when
        # the daily budget was zero.)
        if daily_budget_kwh <= 0:
            return self._full_tracking_plan(slots_info)

        n_slots = len(slots_info)
        n_offsets = len(offsets)

        # Discretise budget into steps for tractable DP
        budget_steps = 100
        budget_per_step = daily_budget_kwh / budget_steps

        INF = float("-inf")

        # Pre-compute per-slot utilities for each offset
        slot_utilities = []  # [slot][offset] → (utility, energy_cost)
        for si in slots_info:
            utils_for_slot = []
            for offset in offsets:
                u, cost = self._slot_utility(si, offset)
                utils_for_slot.append((u, cost))
            slot_utilities.append(utils_for_slot)

        # Forward DP
        # V[t][o][b] = max total utility achievable from slots 0..t
        # ending at offset o with b budget steps consumed
        V = np.full((n_slots, n_offsets, budget_steps + 1), INF)
        choice = np.full((n_slots, n_offsets, budget_steps + 1), -1, dtype=int)

        # Initialize slot 0
        for oi, offset in enumerate(offsets):
            if not slots_info[0]["gate_passed"] and offset > 0:
                continue  # gate blocked
            u, cost = slot_utilities[0][oi]
            b_used = int(math.ceil(cost / budget_per_step)) if cost > 0 else 0
            if b_used <= budget_steps:
                V[0, oi, b_used] = u

        # Fill forward
        for t in range(1, n_slots):
            gate_passed = slots_info[t]["gate_passed"]
            for oi, offset in enumerate(offsets):
                if not gate_passed and offset > 0:
                    continue  # gate blocked — only offset=0 allowed

                u_t, cost_t = slot_utilities[t][oi]
                b_cost = int(math.ceil(cost_t / budget_per_step)) if cost_t > 0 else 0

                for prev_oi, prev_offset in enumerate(offsets):
                    # Movement cost between consecutive offsets
                    move_penalty = self.movement_cost * abs(offset - prev_offset)

                    for b_prev in range(budget_steps + 1):
                        if V[t - 1, prev_oi, b_prev] == INF:
                            continue
                        b_total = b_prev + b_cost
                        if b_total > budget_steps:
                            continue  # budget exceeded

                        val = V[t - 1, prev_oi, b_prev] + u_t - move_penalty
                        if val > V[t, oi, b_total]:
                            V[t, oi, b_total] = val
                            choice[t, oi, b_total] = prev_oi

        # Backtrack: find best final state
        best_val = INF
        best_oi = 0
        best_b = 0
        for oi in range(n_offsets):
            for b in range(budget_steps + 1):
                if V[n_slots - 1, oi, b] > best_val:
                    best_val = V[n_slots - 1, oi, b]
                    best_oi = oi
                    best_b = b

        # Trace back the path (budget index is recovered slot by slot)
        path = [0] * n_slots
        path[n_slots - 1] = best_oi
        current_b = best_b
        for t in range(n_slots - 1, 0, -1):
            prev_oi = choice[t, path[t], current_b]
            if prev_oi < 0:
                prev_oi = 0  # unreachable state — fall back to astronomical
            # Recover budget consumed at slot t
            _, cost_t = slot_utilities[t][path[t]]
            b_cost = int(math.ceil(cost_t / budget_per_step)) if cost_t > 0 else 0
            current_b = max(0, current_b - b_cost)
            path[t - 1] = prev_oi

        # Build SlotPlan list
        planned: List[SlotPlan] = []
        for t, si in enumerate(slots_info):
            oi = path[t]
            offset = offsets[oi]
            _, cost = slot_utilities[t][oi]

            tags = []
            if not si["gate_passed"]:
                tags.append(f"gate_blocked:{si['gate_reason']}")
            elif offset > 0:
                tags.append(f"intervention:{offset}deg")
            else:
                tags.append("full_tracking")

            planned.append(SlotPlan(
                time=si["time_str"],
                offset_deg=offset,
                energy_cost_kwh=round(cost, 6),
                gate_passed=si["gate_passed"],
                tags=tags,
            ))

        return planned

    def _full_tracking_plan(self, slots_info: List[dict]) -> List[SlotPlan]:
        """Zero-budget fallback: full astronomical tracking in every slot.

        Offset 0 costs nothing, so every slot gets offset 0 / cost 0 with the
        same tags the DP path would produce for a pure-tracking trajectory.
        """
        planned: List[SlotPlan] = []
        for si in slots_info:
            if not si["gate_passed"]:
                tags = [f"gate_blocked:{si['gate_reason']}"]
            else:
                tags = ["full_tracking"]
            planned.append(SlotPlan(
                time=si["time_str"],
                offset_deg=0,
                energy_cost_kwh=0.0,
                gate_passed=si["gate_passed"],
                tags=tags,
            ))
        return planned

    def _slot_utility(self, si: dict, offset_deg: float) -> tuple[float, float]:
        """Compute utility and energy cost for a slot at a given offset.

        Utility = energy_revenue + crop_protection_value
        Energy cost = energy_astro − energy_at_offset (kWh)

        Returns (utility, energy_cost_kwh).
        """
        energy_astro = si["energy_astro_kwh"]

        # Energy sacrifice fraction from tilting off the sun: 1 − cos(offset)
        sacrifice_frac = 1.0 - math.cos(math.radians(offset_deg))
        energy_at_offset = energy_astro * (1.0 - sacrifice_frac)
        energy_cost = energy_astro - energy_at_offset  # kWh sacrificed

        # Energy revenue (ILS)
        energy_revenue = energy_at_offset * self.energy_price

        # Crop protection value: non-zero only when gate passes and offset > 0
        crop_value = 0.0
        if si["gate_passed"] and offset_deg > 0:
            # Higher offset → more shade → more crop protection (diminishing returns)
            shade_benefit = math.sqrt(offset_deg / 20.0)

            if "baseline_a" in si and si["baseline_a"] > 0:
                # Use actual photosynthesis prediction for stress severity.
                # Higher A under full sun means more to protect; the benefit of
                # shading scales with how much photosynthesis is at risk.
                baseline_a = si["baseline_a"]
                # Normalize: A ~ 10-20 µmol typical → severity 1.0-2.0
                stress_severity = baseline_a / 10.0
            else:
                # Fallback: temperature heuristic
                stress_severity = max(0.0, si["temp_c"] - SEMILLON_TRANSITION_TEMP_C) / 10.0

            crop_value = (
                self.crop_value
                * si["crop_multiplier"]
                * stress_severity
                * shade_benefit
            )

        utility = energy_revenue + crop_value
        return utility, energy_cost

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _get_crop_multiplier(stage_id: str) -> float:
        """Map phenological stage ID to crop value multiplier."""
        # Map stage IDs to the STAGE_CROP_MULTIPLIER keys; unknown stages
        # default to the fruit_set weighting.
        stage_map = {
            "budburst_vegetative": "pre_flowering",
            "flowering_fruit_set": "fruit_set",
            "berry_growth": "fruit_set",
            "veraison_ripening": "veraison",
            "post_harvest_reserves": "post_harvest",
            "winter_dormancy": "post_harvest",
        }
        mapped = stage_map.get(stage_id, "fruit_set")
        return STAGE_CROP_MULTIPLIER.get(mapped, 1.0)
536
+
537
+
538
+ # ---------------------------------------------------------------------------
539
+ # CLI smoke test
540
+ # ---------------------------------------------------------------------------
541
+
542
if __name__ == "__main__":
    # CLI smoke test: build a planner against the real ShadowModel and plan
    # one synthetic hot summer day, printing the resulting trajectory.
    from src.shading.solar_geometry import ShadowModel

    shadow = ShadowModel()
    planner = DayAheadPlanner(shadow_model=shadow)

    # Simulate a hot July day in Sde Boker
    test_date = date(2025, 7, 15)

    # Generate synthetic forecast: sinusoidal temperature peaking at 38°C at 14:00 UTC
    # (96 values, one per 15-min slot)
    temps = []
    ghis = []
    for slot in range(96):
        hour = slot * 0.25
        # Temperature: 25°C at night, peaks at 38°C around 11:00 UTC (14:00 local)
        t = 25.0 + 13.0 * max(0, math.sin(math.pi * (hour - 5) / 14)) if 5 <= hour <= 19 else 25.0
        temps.append(t)
        # GHI: 0 at night, peaks at 950 W/m² at solar noon (~9:40 UTC)
        g = max(0, 950 * math.sin(math.pi * (hour - 4) / 12)) if 4 <= hour <= 16 else 0.0
        ghis.append(g)

    plan = planner.plan_day(
        target_date=test_date,
        forecast_temps=temps,
        forecast_ghi=ghis,
        daily_budget_kwh=2.0,  # typical daily budget from EnergyBudgetPlanner
    )

    # Summary header, then one row per daylight slot
    print(f"Day-Ahead Plan for {plan.target_date}")
    print(f"  Stage: {plan.stage_id}")
    print(f"  Budget: {plan.daily_budget_kwh:.2f} kWh")
    print(f"  Total cost: {plan.total_energy_cost_kwh:.4f} kWh ({plan.budget_utilisation_pct:.1f}%)")
    print(f"  Intervention slots: {plan.n_intervention_slots}/{len(plan.slots)}")
    print()
    print(f"  {'Time':>5} {'Offset':>7} {'Cost':>10} {'Gate':>6} Tags")
    print(f"  {'-' * 60}")
    for s in plan.slots:
        status = "PASS" if s.gate_passed else "BLOCK"
        print(f"  {s.time:>5} {s.offset_deg:>5.0f}° {s.energy_cost_kwh:>10.6f} {status:>6} {', '.join(s.tags)}")
src/energy_budget.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ EnergyBudgetPlanner: hierarchical energy sacrifice budget for agrivoltaic control.
3
+
4
+ Budget hierarchy:
5
+ Annual → Monthly → Weekly → Daily → 15-min Slot
6
+
7
+ The system defaults to full astronomical tracking (max energy). Shading
8
+ interventions draw from a tight budget (default 5% of annual generation).
9
+ Budget is pre-allocated down the hierarchy so that hot days/hours get more,
10
+ and the system never overspends.
11
+
12
+ References:
13
+ - config/settings.py for all thresholds and weights
14
+ - context/2_plan.md §3.1 for design rationale
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from datetime import date, timedelta
20
+ from typing import Optional
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from config.settings import (
26
+ ANNUAL_RESERVE_PCT,
27
+ DAILY_MARGIN_PCT,
28
+ MAX_ENERGY_REDUCTION_PCT,
29
+ MONTHLY_BUDGET_WEIGHTS,
30
+ NO_SHADE_BEFORE_HOUR,
31
+ WEEKLY_RESERVE_PCT,
32
+ )
33
+
34
+
35
class EnergyBudgetPlanner:
    """Hierarchical energy sacrifice budget for agrivoltaic shading control.

    Budgets flow Annual → Monthly → Weekly → Daily → 15-min slot. All plan
    objects returned by the ``compute_*`` methods are plain dicts that the
    ``spend_slot`` / ``emergency_draw`` helpers mutate in place, so callers
    own the state and may persist it between ticks.

    Parameters
    ----------
    max_energy_reduction_pct : float
        Maximum fraction of annual PV generation the vines can "spend" on
        shading (default from config: 5%).
    shadow_model : object, optional
        ShadowModel instance used to estimate slot-level energy potential.
        If None, annual plan uses a simplified analytical estimate.
    """

    def __init__(
        self,
        max_energy_reduction_pct: float = MAX_ENERGY_REDUCTION_PCT,
        shadow_model=None,
    ):
        self.max_pct = max_energy_reduction_pct
        self.shadow = shadow_model

    # ------------------------------------------------------------------
    # Annual plan
    # ------------------------------------------------------------------

    def compute_annual_plan(self, year: int) -> dict:
        """Compute seasonal energy potential and allocate monthly budgets.

        Iterates every 15-min slot from May 1 to Sep 30, computing energy
        under astronomical tracking. Then distributes the sacrifice budget
        across months using MONTHLY_BUDGET_WEIGHTS.

        Returns dict with:
            year, total_potential_kWh, total_budget_kWh, annual_reserve_kWh,
            monthly_budgets (dict[int, float]), budget_spent_kWh
        """
        season_start = pd.Timestamp(f"{year}-05-01", tz="UTC")
        season_end = pd.Timestamp(f"{year}-09-30 23:45", tz="UTC")
        times = pd.date_range(season_start, season_end, freq="15min")

        # Prefer the geometric model when available; otherwise the cheap
        # analytical estimate below (good enough for planning).
        if self.shadow is not None:
            energy_per_slot = self._energy_from_shadow_model(times)
        else:
            energy_per_slot = self._energy_analytical(times)

        total_potential = float(np.sum(energy_per_slot))
        total_budget = total_potential * self.max_pct / 100.0
        annual_reserve = total_budget * ANNUAL_RESERVE_PCT / 100.0
        distributable = total_budget - annual_reserve

        # NOTE(review): assumes MONTHLY_BUDGET_WEIGHTS values sum to 1.0 —
        # confirm in config/settings.py.
        monthly_budgets = {
            month: distributable * weight
            for month, weight in MONTHLY_BUDGET_WEIGHTS.items()
        }

        return {
            "year": year,
            "total_potential_kWh": round(total_potential, 2),
            "total_budget_kWh": round(total_budget, 2),
            "annual_reserve_kWh": round(annual_reserve, 2),
            "monthly_budgets": {m: round(v, 4) for m, v in monthly_budgets.items()},
            "budget_spent_kWh": 0.0,
        }

    def _energy_from_shadow_model(self, times: pd.DatetimeIndex) -> np.ndarray:
        """Estimate per-slot energy (kWh per kWp) using the ShadowModel's solar position."""
        solar_pos = self.shadow.get_solar_position(times)
        energy = []
        for _, sp in solar_pos.iterrows():
            # Sun below horizon → no generation
            if sp["solar_elevation"] <= 0:
                energy.append(0.0)
                continue
            tracker = self.shadow.compute_tracker_tilt(
                sp["solar_azimuth"], sp["solar_elevation"]
            )
            # cos(AOI) × 0.25h slot duration → kWh per kWp
            e = max(0.0, np.cos(np.radians(tracker["aoi"]))) * 0.25
            energy.append(e)
        return np.array(energy)

    @staticmethod
    def _energy_analytical(times: pd.DatetimeIndex) -> np.ndarray:
        """Simplified analytical estimate when no ShadowModel is available.

        Vectorized: computes all ~15k slots in one numpy pass.
        Uses a sinusoidal day profile peaking at solar noon. Good enough
        for budget planning; not used for real-time control.
        """
        from config.settings import SITE_LATITUDE

        hour_utc = times.hour + times.minute / 60.0
        # 34.8 is presumably the site longitude (°E) — TODO confirm against config
        solar_noon_utc = 12.0 - 34.8 / 15.0  # ≈ 9.68 UTC
        hour_angle = (hour_utc - solar_noon_utc) * 15.0  # degrees

        lat_rad = np.radians(SITE_LATITUDE)
        doy = times.dayofyear
        # Standard solar declination approximation (Cooper's equation)
        decl_rad = np.radians(23.45 * np.sin(np.radians(360.0 / 365.0 * (doy - 81))))
        ha_rad = np.radians(hour_angle)

        # sin(solar elevation) from latitude, declination, hour angle
        sin_elev = (
            np.sin(lat_rad) * np.sin(decl_rad)
            + np.cos(lat_rad) * np.cos(decl_rad) * np.cos(ha_rad)
        )
        # Astronomical tracking → AOI ≈ 0 → cos(AOI) ≈ 1
        # Scale by clearness (~0.75 for Sde Boker) and slot duration (0.25h)
        return np.where(sin_elev > 0, sin_elev * 0.75 * 0.25, 0.0)

    # ------------------------------------------------------------------
    # Weekly plan
    # ------------------------------------------------------------------

    def compute_weekly_plan(
        self,
        week_start: pd.Timestamp | date,
        monthly_remaining: float,
        forecast_tmax: Optional[list[float]] = None,
        rollover: float = 0.0,
    ) -> dict:
        """Distribute weekly budget to days, weighted by (Tmax - 30)².

        Days with forecast Tmax < 30°C get zero allocation (no stress
        expected). Hot days get quadratically more budget.

        Parameters
        ----------
        week_start : date-like
            First day of the week.
        monthly_remaining : float
            Remaining monthly budget (kWh).
        forecast_tmax : list of 7 floats, optional
            Forecast daily maximum temperature for each day of the week.
            If None, budget is split evenly.
        rollover : float
            Unspent budget rolled over from the previous week.

        Returns dict with:
            weekly_total_kWh, weekly_reserve_kWh, daily_budgets_kWh (list[7])
        """
        if not isinstance(week_start, pd.Timestamp):
            week_start = pd.Timestamp(week_start)

        month = week_start.month
        # Estimate weeks remaining in the month.
        # NOTE(review): the month == 9 branch is redundant — the general
        # branch computes the same Sep 30 end date; only month == 12 needs
        # the special case (month + 1 would overflow).
        if month == 12:
            month_end = pd.Timestamp(f"{week_start.year}-12-31")
        elif month == 9:
            month_end = pd.Timestamp(f"{week_start.year}-09-30")
        else:
            month_end = pd.Timestamp(
                f"{week_start.year}-{month + 1:02d}-01"
            ) - timedelta(days=1)
        days_left = max(1, (month_end - week_start).days)
        weeks_left = max(1, days_left // 7)

        weekly_raw = monthly_remaining / weeks_left + rollover
        weekly_reserve = weekly_raw * WEEKLY_RESERVE_PCT / 100.0
        distributable = weekly_raw - weekly_reserve

        if forecast_tmax is not None and len(forecast_tmax) == 7:
            # Quadratic heat weighting: only days above 30°C draw budget
            weights = [max(0.0, t - 30.0) ** 2 for t in forecast_tmax]
            total_w = sum(weights)
            if total_w > 0:
                daily = [distributable * w / total_w for w in weights]
            else:
                daily = [0.0] * 7  # all days < 30°C → no budget needed
        else:
            daily = [distributable / 7.0] * 7

        return {
            "weekly_total_kWh": round(weekly_raw, 4),
            "weekly_reserve_kWh": round(weekly_reserve, 4),
            "daily_budgets_kWh": [round(d, 4) for d in daily],
        }

    # ------------------------------------------------------------------
    # Daily plan
    # ------------------------------------------------------------------

    def compute_daily_plan(
        self,
        day: date | pd.Timestamp,
        daily_budget: float,
        rollover: float = 0.0,
    ) -> dict:
        """Distribute daily budget to 15-min slots.

        Zero before NO_SHADE_BEFORE_HOUR (10:00). Peak allocation at
        11:00–14:00 (60% of planned budget).

        Returns dict with:
            date, daily_total_kWh, daily_margin_kWh, daily_margin_remaining_kWh,
            slot_budgets (dict[str, float]), cumulative_spent
        """
        daily_raw = daily_budget + rollover
        daily_margin = daily_raw * DAILY_MARGIN_PCT / 100.0
        planned = daily_raw - daily_margin

        # Time blocks with their share of the planned budget.
        # The non-zero weights must sum to 1.0.
        # NOTE(review): if NO_SHADE_BEFORE_HOUR were ever ≥ 13, blocks would
        # overlap/empty and the 0.60 peak share could be silently dropped —
        # confirm config keeps it well below 13.
        transition_end = max(NO_SHADE_BEFORE_HOUR + 1, 11)
        blocks = [
            ((5, NO_SHADE_BEFORE_HOUR), 0.00),  # morning — no shade
            ((NO_SHADE_BEFORE_HOUR, transition_end), 0.05),  # transition
            ((transition_end, 14), 0.60),  # peak stress window
            ((14, 16), 0.30),  # sustained heat
            ((16, 20), 0.05),  # rare late stress
        ]

        slot_budgets: dict[str, float] = {}
        for (h_start, h_end), weight in blocks:
            block_budget = planned * weight
            n_slots = (h_end - h_start) * 4  # 4 slots per hour
            per_slot = block_budget / n_slots if n_slots > 0 else 0.0
            for h in range(h_start, h_end):
                for m in (0, 15, 30, 45):
                    slot_budgets[f"{h:02d}:{m:02d}"] = round(per_slot, 6)

        return {
            "date": str(day),
            "daily_total_kWh": round(daily_raw, 4),
            "daily_margin_kWh": round(daily_margin, 4),
            "daily_margin_remaining_kWh": round(daily_margin, 4),
            "slot_budgets": slot_budgets,
            "cumulative_spent": 0.0,
        }

    # ------------------------------------------------------------------
    # Slot-level execution helpers
    # ------------------------------------------------------------------

    def spend_slot(self, daily_plan: dict, slot_key: str, amount: float) -> float:
        """Deduct energy from a slot's budget. Returns amount actually spent.

        Mutates *daily_plan* in place. If the slot budget is insufficient,
        draws the shortfall from the daily margin; the amount returned may
        therefore be less than requested when both are exhausted.
        """
        # Unknown slot keys yield 0 available (and get created as spent below)
        available = daily_plan["slot_budgets"].get(slot_key, 0.0)
        if amount <= available:
            daily_plan["slot_budgets"][slot_key] -= amount
            daily_plan["cumulative_spent"] += amount
            return amount

        # Slot budget exhausted — try daily margin
        shortfall = amount - available
        margin = daily_plan["daily_margin_remaining_kWh"]
        from_margin = min(shortfall, margin)
        total_spent = available + from_margin

        daily_plan["slot_budgets"][slot_key] = 0.0
        daily_plan["daily_margin_remaining_kWh"] -= from_margin
        daily_plan["cumulative_spent"] += total_spent
        return round(total_spent, 6)

    def emergency_draw(self, annual_plan: dict, amount: float) -> float:
        """Draw from annual reserve for extreme heat events.

        Mutates *annual_plan* in place. Returns the amount actually drawn
        (may be less than requested if the reserve is depleted).
        """
        available = annual_plan["annual_reserve_kWh"]
        drawn = min(amount, available)
        annual_plan["annual_reserve_kWh"] = round(available - drawn, 4)
        annual_plan["budget_spent_kWh"] = round(
            annual_plan["budget_spent_kWh"] + drawn, 4
        )
        return round(drawn, 4)

    # ------------------------------------------------------------------
    # Rollover helper
    # ------------------------------------------------------------------

    def compute_daily_rollover(self, daily_plan: dict) -> float:
        """Compute unspent budget at end of day (available for next day)."""
        # Leftovers = untouched slot allocations + unused safety margin
        unspent_slots = sum(daily_plan["slot_budgets"].values())
        unspent_margin = daily_plan["daily_margin_remaining_kWh"]
        return round(unspent_slots + unspent_margin, 4)