File size: 2,543 Bytes
"""Per-query energy footprint estimate.
Conservative, defensible numbers — no overclaim. We measure local
inference time and apply a published-range package-power figure for
Apple-Silicon LLM inference; we compare to the most recent published
estimate of frontier-cloud per-query energy (Epoch AI, 2025).
This is not a benchmark — it's a transparent rule-of-thumb that the
user can audit. The system prompt and the UI both surface the
underlying numbers and the citation.
"""
from __future__ import annotations
# --- Local inference --------------------------------------------------------
# Assumed setup: Granite 4.1:3b, q4_K_M quantization, on an Apple M-series
# chip (M3/M4 Pro range).
# Value: sustained package power, in watts, over roughly 5 s of LLM
# inference. Taken as a conservative midpoint of ml.energy and community
# measurements.
LOCAL_PACKAGE_POWER_W = 20.0

# --- Frontier cloud ---------------------------------------------------------
# Per-query inference energy, in watt-hours.
# Taken from Epoch AI, "How much energy does ChatGPT use?" (2025):
# https://epoch.ai/gradient-updates/how-much-energy-does-chatgpt-use
# The estimate is for a typical GPT-4o-class query; long-context queries
# scale roughly linearly with token count.
CLOUD_PER_QUERY_WH = 0.30

# --- Citations --------------------------------------------------------------
# Human-readable source strings that the UI displays next to the numbers.
LOCAL_SOURCE = (
    "ml.energy / community measurements; "
    "~20 W package power during Granite 4.1:3b q4_K_M inference "
    "on Apple M-series."
)
CLOUD_SOURCE = (
    'Epoch AI (2025), '
    '"How much energy does ChatGPT use?", estimating ~0.3 Wh '
    'per typical GPT-4o query.'
)
def estimate(reconcile_seconds: float, total_seconds: float | None = None) -> dict:
    """Estimate the energy footprint of one query, local versus cloud.

    The local energy is ``LOCAL_PACKAGE_POWER_W`` multiplied by the
    reconcile duration, converted from watt-seconds to watt-hours. The
    cloud figure is the fixed ``CLOUD_PER_QUERY_WH`` constant.

    Args:
        reconcile_seconds: wallclock of the Granite reconcile step (the
            only step that meaningfully draws CPU/GPU power).
        total_seconds: optional full-FSM wallclock, echoed back for
            context; it does not enter the energy calculation.

    Returns:
        A dict holding the local and cloud energy in Wh and mWh, the
        cloud-over-local ratio (``None`` when the local energy is not
        greater than zero), a ``method`` sub-dict with the arithmetic and
        the citation strings, and both input durations rounded to two
        decimals (``total_seconds`` stays ``None`` when it was not given).
    """
    # watts * seconds = joules; dividing by 3600 converts to watt-hours.
    energy_wh = LOCAL_PACKAGE_POWER_W * reconcile_seconds / 3600.0

    # The ratio is only computed for a strictly positive local energy, which
    # also keeps a zero-length reconcile step from dividing by zero.
    if energy_wh > 0:
        cloud_vs_local = round(CLOUD_PER_QUERY_WH / energy_wh, 1)
    else:
        cloud_vs_local = None

    # Human-auditable description of how each number was obtained.
    method = {
        "local": f"{LOCAL_PACKAGE_POWER_W} W × {reconcile_seconds:.2f} s ÷ 3600",
        "local_source": LOCAL_SOURCE,
        "cloud": f"{CLOUD_PER_QUERY_WH} Wh per query (published estimate)",
        "cloud_source": CLOUD_SOURCE,
    }

    if total_seconds is None:
        total_rounded = None
    else:
        total_rounded = round(total_seconds, 2)

    report = {}
    report["local_wh"] = round(energy_wh, 4)
    report["local_mwh"] = round(energy_wh * 1000, 1)
    report["cloud_wh"] = CLOUD_PER_QUERY_WH
    report["cloud_mwh"] = round(CLOUD_PER_QUERY_WH * 1000, 1)
    report["ratio_cloud_over_local"] = cloud_vs_local
    report["method"] = method
    report["reconcile_seconds"] = round(reconcile_seconds, 2)
    report["total_seconds"] = total_rounded
    return report
|