Spaces:

lablab-ai-amd-developer-hackathon
/

riprap-nyc

Running

App Files Files Community

riprap-nyc / app /energy.py

seriffic

Reconciler skeleton + composite score + energy stub

76f3ae6 4 days ago

raw

history blame contribute delete

2.54 kB

	"""Per-query energy footprint estimate.

	Conservative, defensible numbers — no overclaim. We measure local
	inference time and apply a published-range package-power figure for
	Apple-Silicon LLM inference; we compare to the most recent published
	estimate of frontier-cloud per-query energy (Epoch AI, 2025).

	This is not a benchmark — it's a transparent rule-of-thumb that the
	user can audit. The system prompt and the UI both surface the
	underlying numbers and the citation.
	"""
	from __future__ import annotations

	# Assumed sustained package power (watts) for local inference:
	# Granite 4.1:3b, q4_K_M quant, on Apple M-series (M3/M4 Pro range),
	# over roughly 5 s of LLM inference. Conservative midpoint of ml.energy
	# and community measurements.
	LOCAL_PACKAGE_POWER_W = 20.0

	# Published per-query inference energy (watt-hours) for a frontier cloud
	# model. Source: Epoch AI, "How much energy does ChatGPT use?" (2025),
	# https://epoch.ai/gradient-updates/how-much-energy-does-chatgpt-use
	# Typical-query estimate for GPT-4o-class inference; long-context queries
	# scale roughly linearly with token count.
	CLOUD_PER_QUERY_WH = 0.30

	# Human-readable citation strings used in the UI.
	LOCAL_SOURCE = (
	    "ml.energy / community measurements; "
	    "~20 W package power during Granite 4.1:3b q4_K_M inference "
	    "on Apple M-series."
	)
	CLOUD_SOURCE = (
	    'Epoch AI (2025), "How much energy does ChatGPT use?", estimating '
	    "~0.3 Wh per typical GPT-4o query."
	)


	def estimate(reconcile_seconds: float, total_seconds: float | None = None) -> dict:
	    """Return a per-query energy estimate.

	    The local figure is ``LOCAL_PACKAGE_POWER_W`` watts sustained for
	    ``reconcile_seconds`` seconds, converted to watt-hours. It is reported
	    next to the fixed ``CLOUD_PER_QUERY_WH`` figure so the two can be
	    compared.

	    Args:
	        reconcile_seconds: wallclock of the Granite reconcile step (the
	            only step that meaningfully draws CPU/GPU power).
	        total_seconds: optional full-FSM wallclock for context.

	    Returns:
	        A dict with these keys:

	        * ``local_wh`` / ``local_mwh``: local estimate in Wh (4 decimals)
	          and mWh (1 decimal).
	        * ``cloud_wh`` / ``cloud_mwh``: the cloud per-query figure in Wh
	          and mWh.
	        * ``ratio_cloud_over_local``: cloud Wh divided by local Wh,
	          rounded to 1 decimal, or ``None`` when the local estimate is
	          not positive (avoids dividing by zero).
	        * ``method``: the formula text and citation strings behind both
	          numbers.
	        * ``reconcile_seconds`` / ``total_seconds``: the inputs rounded
	          to 2 decimals; ``total_seconds`` stays ``None`` if not given.
	    """
	    # watts x seconds gives watt-seconds; divide by 3600 to get watt-hours.
	    local_wh = LOCAL_PACKAGE_POWER_W * reconcile_seconds / 3600.0
	    return {
	        "local_wh": round(local_wh, 4),
	        # Derived from the unrounded value so the mWh figure keeps precision.
	        "local_mwh": round(local_wh * 1000, 1),
	        "cloud_wh": CLOUD_PER_QUERY_WH,
	        "cloud_mwh": round(CLOUD_PER_QUERY_WH * 1000, 1),
	        "ratio_cloud_over_local": (
	            round(CLOUD_PER_QUERY_WH / local_wh, 1) if local_wh > 0 else None
	        ),
	        "method": {
	            "local": f"{LOCAL_PACKAGE_POWER_W} W × {reconcile_seconds:.2f} s ÷ 3600",
	            "local_source": LOCAL_SOURCE,
	            "cloud": f"{CLOUD_PER_QUERY_WH} Wh per query (published estimate)",
	            "cloud_source": CLOUD_SOURCE,
	        },
	        "reconcile_seconds": round(reconcile_seconds, 2),
	        "total_seconds": round(total_seconds, 2) if total_seconds is not None else None,
	    }