lexenvs-harbor / src /lexenvs /config /constants.py
endishai's picture
Upload folder using huggingface_hub
2312199 verified
"""Shared constants and utilities for evaluation services."""
from __future__ import annotations
from difflib import SequenceMatcher
# Top-level keys the agent's structured JSON output must contain.
# compute_structure_bonus() awards the full bonus tier only when every one
# of these keys is present in the parsed dict.
REQUIRED_JSON_FIELDS = {
    "recommended_cards",
    "total_ev_usd",
    "ev_breakdown",
    "constraints_respected",
}
# Multiplier applied to the reward when a hard constraint is violated.
# Scaling (rather than zeroing) preserves some gradient signal; see
# compute_composite_reward(), which applies it before clamping to [0, 1].
HARD_CONSTRAINT_PENALTY = 0.1
def fuzzy_match_cards(expected: set[str], agent: set[str]) -> float:
    """Count matching cards, giving partial credit for near-miss names.

    Matching runs in two passes so that the result never depends on set
    iteration order (which varies between interpreter runs because string
    hashes are randomized):

    1. Every name present in both sets is an exact match worth 1.0.
    2. Each remaining expected name, visited in sorted order, is greedily
       paired with the most similar agent name that is still unused. A
       pair whose ``SequenceMatcher`` ratio is >= 0.8 is worth 0.5.

    Each agent name is consumed by at most one expected name, and neither
    input set is modified.

    Args:
        expected: Reference card names.
        agent: Card names produced by the agent.

    Returns:
        The "true positive" count as a float. It may be fractional because
        fuzzy matches only earn half credit.
    """
    expected_names = set(expected)
    agent_names = set(agent)

    # Pass 1: settle exact matches up front so a fuzzy match can never
    # steal an agent name that is an exact hit for another expected name.
    exact = expected_names & agent_names
    tp = float(len(exact))

    # Pass 2: greedy fuzzy matching over the leftovers. Sorting both sides
    # makes the visiting order and the tie-breaking reproducible.
    unmatched_agent = sorted(agent_names - exact)
    for exp_card in sorted(expected_names - exact):
        if not unmatched_agent:
            break
        scored = [
            (SequenceMatcher(None, exp_card, ag_card).ratio(), ag_card)
            for ag_card in unmatched_agent
        ]
        # max() returns the first maximal entry, i.e. the alphabetically
        # smallest candidate among equally similar names.
        best_score, best_match = max(scored, key=lambda pair: pair[0])
        if best_score >= 0.8:
            tp += 0.5
            unmatched_agent.remove(best_match)
    return tp
def compute_f1_score(expected_set: set[str], agent_set: set[str]) -> float:
    """Return the F1 score between expected and agent card sets.

    The true-positive count comes from ``fuzzy_match_cards`` and may be
    fractional. Two empty sets score 1.0; exactly one empty set scores 0.0.
    """
    # Both sides empty: nothing was expected and nothing was proposed.
    if not (expected_set or agent_set):
        return 1.0
    # Exactly one side empty: precision or recall is undefined, score zero.
    if not (expected_set and agent_set):
        return 0.0

    matched = fuzzy_match_cards(expected_set, agent_set)
    recall = matched / len(expected_set)
    precision = matched / len(agent_set)

    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return 2 * precision * recall / denominator
def compute_composite_reward(
    scored_dimensions: list[tuple[float, float]],
    structure_bonus: float,
    hard_constraint_violated: bool,
) -> float:
    """Combine per-dimension scores into one reward in the range [0, 1].

    ``scored_dimensions`` holds ``(score, weight)`` pairs.

    - With no pairs, the reward is the structure bonus alone.
    - With pairs whose weights sum to zero, the reward is 0.0 and the
      structure bonus is not added.
    - Otherwise the reward is the weighted average of the scores plus the
      structure bonus.

    When the hard constraint is violated the reward is multiplied by
    ``HARD_CONSTRAINT_PENALTY`` before the final clamp.
    """
    # Fallback used when nothing was scored.
    reward = structure_bonus

    if scored_dimensions:
        weight_total = sum(weight for _, weight in scored_dimensions)
        if weight_total == 0:
            reward = 0.0
        else:
            weighted_total = sum(
                score * weight for score, weight in scored_dimensions
            )
            reward = weighted_total / weight_total + structure_bonus

    if hard_constraint_violated:
        reward = reward * HARD_CONSTRAINT_PENALTY

    # Clamp to [0, 1].
    if reward < 0.0:
        return 0.0
    if reward > 1.0:
        return 1.0
    return reward
def compute_structure_bonus(
    parsed_json: dict | None,  # type: ignore[type-arg]
    bonus_basic: float,
    bonus_partial: float,
    bonus_full: float,
) -> tuple[float, bool, bool]:
    """Pick the structure bonus tier earned by the parsed agent output.

    Returns ``(structure_bonus, json_valid, json_has_required_fields)``.

    - ``json_valid`` is True whenever ``parsed_json`` is not None.
    - Non-dict input earns no bonus.
    - A dict containing every key in ``REQUIRED_JSON_FIELDS`` earns
      ``bonus_full``.
    - A dict with both "recommended_cards" and "total_ev_usd" earns
      ``bonus_partial``.
    - Any other dict earns ``bonus_basic``.
    """
    json_valid = parsed_json is not None

    # Anything that is not a dict (including None) gets no bonus and cannot
    # satisfy the required-field check.
    if not isinstance(parsed_json, dict):
        return 0.0, json_valid, False

    json_has_required = REQUIRED_JSON_FIELDS.issubset(parsed_json.keys())

    if json_has_required:
        tier = bonus_full
    elif "recommended_cards" in parsed_json and "total_ev_usd" in parsed_json:
        tier = bonus_partial
    else:
        tier = bonus_basic
    return tier, json_valid, json_has_required