# NOTE: extraction artifact removed here — the original capture carried a
# "Spaces: Sleeping" hosting-status banner that is not part of this module.
| """ | |
| ui_env.py | |
| --------- | |
| Environment Engine for an Adaptive UI Layout Optimization system. | |
| """ | |
| from __future__ import annotations | |
| import random | |
| from typing import Literal, Optional | |
| from pydantic import BaseModel, Field, model_validator | |
| # --------------------------------------------------------------------------- | |
| # Task Class (Required by OpenEnv Validator) | |
| # --------------------------------------------------------------------------- | |
EPS = 1e-6


def safe_grader(fn):
    """Wrap a grader callable so it always yields a float strictly inside (0, 1).

    The wrapper tolerates graders of any arity (retrying with no argument on
    TypeError), coerces non-numeric results to the neutral score 0.5, clamps
    numeric results into [EPS, 1 - EPS], and maps any raised exception to 0.5.
    """
    def wrapper(x=None):
        try:
            if x is None:
                result = fn()
            else:
                try:
                    result = fn(x)
                except TypeError:
                    # Grader takes no argument -- retry without one.
                    result = fn()
            if not isinstance(result, (int, float)):
                result = 0.5
            return min(max(float(result), EPS), 1.0 - EPS)
        except Exception:
            # Any grader failure degrades to the neutral midpoint score.
            return 0.5
    return wrapper
def clamp_score(raw: float) -> float:
    """Coerce *raw* into a float strictly inside (EPS, 1 - EPS).

    Retained as a stub just in case other direct internal refs exist,
    though safe_grader is now the strict master gate.  Non-numeric input
    collapses to the neutral score 0.5.
    """
    if isinstance(raw, (int, float)):
        return min(max(float(raw), EPS), 1.0 - EPS)
    return 0.5
def normalize(x: float, lo: float, hi: float) -> float:
    """Min-max normalize *x* from [lo, hi] onto [0, 1], clamped at both ends.

    A degenerate interval (hi <= lo) yields the neutral value 0.5.
    """
    if hi <= lo:
        return 0.5
    fraction = (x - lo) / (hi - lo)
    return min(1.0, max(0.0, fraction))
def grade_easy(x=None) -> float:
    """Module-level shim for UIEnv.grade_easy; never raises, stays in (0, 1)."""
    try:
        score = UIEnv.grade_easy(x)
        return clamp_score(score)
    except Exception:
        return 0.5
def grade_medium(x=None) -> float:
    """Module-level shim for UIEnv.grade_medium; never raises, stays in (0, 1)."""
    try:
        score = UIEnv.grade_medium(x)
        return clamp_score(score)
    except Exception:
        return 0.5
def grade_hard(x=None) -> float:
    """Module-level shim for UIEnv.grade_hard; never raises, stays in (0, 1)."""
    try:
        score = UIEnv.grade_hard(x)
        return clamp_score(score)
    except Exception:
        return 0.5
class Task:
    """Named task entry pairing a task identifier with its grader callable."""

    def __init__(self, name: str, grader) -> None:
        self.name = name      # task identifier, e.g. "easy"
        self.grader = grader  # callable returning a score in (0, 1)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
BUTTON_SIZE_MIN: float = 0.5   # smallest allowed button-size multiplier
BUTTON_SIZE_MAX: float = 2.0   # largest allowed button-size multiplier
FORM_LENGTH_MIN: int = 1       # fewest form fields
FORM_LENGTH_MAX: int = 10      # most form fields
STEPS_MIN: int = 1             # fewest wizard/checkout steps
STEPS_MAX: int = 10            # most wizard/checkout steps
BUTTON_SIZE_DELTA: float = 0.1  # increment applied by "increase_button"
FORM_LENGTH_DELTA: int = 1      # decrement applied by "decrease_form"
STEPS_DELTA: int = 1            # step size for increase/decrease_steps
INVALID_ACTION_REWARD: float = -0.1  # penalty for out-of-range set_button_size
MAX_STEPS_PER_EPISODE: int = 20      # hard episode-length cap enforced in step()
# NOTE(review): the two sweet-spot constants below are never referenced in
# this file -- _simulate_user hardcodes 0.9/1.3 -- consider wiring them in.
BUTTON_SWEET_LOW: float = 0.9
BUTTON_SWEET_HIGH: float = 1.3
| # --------------------------------------------------------------------------- | |
| # Data Models | |
| # --------------------------------------------------------------------------- | |
class Layout(BaseModel):
    """Represents the current UI layout configuration.

    Field bounds mirror the module-level range constants; the engine also
    re-clamps these values after every action via _clamp_layout().
    """
    # Multiplier for button dimensions; validated to [0.5, 2.0].
    button_size: float = Field(
        default=1.0,
        ge=BUTTON_SIZE_MIN,
        le=BUTTON_SIZE_MAX,
        description="Size multiplier for UI buttons (0.5 - 2.0).",
    )
    # Number of input fields in the form; validated to [1, 10].
    form_length: int = Field(
        default=5,
        ge=FORM_LENGTH_MIN,
        le=FORM_LENGTH_MAX,
        description="Number of fields in the form (1 - 10).",
    )
    # Number of wizard/checkout steps; validated to [1, 10].
    steps: int = Field(
        default=3,
        ge=STEPS_MIN,
        le=STEPS_MAX,
        description="Number of wizard / checkout steps (1 - 10).",
    )
class Observation(BaseModel):
    """Full observable state returned to the agent after every transition."""
    # Device the simulated user is on; grade_hard's button sweet spot depends on it.
    device: Literal["mobile", "desktop"] = Field(
        description="Device type the user is on.",
    )
    # Copy of the engine's current layout configuration.
    layout: Layout = Field(
        description="Current layout configuration.",
    )
    progress: float = Field(
        ge=0.0,
        le=1.0,
        description="User's task-completion progress in [0, 1].",
    )
    last_action: Optional[str] = Field(
        default=None,
        description="String name of the most recently applied action, or None.",
    )
    # step_async() folds the (reward, done, info) tuple into these three fields.
    reward: float = Field(default=0.0, description="Step reward")
    done: bool = Field(default=False, description="Is episode done")
    info: dict = Field(default_factory=dict, description="Extra info")
class Action(BaseModel):
    """An action the agent can submit to the environment.

    `value` is only meaningful for "set_button_size"; the model validator
    below rejects that action type when no value is supplied.
    """
    type: Literal[
        "increase_button",
        "decrease_form",
        "increase_steps",
        "decrease_steps",
        "reorder_sections",
        "set_button_size",
        "noop",
    ] = Field(description="Discrete action type.")
    value: Optional[float] = Field(
        default=None,
        description="Optional scalar payload (used by set_button_size).",
    )

    # Bug fix: the decorator was missing (model_validator was imported but
    # unused), so pydantic never ran this check and a set_button_size action
    # with value=None reached _apply_action, crashing on a None comparison.
    @model_validator(mode="after")
    def _value_required_for_set_button_size(self) -> "Action":
        """Ensure `value` is provided when action type requires it."""
        if self.type == "set_button_size" and self.value is None:
            raise ValueError("'value' must be provided for action type 'set_button_size'.")
        return self
| # --------------------------------------------------------------------------- | |
| # Environment Engine | |
| # --------------------------------------------------------------------------- | |
class UIEnv:
    """Adaptive UI Layout Optimization - Environment Engine."""

    def __init__(self, seed: int = 42, task: str = "easy") -> None:
        # Seed is stored and drives a per-instance RNG for reproducibility.
        self._seed: int = seed
        self.task: str = task
        self._rng: random.Random = random.Random(seed)
        # OpenEnv task list with graders -- NO dynamic generation, NO conditionals
        # (kept as a literal list so the external validator can discover tasks).
        self.tasks = [
            Task(name="easy", grader=safe_grader(self.grade_easy)),
            Task(name="medium", grader=safe_grader(self.grade_medium)),
            Task(name="hard", grader=safe_grader(self.grade_hard)),
        ]
        self.task_dict = {t.name: t for t in self.tasks}
        # Mutable episode state; fully (re)initialized by reset().
        self._layout: Layout = Layout()
        self._device: Literal["mobile", "desktop"] = "desktop"
        self._progress: float = 0.0
        self._last_action: Optional[str] = None
        self._step_count: int = 0
        self._prev_score: float = 0.0  # previous grader score, for delta shaping
        # Hidden user traits, randomized each reset().
        self._prefers_short_forms: bool = False
        # NOTE(review): _prefers_large_buttons is assigned but never read in
        # this file -- confirm whether the simulator should use it.
        self._prefers_large_buttons: bool = False
        self._user_type: str = "new"  # "impatient" | "careful" | "new"
        self._ready: bool = False     # True once reset() has been called
    def reset(self) -> Observation:
        """Start a new episode and return the initial observation.

        Task difficulty shapes the starting layout: "easy" episodes begin
        near favorable values, "hard" ones span the full ranges.  NOTE:
        the RNG draws below occur in a fixed order; reordering them would
        change episode streams for a given seed.
        """
        if self.task == "easy":
            steps = self._rng.randint(2, 3)
            form_length = self._rng.randint(2, 4)
            button_size = self._rng.uniform(0.9, 1.2)
        elif self.task == "medium":
            steps = self._rng.randint(3, 5)
            form_length = self._rng.randint(4, 6)
            button_size = self._rng.uniform(0.7, 1.5)
        elif self.task == "hard":
            steps = self._rng.randint(5, 8)
            form_length = self._rng.randint(6, 10)
            button_size = self._rng.uniform(0.5, 2.0)
        else:
            # Unknown task name: medium-like ranges with a neutral button size.
            steps = self._rng.randint(3, 5)
            form_length = self._rng.randint(4, 6)
            button_size = 1.0
        self._layout = Layout(
            button_size=button_size,
            form_length=form_length,
            steps=steps,
        )
        self._clamp_layout()
        self._device = self._rng.choice(("mobile", "desktop"))
        self._progress = 0.0
        self._last_action = None
        self._step_count = 0
        # Hidden user traits the agent can only infer from outcomes.
        self._prefers_short_forms = self._rng.choice([True, False])
        self._prefers_large_buttons = self._rng.choice([True, False])
        self._user_type = self._rng.choice(["impatient", "careful", "new"])
        self._ready = True
        # Initialize prev_score for delta-based reward shaping
        task_obj = next((t for t in self.tasks if t.name == self.task), self.tasks[0])
        self._prev_score = task_obj.grader(None)
        return self._get_observation()
    def step(self, action: Action) -> tuple[Observation, float, bool, dict]:
        """Apply *action*, simulate one user reaction, and return
        (observation, reward, done, info).

        Reward components, in order: simulated-user reward plus any
        invalid-action offset, +2.0 on completion or +0.1 on continue,
        a constant -0.05 per-step cost, a grader-score delta scaled by
        alpha=10, and -- on terminal steps only -- a final boost equal to
        the grader score itself.
        """
        if not self._ready:
            # Be forgiving: auto-reset instead of raising on an un-reset env.
            self.reset()
        action_reward_offset: float = self._apply_action(action)
        self._step_count += 1
        outcome, user_reward = self._simulate_user()
        done = False
        if outcome == "drop":
            done = True
        elif outcome == "distrust":
            # Distrust stalls progress but does not end the episode.
            pass
        else:
            # A continuing user advances 1/steps of the flow per step.
            self._progress += 1.0 / max(1, self._layout.steps)
            if self._progress >= 0.999:
                self._progress = 1.0
                outcome = "complete"
                done = True
        reward = user_reward + action_reward_offset
        if outcome == "complete":
            reward += 2.0
        elif outcome == "continue":
            reward += 0.1
        reward -= 0.05  # constant per-step time cost
        # Delta-based grader-aligned shaping: reward reflects improvement
        task_obj = next((t for t in self.tasks if t.name == self.task), self.tasks[0])
        current_score = task_obj.grader(None)
        score_delta = current_score - self._prev_score
        alpha = 10.0  # shaping scale applied to grader-score deltas
        reward += alpha * score_delta  # reward improvement, penalize degradation
        self._prev_score = current_score
        if self._step_count >= MAX_STEPS_PER_EPISODE:
            done = True
        info: dict = {
            "completed": (outcome == "complete"),
            "outcome": outcome,
            "progress": self._progress,
            "step_count": self._step_count,
            "user_type": self._user_type,
        }
        if done:
            info["score"] = current_score
            # Terminal grader alignment boost
            reward += current_score
        return self._get_observation(), reward, done, info
| def state(self) -> Observation: | |
| if not self._ready: | |
| raise RuntimeError("Call reset() before state().") | |
| return self._get_observation() | |
| def close(self) -> None: | |
| pass | |
| async def reset_async(self) -> Observation: | |
| return self.reset() | |
| async def step_async(self, action: Action) -> Observation: | |
| obs, reward, done, info = self.step(action) | |
| obs.reward = reward | |
| obs.done = done | |
| obs.info = info | |
| return obs | |
| def _simulate_user(self) -> tuple[str, float]: | |
| if self._step_count <= 3: | |
| return "continue", 0.0 | |
| layout = self._layout | |
| drop_chance = 0.0 | |
| distrust_chance = 0.0 | |
| if layout.steps > 3: | |
| drop_chance += 0.05 * (layout.steps - 3) | |
| if layout.form_length > 5: | |
| drop_chance += 0.04 * (layout.form_length - 5) | |
| if self._prefers_short_forms and layout.form_length > 4: | |
| drop_chance += 0.05 | |
| if layout.steps < 2: | |
| distrust_chance += 0.20 | |
| if layout.button_size < 0.9 or layout.button_size > 1.3: | |
| distrust_chance += 0.10 | |
| drop_chance += 0.02 | |
| if self._user_type == "impatient": | |
| drop_chance += 0.06 | |
| elif self._user_type == "careful": | |
| distrust_chance += 0.08 | |
| if self.task == "hard": | |
| drop_chance += 0.04 | |
| elif self.task == "easy": | |
| drop_chance -= 0.05 | |
| distrust_chance -= 0.05 | |
| drop_chance = max(0.0, min(1.0, drop_chance)) | |
| distrust_chance = max(0.0, min(1.0 - drop_chance, distrust_chance)) | |
| roll = self._rng.random() | |
| if roll < drop_chance: | |
| return "drop", -1.0 | |
| elif roll < drop_chance + distrust_chance: | |
| return "distrust", -0.2 | |
| else: | |
| return "continue", 0.0 | |
| def _apply_action(self, action: Action) -> float: | |
| reward: float = 0.0 | |
| match action.type: | |
| case "increase_button": | |
| self._layout.button_size += BUTTON_SIZE_DELTA | |
| case "decrease_form": | |
| self._layout.form_length -= FORM_LENGTH_DELTA | |
| case "increase_steps": | |
| self._layout.steps += STEPS_DELTA | |
| case "decrease_steps": | |
| self._layout.steps -= STEPS_DELTA | |
| case "set_button_size": | |
| proposed: float = action.value | |
| if not (BUTTON_SIZE_MIN <= proposed <= BUTTON_SIZE_MAX): | |
| reward = INVALID_ACTION_REWARD | |
| self._layout.button_size = proposed | |
| case "reorder_sections" | "noop": | |
| pass | |
| self._clamp_layout() | |
| self._last_action = action.type | |
| return reward | |
| def _clamp_layout(self) -> None: | |
| self._layout.button_size = max(BUTTON_SIZE_MIN, min(BUTTON_SIZE_MAX, self._layout.button_size)) | |
| self._layout.form_length = max(FORM_LENGTH_MIN, min(FORM_LENGTH_MAX, self._layout.form_length)) | |
| self._layout.steps = max(STEPS_MIN, min(STEPS_MAX, self._layout.steps)) | |
| def _get_observation(self) -> Observation: | |
| return Observation( | |
| device=self._device, | |
| layout=self._layout.model_copy(), | |
| progress=self._progress, | |
| last_action=self._last_action, | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Graders (deterministic Β· partial-credit Β· strictly bounded in (0,1)) | |
| # --------------------------------------------------------------------------- | |
| def grade_easy(self, *args, **kwargs) -> float: | |
| """Easy task β single objective: maximize completion progress. | |
| Sub-metrics (weighted sum): | |
| 80 % completion progress | |
| 20 % button-size proximity to sweet spot (1.1) | |
| A baseline agent can reach moderate scores easily. | |
| """ | |
| progress = getattr(self, '_progress', 0.0) | |
| layout = getattr(self, '_layout', Layout()) | |
| # --- sub-metric 1: completion progress --- | |
| m_progress = normalize(progress, 0.0, 1.0) | |
| # --- sub-metric 2: button in sweet spot (peak at 1.1) --- | |
| bs_err = abs(layout.button_size - 1.1) | |
| m_button = 1.0 - normalize(bs_err, 0.0, 1.6) # 1.6 = max possible error | |
| score = 0.80 * m_progress + 0.20 * m_button | |
| return clamp_score(score) | |
| def grade_medium(self, *args, **kwargs) -> float: | |
| """Medium task β multiple objectives, weighted sum, mild interactions. | |
| Sub-metrics: | |
| 40 % completion progress | |
| 25 % button-size proximity | |
| 20 % form-length optimality (ideal β 3) | |
| 15 % step-count optimality (ideal β 2) | |
| Requires coordinated improvements across dimensions. | |
| """ | |
| progress = getattr(self, '_progress', 0.0) | |
| layout = getattr(self, '_layout', Layout()) | |
| m_progress = normalize(progress, 0.0, 1.0) | |
| bs_err = abs(layout.button_size - 1.1) | |
| m_button = 1.0 - normalize(bs_err, 0.0, 1.6) | |
| fl_err = abs(layout.form_length - 3) | |
| m_form = 1.0 - normalize(fl_err, 0.0, 9.0) # range 1-10, ideal 3 | |
| st_err = abs(layout.steps - 2) | |
| m_steps = 1.0 - normalize(st_err, 0.0, 9.0) # range 1-10, ideal 2 | |
| score = 0.40 * m_progress + 0.25 * m_button + 0.20 * m_form + 0.15 * m_steps | |
| return clamp_score(score) | |
| def grade_hard(self, *args, **kwargs) -> float: | |
| """Hard task β conflicting objectives, geometric mean. | |
| Trade-offs (CANNOT maximise all simultaneously): | |
| Conversion β wants short forms + few steps | |
| Data quality β wants more fields + more steps | |
| Usability β device-dependent button sweet-spot | |
| Progress β completion rate | |
| Scoring: weighted geometric mean in log-space. | |
| Final score min-max stretched so worst β 0.05, best β 0.95. | |
| """ | |
| import math | |
| progress = getattr(self, '_progress', 0.0) | |
| layout = getattr(self, '_layout', Layout()) | |
| device = getattr(self, '_device', 'desktop') | |
| FLOOR = 0.05 # sub-metric floor (keeps log finite) | |
| # --- conversion: wants form_length β€ 3 and steps β€ 2 --- | |
| conv_raw = 1.0 - 0.08 * max(0, layout.form_length - 3) \ | |
| - 0.10 * max(0, layout.steps - 2) | |
| m_conv = max(FLOOR, min(1.0, conv_raw)) | |
| # --- data quality: wants form_length β₯ 6 and steps β₯ 5 --- | |
| qual_raw = 0.10 * min(layout.form_length, 10) \ | |
| + 0.07 * min(layout.steps, 10) | |
| m_qual = max(FLOOR, normalize(qual_raw, 0.0, 1.7)) | |
| # --- usability: device-dependent button sweet-spot --- | |
| optimal_bs = 1.3 if device == 'mobile' else 1.0 | |
| usab_raw = 1.0 - abs(layout.button_size - optimal_bs) / 1.5 | |
| m_usab = max(FLOOR, min(1.0, usab_raw)) | |
| # --- progress --- | |
| m_prog = max(FLOOR, normalize(progress, 0.0, 1.0)) | |
| # --- weighted geometric mean (log-space) --- | |
| log_score = (0.25 * math.log(m_prog) | |
| + 0.30 * math.log(m_conv) | |
| + 0.25 * math.log(m_qual) | |
| + 0.20 * math.log(m_usab)) | |
| raw = math.exp(log_score) | |
| # stretch so empirical range [~0.05, ~0.85] maps to [~0.05, ~0.95] | |
| score = normalize(raw, 0.05, 0.90) | |
| return clamp_score(score) | |
if __name__ == "__main__":
    env = UIEnv()
    print("\n--- self-test ---")
    # Each grader must tolerate None, dict, and string payloads alike.
    probes = [("None", (None,)), ("dict", ({},)), ("text", ("test",))]
    for task in env.tasks:
        for label, call_args in probes:
            score = task.grader(*call_args)
            verdict = "OK" if isinstance(score, float) and 0.0 < score < 1.0 else "FAIL"
            print(f" {task.name:8s} grader({label:6s}) = {score:.6f} {verdict}")
    # Force a near-optimal internal state and re-grade every task.
    env._progress = 0.8
    env._layout = Layout(button_size=1.1, form_length=3, steps=2)
    print("\n--- after optimized state ---")
    for task in env.tasks:
        print(f" {task.name:8s} = {task.grader(None):.6f}")