"""Baseline evaluation harness for the UIEnv checkout-optimization task.

Runs a hybrid agent (deterministic heuristic with an LLM fallback) against
the UIEnv environment across easy/medium/hard task variants and reports
average reward, completion rate, and a combined score.
"""

import os
import random
import time
from typing import Any, Tuple

from env import UIEnv, Action, Observation

# Action names the environment understands; raw LLM output is matched
# against these strings before an Action is constructed.
VALID_ACTIONS = [
    "increase_button",
    "decrease_form",
    "increase_steps",
    "decrease_steps",
    "reorder_sections",
    "set_button_size",
    "noop",
]
MAX_STEPS = 20
DEBUG = False

# Fixed seed for reproducible episodes.
random.seed(42)


def load_env(task: str = "easy") -> UIEnv:
    """Create a deterministically seeded UIEnv for the given task variant."""
    return UIEnv(seed=42, task=task)


def heuristic_policy(obs: Observation) -> Action:
    """Rule-based policy: reduce friction first, then tune button size.

    Returns a "noop" Action when no rule fires, signalling the caller
    (agent_policy) to fall back to the LLM policy.
    """
    layout = obs.layout

    # Calculate which dimension creates the most drop risk.
    # NOTE(review): risk kicks in at steps > 3 / form_length > 5 here, but
    # the branches below act at steps > 2 / form_length > 4 — presumably the
    # tighter thresholds are the completion optima; confirm against env.
    step_risk = max(0, layout.steps - 3) * 0.06
    form_risk = max(0, layout.form_length - 5) * 0.04

    # Fix highest risk first.
    if step_risk > 0 or form_risk > 0:
        if form_risk >= step_risk and layout.form_length > 4:
            return Action(type="decrease_form")
        if layout.steps > 2:
            return Action(type="decrease_steps")
        if layout.form_length > 4:
            return Action(type="decrease_form")

    # Fix button size instantly (targets hidden preference bonus at > 1.2).
    if layout.button_size < 0.9 or layout.button_size > 1.3:
        return Action(type="set_button_size", value=1.25)

    # Fine-tune: bring steps and form to optimal completion thresholds.
    if layout.steps > 2:
        return Action(type="decrease_steps")
    if layout.form_length > 4:
        return Action(type="decrease_form")

    return Action(type="noop")


def llm_policy(client: Any, obs: Observation, model_name: str) -> Action:
    """Query an OpenAI-compatible chat endpoint for the next UI action.

    Retries on failure (with a longer wait on rate limits) and degrades to
    a "noop" Action if no valid action can be parsed from the response.
    """
    state_desc = (
        f"Device: {obs.device}\n"
        f"Button Size: {obs.layout.button_size:.2f}\n"
        f"Form Length: {obs.layout.form_length}\n"
        f"Steps: {obs.layout.steps}\n"
        f"Progress: {obs.progress:.2f}\n"
        f"Last Action: {obs.last_action or 'None'}"
    )
    prompt = (
        "You are optimizing a UI checkout flow to maximize user completion.\n"
        "Fewer steps and shorter forms reduce friction. "
        "Button size between 0.9-1.3 is ideal.\n\n"
        f"State:\n{state_desc}\n\n"
        "Respond with ONLY one word from this list:\n"
        "increase_button, decrease_form, increase_steps, decrease_steps, "
        "reorder_sections, set_button_size, noop"
    )

    max_retries = 2
    for attempt in range(max_retries + 1):
        try:
            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": "You are a UI optimization agent."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.001,  # near-deterministic output
                max_tokens=20,
            )
            content = response.choices[0].message.content
            action_str = content.strip().lower()
            # Substring-match the reply against known actions; first hit wins.
            for action in VALID_ACTIONS:
                if action in action_str:
                    action_str = action
                    break
            if action_str not in VALID_ACTIONS:
                return Action(type="noop")
            if action_str == "set_button_size":
                return Action(type=action_str, value=1.1)
            return Action(type=action_str)
        except Exception as e:
            if "429" in str(e):
                # Rate-limited: long fixed wait (exponential backoff below
                # still applies on top, as extra cushion).
                if DEBUG:
                    print("  [Rate Limit] Waiting 30s...")
                time.sleep(30)
            else:
                if DEBUG:
                    print(f"  [API Error] {e}")
            if attempt == max_retries:
                return Action(type="noop")
            time.sleep(2 ** attempt)
    return Action(type="noop")


def agent_policy(client: Any, obs: Observation, model_name: str) -> Action:
    """Hybrid policy: use the heuristic, fall back to the LLM on noop."""
    heuristic_action = heuristic_policy(obs)
    if heuristic_action.type != "noop":
        return heuristic_action
    return llm_policy(client, obs, model_name)


def run_episode(env: UIEnv, client: Any, model_name: str) -> Tuple[float, bool]:
    """Run one capped episode; return (total_reward, completed)."""
    obs = env.reset()
    total_reward = 0.0
    done = False
    completed = False
    steps = 0
    while not done and steps < MAX_STEPS:
        action = agent_policy(client, obs, model_name)
        obs, reward, done, info = env.step(action)
        total_reward += reward
        steps += 1
        if info.get("outcome") == "complete":
            completed = True
        time.sleep(1)  # Reduced for HF readiness
        if DEBUG:
            print(
                f"  step={steps} action={action.type} "
                f"reward={reward:+.3f} outcome={info.get('outcome')}"
            )
    return total_reward, completed


def evaluate_task(
    task: str, client: Any, model_name: str, n_episodes: int = 1
) -> Tuple[float, float, float]:
    """Evaluate the agent on `task`; return (avg_reward, completion_rate, score).

    Score weights completion rate at 0.7 and average reward at 0.3.
    """
    total_rewards = 0.0
    completions = 0
    for ep in range(n_episodes):
        env = load_env(task)
        reward, completed = run_episode(env, client, model_name)
        total_rewards += reward
        if completed:
            completions += 1
        if DEBUG:
            print(
                f"  [{task}] ep={ep + 1}/{n_episodes} "
                f"reward={reward:+.3f} completed={completed}"
            )
    avg_reward = total_rewards / n_episodes
    completion_rate = completions / n_episodes
    score = 0.7 * completion_rate + 0.3 * avg_reward
    return avg_reward, completion_rate, score


def main():
    """Entry point: set up the API client and evaluate all task variants."""
    base_url = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
    api_key = os.getenv("API_KEY") or os.getenv("HF_TOKEN")

    if api_key:
        # Imported lazily so the module loads even without the openai package.
        from openai import OpenAI

        print("Using Proxy / HF Router API...")
        client = OpenAI(base_url=base_url, api_key=api_key)
        model_name = os.getenv("MODEL_NAME", "katanemo/Arch-Router-1.5B")
        provider_name = f"API ({model_name})"
    else:
        print("Error: Neither API_KEY nor HF_TOKEN environment variable set.")
        return

    tasks = ["easy", "medium", "hard"]
    print("=" * 50)
    print(f"  UIEnv Baseline Evaluation ({provider_name})")
    print("=" * 50)
    for task in tasks:
        print(f"\n> Evaluating task: {task}...")
        avg_reward, completion_rate, score = evaluate_task(task, client, model_name)
        print(f"\nTask: {task}")
        print(f"  Avg Reward: {avg_reward:.4f}")
        print(f"  Completion Rate: {completion_rate:.4f}")
        print(f"  Score: {score:.4f}")
    print("\n" + "=" * 50)


if __name__ == "__main__":
    main()