"""
RhythmEnv Visual Explorer — Life Simulator v2

Run: python ui/app.py
"""

import sys
import os

# Put the grandparent directory of this file on sys.path so the `server` and
# `models` imports below resolve when the script is launched directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import matplotlib

matplotlib.use("Agg")  # select the non-interactive backend before importing pyplot
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import gradio as gr

from server.rhythm_environment import (
    RhythmEnvironment, MAX_STEPS, METERS, ACTION_EFFECTS, PROFILES
)
from models import RhythmAction, ActionType

SLOT_NAMES = ["Morning", "Afternoon", "Evening", "Night"]
SLOT_ICONS = ["🌅", "☀️", "🌆", "🌙"]
DAY_NAMES = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
PROFILE_NAMES = ["introvert_morning", "extrovert_night_owl", "workaholic_stoic", "random"]
ACTION_NAMES = [at.value.upper() for at in ActionType]

# Line/legend colour used for each meter in the chart and the meter list.
METER_COLORS = {
    "vitality": "#3b82f6",
    "cognition": "#8b5cf6",
    "progress": "#22c55e",
    "serenity": "#14b8a6",
    "connection": "#f97316",
}

# ---------------------------------------------------------------------------
# Global session state
# ---------------------------------------------------------------------------

_env = None
_last_obs = None
_step_log = []
_meter_history = []    # list of {meter: value} per step
_completed_slots = []  # (day, slot) pairs already acted on


def get_env():
    """Return the shared RhythmEnvironment, creating it on first use."""
    global _env
    if _env is None:
        _env = RhythmEnvironment()
    return _env


# ---------------------------------------------------------------------------
# HTML — colored meter bars
# ---------------------------------------------------------------------------

def _bar_color(v: float) -> str:
    """Map a meter value to a bar colour: red below 0.20, amber below 0.40, else green."""
    if v < 0.20:
        return "#ef4444"
    if v < 0.40:
        return "#f59e0b"
    return "#22c55e"


def format_meters_html(obs) -> str:
    """Render the status header and one horizontal bar per meter as HTML.

    Args:
        obs: Observation exposing ``day``, ``slot``, ``timestep``,
            ``remaining_steps``, ``active_event`` and one attribute per
            name in ``METERS``.

    Returns:
        An HTML fragment for a ``gr.HTML`` component.
    """
    day_name = DAY_NAMES[obs.day] if obs.day < 7 else f"Day {obs.day+1}"
    slot_name = SLOT_NAMES[obs.slot] if obs.slot < 4 else f"Slot {obs.slot}"
    event_bit = (
        '<span style="margin-left:8px;color:#b45309;font-weight:600;">'
        f"⚡ {obs.active_event}</span>"
        if obs.active_event
        else ""
    )

    parts = [
        '<div style="font-family:sans-serif;padding:4px 0;">',
        '<div style="font-size:14px;font-weight:600;margin-bottom:10px;">'
        f"📅 {day_name} {slot_name} &nbsp;·&nbsp; Step {obs.timestep}/{MAX_STEPS}"
        f" &nbsp;·&nbsp; {obs.remaining_steps} steps left {event_bit}"
        "</div>",
    ]
    for meter in METERS:
        val = getattr(obs, meter)
        # Clamp so an out-of-range value can never overflow the bar track.
        pct = max(0, min(100, int(val * 100)))
        color = _bar_color(val)
        dot = METER_COLORS[meter]
        parts.append(
            '<div style="display:flex;align-items:center;gap:8px;margin:6px 0;">'
            f'<span style="width:10px;height:10px;border-radius:50%;background:{dot};"></span>'
            f'<span style="width:90px;font-size:13px;">{meter.capitalize()}</span>'
            '<div style="flex:1;height:12px;background:#e5e7eb;'
            'border-radius:6px;overflow:hidden;">'
            f'<div style="width:{pct}%;height:100%;background:{color};"></div>'
            "</div>"
            f'<span style="width:40px;text-align:right;font-size:12px;">{val:.2f}</span>'
            "</div>"
        )
    parts.append("</div>")
    return "\n".join(parts)


# ---------------------------------------------------------------------------
# HTML — week calendar grid
# ---------------------------------------------------------------------------

def format_week_grid(obs) -> str:
    """Render a 4-slot x 7-day table marking completed, current and future steps.

    Args:
        obs: Observation exposing ``timestep`` (index of the next step to
            take) and ``done``.

    Returns:
        An HTML fragment for a ``gr.HTML`` component.
    """
    parts = [
        '<div style="font-family:sans-serif;margin-top:10px;">',
        '<div style="font-size:13px;font-weight:600;margin-bottom:4px;">Week Progress</div>',
        '<table style="border-collapse:collapse;text-align:center;font-size:12px;">',
        "<tr><th></th>",
    ]
    parts.extend(f'<th style="padding:2px 6px;">{day}</th>' for day in DAY_NAMES)
    parts.append("</tr>")

    current_step = obs.timestep  # 0-based: next step to take
    # timestep goes 0→27; obs.timestep is the step about to be taken
    # slots completed = those < current_step
    for slot_idx, icon in enumerate(SLOT_ICONS):
        parts.append(f'<tr><td style="padding:2px 6px;">{icon}</td>')
        for day_idx in range(7):
            step_num = day_idx * 4 + slot_idx
            if step_num < current_step:
                cell, bg = "✅", "#d1fae5"
            elif step_num == current_step and not obs.done:
                cell, bg = "🔵", "#dbeafe"
            else:
                cell, bg = "·", "transparent"
            parts.append(f'<td style="padding:2px 6px;background:{bg};">{cell}</td>')
        parts.append("</tr>")
    parts.append("</table></div>")
    return "\n".join(parts)


# ---------------------------------------------------------------------------
# Matplotlib — meter trajectory chart
# ---------------------------------------------------------------------------

def make_chart(history: list) -> plt.Figure:
    """Plot every meter's trajectory over the episode so far.

    Args:
        history: One ``{meter: value}`` dict per recorded step; may be empty,
            in which case only the axes, threshold line and legend are drawn.

    Returns:
        The matplotlib figure. It is detached from pyplot's figure registry
        before being returned so repeated calls do not accumulate open figures.
    """
    fig, ax = plt.subplots(figsize=(7, 3.5))
    fig.patch.set_facecolor("#f9fafb")
    ax.set_facecolor("#f9fafb")

    if history:
        steps = list(range(len(history)))
        for meter, color in METER_COLORS.items():
            vals = [h[meter] for h in history]
            ax.plot(steps, vals, color=color, linewidth=2.0,
                    label=meter.capitalize(), solid_capstyle="round")

    # Dashed guide at the value below which _bar_color turns a meter red.
    ax.axhline(y=0.20, color="#ef4444", linestyle="--", linewidth=0.8, alpha=0.4)

    patches = [mpatches.Patch(color=c, label=m.capitalize())
               for m, c in METER_COLORS.items()]
    ax.legend(handles=patches, loc="upper right", fontsize=8,
              ncol=2, framealpha=0.7, edgecolor="#e5e7eb")

    ax.set_xlim(0, MAX_STEPS)
    ax.set_ylim(-0.02, 1.08)
    ax.set_xlabel("Step (1 step = 1 time slot)", fontsize=9, color="#6b7280")
    ax.set_ylabel("Meter value", fontsize=9, color="#6b7280")
    ax.set_title("Life Meters Over the Week", fontsize=11, color="#374151", pad=8)
    ax.tick_params(labelsize=8, colors="#9ca3af")
    for spine in ax.spines.values():
        spine.set_edgecolor("#e5e7eb")
    ax.grid(True, alpha=0.3, color="#d1d5db")

    fig.tight_layout(pad=1.2)
    # Drop pyplot's reference; the Figure object itself stays renderable.
    plt.close(fig)
    return fig


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _snap(obs):
    """Return a ``{meter: value}`` snapshot of the observation's meters."""
    return {m: getattr(obs, m) for m in METERS}


def _step_line(obs, action_name: str) -> str:
    """Format one step-log line: step number, day/slot, action and signed reward."""
    day = DAY_NAMES[obs.day] if obs.day < 7 else f"D{obs.day}"
    slot = SLOT_NAMES[obs.slot] if obs.slot < 4 else f"S{obs.slot}"
    line = f"Step {obs.timestep:>2} [{day} {slot}] {action_name:<15} {obs.reward:+.3f}"
    if obs.active_event:
        line += f" ⚡{obs.active_event}"
    return line


# ---------------------------------------------------------------------------
# Tab 1 callbacks
# ---------------------------------------------------------------------------
OUTPUTS_COUNT = 5  # meters_html, week_grid, chart, log, score
DEFAULT_SEED = 42
LOG_TAIL_LINES = 22  # keep in sync with the "last 22 steps" label on the log box


def _parse_seed(seed_str) -> int:
    """Return the integer typed in the seed box, or DEFAULT_SEED if blank/invalid."""
    text = "" if seed_str is None else str(seed_str).strip()
    if not text:
        return DEFAULT_SEED
    try:
        return int(text)
    except ValueError:
        return DEFAULT_SEED


def _start_episode(profile_name: str, seed: int):
    """Reset the shared environment and return ``(env, first_observation)``.

    The ``"random"`` choice resets without a ``profile`` argument; any other
    name is passed through as ``profile=``.
    """
    env = get_env()
    if profile_name == "random":
        obs = env.reset(seed=seed)
    else:
        obs = env.reset(seed=seed, profile=profile_name)
    return env, obs


def _render(obs, score: str):
    """Build the 5-tuple of component values in the order the UI wires its outputs."""
    return (
        format_meters_html(obs),
        format_week_grid(obs),
        make_chart(_meter_history),
        "\n".join(_step_log[-LOG_TAIL_LINES:]),
        score,
    )


def reset_episode(profile_name: str, seed_str: str):
    """Start a fresh episode and return the initial values for all outputs.

    Args:
        profile_name: One of ``PROFILE_NAMES``.
        seed_str: Seed as typed by the user; blank or non-numeric falls back
            to ``DEFAULT_SEED``.
    """
    global _last_obs, _step_log, _meter_history
    seed = _parse_seed(seed_str)
    env, _last_obs = _start_episode(profile_name, seed)
    _step_log = [
        f"▶ Profile: {env._profile['name']} | Seed: {seed} | {MAX_STEPS} steps to go"
    ]
    _meter_history = [_snap(_last_obs)]
    return _render(_last_obs, "—")


def take_action(action_str: str):
    """Apply one user-chosen action and return the refreshed output values.

    Args:
        action_str: Upper-case action name from the dropdown. A missing or
            unrecognised value leaves the episode untouched and reports the
            problem in the score box.
    """
    global _last_obs
    if _last_obs is None:
        return "⚠️ Reset the episode first.", "", make_chart([]), "—", "—"
    if _last_obs.done:
        return _render(_last_obs, "Episode done — press Reset to play again.")

    try:
        action_type = ActionType((action_str or "").lower())
    except ValueError:
        return _render(_last_obs, "⚠️ Choose a valid action first.")

    obs = get_env().step(RhythmAction(action_type=action_type))
    _last_obs = obs
    _meter_history.append(_snap(obs))
    _step_log.append(_step_line(obs, action_str))

    if obs.done:
        final = obs.reward_breakdown.get("final_score", 0.0)
        _step_log.append("─" * 52)
        _step_log.append(f"✅ Final score: {final:.4f}")
        score = f"Final: {final:.4f}"
    else:
        score = f"Step reward: {obs.reward:+.4f}"
    return _render(obs, score)


def _run_auto(profile_name: str, seed_str: str, strategy: str):
    """Play a whole episode automatically and return the final output values.

    Args:
        profile_name: One of ``PROFILE_NAMES``.
        seed_str: Seed as typed by the user (see ``_parse_seed``).
        strategy: ``"heuristic"`` uses ``heuristic_action``; any other value
            picks uniformly at random from ``ActionType``.
    """
    global _last_obs, _step_log, _meter_history
    import random as _random

    seed = _parse_seed(seed_str)
    if strategy == "heuristic":
        # Imported only when needed so the random baseline does not require
        # the training package to be importable.
        from training.inference_eval import heuristic_action as pick_action
    else:
        rng = _random.Random(seed + 999)
        all_actions = list(ActionType)

        def pick_action(_obs):
            return rng.choice(all_actions)

    env, obs = _start_episode(profile_name, seed)
    _last_obs = obs
    _step_log = [
        f"▶ Auto-run ({strategy}) | Profile: {env._profile['name']} | Seed: {seed}"
    ]
    _meter_history = [_snap(obs)]

    while not obs.done:
        action_type = pick_action(obs)
        obs = env.step(RhythmAction(action_type=action_type))
        _last_obs = obs
        _meter_history.append(_snap(obs))
        _step_log.append(_step_line(obs, action_type.value.upper()))

    final = obs.reward_breakdown.get("final_score", 0.0)
    _step_log += ["─" * 52, f"✅ Final score: {final:.4f}"]
    return _render(obs, f"Final: {final:.4f}")


def run_heuristic(p, s):
    """Button callback: full episode with the heuristic baseline."""
    return _run_auto(p, s, "heuristic")


def run_random(p, s):
    """Button callback: full episode with uniformly random actions."""
    return _run_auto(p, s, "random")


# ---------------------------------------------------------------------------
# Reference tab helpers
# ---------------------------------------------------------------------------

def show_action_effects() -> str:
    """Return ``ACTION_EFFECTS`` as a fixed-width text table, one row per action."""
    header = f"{'Action':<15}" + "".join(f" {m[:3]:>6}" for m in METERS)
    lines = [header, "─" * 52]
    for action, effects in ACTION_EFFECTS.items():
        cells = "".join(f" {effects[m]:>+6.2f}" for m in METERS)
        lines.append(f"{action:<15}{cells}")
    return "\n".join(lines)


def show_profiles() -> str:
    """Return a text summary of every profile: reward weights and set modifiers."""
    lines = []
    for p in PROFILES:
        weights = p["reward_weights"]
        lines += [f"\n{'═'*52}", f" {p['name'].upper()}", f"{'═'*52}"]
        lines.append(" Reward weights (hidden from agent):")
        for m, w in weights.items():
            bar = "█" * int(w * 20)
            lines.append(f" {m:<12} {bar:<20} {w:.0%}")

        lines.append("\n Key hidden modifiers:")
        if p.get("morning_cognition_bonus"):
            lines.append(f" • Morning: cognition/progress ×{p['morning_cognition_bonus']} (peak window)")
        if p.get("evening_night_cognition_bonus"):
            lines.append(f" • Evening/Night: cognition/progress ×{p['evening_night_cognition_bonus']} (peak zone)")
        if p.get("morning_penalty"):
            lines.append(f" • Morning: cognition/progress ×{p['morning_penalty']} (groggy zone)")
        sv = p.get("social_vitality_multiplier", 1.0)
        if sv != 1.0:
            lines.append(f" • Social vitality drain ×{sv}")
        if p.get("binge_shame"):
            lines.append(" • Binge watch: shame spiral −0.15 serenity")
        if p.get("progress_serenity_bonus"):
            lines.append(f" • Work gives serenity +{p['progress_serenity_bonus']} (meaning)")
        if p.get("idle_serenity_decay"):
            lines.append(f" • Idle drains serenity −{p['idle_serenity_decay']} (guilt)")
        if p.get("work_vitality_recovery"):
            lines.append(f" • Work recovers vitality +{p['work_vitality_recovery']} (energized)")
        if p.get("solo_serenity_bonus"):
            lines.append(f" • Solo time gives serenity +{p['solo_serenity_bonus']} (recharge)")
        scm = p.get("social_connection_multiplier", 1.0)
        if scm != 1.0:
            lines.append(f" • Social connection ×{scm}")
        lines.append(f" • Connection passive decay: −{p['connection_decay_rate']}/step")
    return "\n".join(lines)


# ---------------------------------------------------------------------------
# Build UI
# ---------------------------------------------------------------------------

with gr.Blocks(title="RhythmEnv — Life Simulator") as demo:
    gr.Markdown(
        "# RhythmEnv — Life Simulator\n"
        "**Can a lightweight AI learn who you are — without being told?**\n\n"
        "Balance 5 life meters across a 7-day week. "
        "A hidden personality profile secretly changes how every action affects you. "
        "The agent must infer who you are from reward signals alone."
    )

    with gr.Tabs():

        # ── Tab 1: Play ───────────────────────────────────────────────────
        with gr.TabItem("▶ Play"):
            with gr.Row():
                profile_dd = gr.Dropdown(
                    choices=PROFILE_NAMES,
                    value="introvert_morning",
                    label="Hidden Profile (visible here for demo — agent cannot see this)",
                    scale=3,
                )
                seed_in = gr.Textbox(label="Seed", value="42", scale=1)
                reset_btn = gr.Button("⟳ Reset", variant="primary", scale=1)

            gr.Markdown(
                "| Profile | Core trait | What the agent must discover |\n"
                "|---|---|---|\n"
                "| `introvert_morning` | Recharges alone, peaks at dawn |"
                " Social drain ×3 · Morning deep work gives ×2 progress |\n"
                "| `extrovert_night_owl` | Energised by people, peaks at night |"
                " Morning is a penalty zone · Social gives ×2 connection |\n"
                "| `workaholic_stoic` | Finds meaning in output, resilient |"
                " Idle time drains serenity · Work recovers vitality |"
            )

            with gr.Row():
                with gr.Column(scale=2):
                    meters_html = gr.HTML()
                    week_grid_html = gr.HTML()
                    score_display = gr.Textbox(label="Score", interactive=False, lines=1)
                with gr.Column(scale=3):
                    chart_display = gr.Plot(label="Meter Trajectories")

            with gr.Row():
                action_dd = gr.Dropdown(
                    choices=ACTION_NAMES,
                    value="DEEP_WORK",
                    label="Choose action",
                    scale=4,
                )
                step_btn = gr.Button("▶ Take Step", variant="primary", scale=1)

            with gr.Row():
                heuristic_btn = gr.Button("▶▶ Full Episode — Heuristic Baseline")
                random_btn = gr.Button("▶▶ Full Episode — Random Baseline")

            log_display = gr.Textbox(
                label="Step Log (last 22 steps)",
                lines=10,
                interactive=False,
            )

        # ── Tab 2: Environment Reference ──────────────────────────────────
        with gr.TabItem("📋 Environment Reference"):
            gr.Markdown("### Action Effect Matrix")
            gr.Markdown(
                "Base delta per action on each meter. "
                "Profile modifiers and time-of-day multipliers are applied on top — invisibly."
            )
            gr.Textbox(value=show_action_effects(), lines=14, interactive=False, label="")

            gr.Markdown("### Hidden Personality Profiles")
            gr.Markdown(
                "The agent **cannot see these** during play. "
                "It must infer the active profile through reward patterns — "
                "the core learning challenge of RhythmEnv."
            )
            gr.Textbox(value=show_profiles(), lines=55, interactive=False, label="")

    # ── Wire up ───────────────────────────────────────────────────────────
    # Every callback returns its values in exactly this component order.
    _out = [meters_html, week_grid_html, chart_display, log_display, score_display]
    reset_btn.click(reset_episode, inputs=[profile_dd, seed_in], outputs=_out)
    step_btn.click(take_action, inputs=[action_dd], outputs=_out)
    heuristic_btn.click(run_heuristic, inputs=[profile_dd, seed_in], outputs=_out)
    random_btn.click(run_random, inputs=[profile_dd, seed_in], outputs=_out)


if __name__ == "__main__":
    # NOTE(review): whether `theme` belongs on launch() or on gr.Blocks() depends
    # on the installed Gradio major version — confirm against the pinned release.
    demo.launch(server_port=7862, share=False, theme=gr.themes.Soft())