Spaces:

hannan2859r
/

focusflow_env

Sleeping

File size: 7,771 Bytes

fdd45f1
 
36c262c
b5bdc31
 
 
 
 
36c262c
b5bdc31
 
 
36c262c
b5bdc31
 
 
fdd45f1
36c262c
fdd45f1
 
b5bdc31
 
 
fdd45f1
36c262c
b5bdc31
 
 
 
 
 
36c262c
b5bdc31
 
 
 
36c262c
b5bdc31
36c262c
b5bdc31
36c262c
b5bdc31
 
 
 
 
 
 
 
 
 
36c262c
b5bdc31
 
 
 
 
 
36c262c
b5bdc31
 
 
 
36c262c
b5bdc31
36c262c
 
b5bdc31
 
 
 
 
 
 
 
 
36c262c
b5bdc31
 
 
 
 
 
36c262c
b5bdc31
36c262c
b5bdc31
 
 
 
 
36c262c
b5bdc31
 
 
36c262c
b5bdc31
 
 
 
36c262c
b5bdc31
 
36c262c
b5bdc31
 
36c262c
b5bdc31
 
 
 
36c262c
b5bdc31
36c262c
 
b5bdc31
 
 
fdd45f1
b5bdc31
 
 
 
 
 
fdd45f1
b5bdc31
 
36c262c
 
b5bdc31
 
 
 
 
36c262c
b5bdc31
 
 
 
36c262c
 
b5bdc31
 
 
36c262c
b5bdc31
fdd45f1
b5bdc31
36c262c
b5bdc31
 
 
 
 
36c262c
b5bdc31
 
36c262c
fdd45f1
b5bdc31
 
 
fdd45f1
b5bdc31
 
36c262c
b5bdc31
 
 
36c262c
fdd45f1
b5bdc31
36c262c
b5bdc31
 
 
 
 
36c262c
b5bdc31
36c262c
 
b5bdc31
36c262c
fdd45f1
36c262c
 
 
 
b5bdc31
fdd45f1
36c262c
 
fdd45f1
b5bdc31
 
 
36c262c
b5bdc31
 
36c262c
b5bdc31
 
 
 
36c262c
b5bdc31
 
 
 
 
 
 
36c262c
b5bdc31
 
 
36c262c
 
fdd45f1
36c262c

"""
FocusFlow RL Environment — inference.py

LLM agent that:
  1. Reads the full observation (NL events, deadlines, cognitive load)
  2. Produces a chain-of-thought reasoning string
  3. Selects the best action
  4. Logs reward curves for showing training progress

Usage:
  export API_BASE_URL=https://api.groq.com/openai/v1
  export MODEL_NAME=llama-3.1-8b-instant
  export GROQ_API_KEY=your_groq_key_here
  export ENV_BASE_URL=http://localhost:7860
  export TASK_ID=task_1
  python inference.py
"""

import os
import json
import requests
import time
from typing import Optional
from openai import OpenAI

# ── Config ────────────────────────────────────────────────────────────────────
# Every setting is read from an environment variable once at import time;
# the second argument to os.getenv is the default used when it is unset.
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.groq.com/openai/v1")  # base URL handed to the OpenAI client
MODEL_NAME   = os.getenv("MODEL_NAME",   "llama-3.1-8b-instant")            # model requested in call_llm
ENV_URL      = os.getenv("ENV_BASE_URL", "http://localhost:7860")           # environment server (/health, /reset, /step)
TASK_ID      = os.getenv("TASK_ID",      "task_1")                          # task passed to /reset
MAX_EPISODES = int(os.getenv("MAX_EPISODES", "5"))                          # raises ValueError if not an integer string

# Module-level client shared by every LLM call. The API key is taken from
# OPENAI_API_KEY, then GROQ_API_KEY, and finally the placeholder "dummy".
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY", os.getenv("GROQ_API_KEY", "dummy")),
    base_url=API_BASE_URL,
)

# System message sent with every LLM request (see call_llm). It lists the
# action names the agent may choose from, a five-point decision checklist,
# and the requirement that each reply carries a `reasoning` field.
SYSTEM_PROMPT = """You are FocusAgent — an AI assistant helping a student stay focused during study sessions.

You will receive an observation about the student's current state and must choose the best action.

AVAILABLE ACTIONS:
- focus              : Stay on task, no special action needed
- block_app          : Block a distracting app (specify app_name)
- take_break         : Take a study break
- defer_event        : Postpone handling a distraction event for later
- respond_to_event   : Immediately handle an urgent event (provide response_text)
- plan_day           : Set a day plan (provide day_plan as a list of steps)
- adjust_energy      : Do something to restore energy/focus (stretch, hydrate, etc.)
- check_app          : (BAD) Give in to distraction — avoid this!
- quit_session       : (BAD) End the session early — avoid this!

DECISION FRAMEWORK:
1. Check pending_event first — is it urgent? Can it be deferred?
2. Check cognitive_load — if > 0.75, consider a break
3. Check deadline_pressure — if > 0.7, prioritise study tasks
4. Block apps proactively, especially high-temptation ones
5. Plan the day at the start of each day (step 1)

CRITICAL: Your `reasoning` field MUST explain:
  - What the most important signal in the observation is
  - Why you chose this action over alternatives
  - How this action serves the long-term goal

Poor reasoning = lower reward. Think carefully."""


def obs_to_prompt(obs: dict, step: int) -> str:
    """Render an environment observation as the user prompt for the LLM.

    The observation is decoded from JSON, so optional fields may be either
    absent or present with an explicit ``null``; both are treated the same
    way here instead of crashing the run.

    Args:
        obs: Observation dict. ``current_phase``, ``time_remaining_seconds``,
            ``sessions_completed``, ``focus_score``, ``active_distractions``,
            ``blocked_apps``, ``cognitive_load``, ``deadline_pressure`` and
            ``last_action_feedback`` must be present. ``pending_event``,
            ``day_context``, ``last_action_reward`` and
            ``reasoning_quality_score`` are optional.
        step: Step number shown in the prompt header.

    Returns:
        A multi-line text block summarising the observation.

    Raises:
        KeyError: If a required observation key, or a required key of a
            pending event / deadline entry, is missing.
    """
    event_str = "None"
    event = obs.get("pending_event")
    if event:
        event_str = (
            f"[{event['type']}] {event['description']} "
            f"(urgency={event['urgency']:.2f}, can_defer={event['can_defer']}, "
            f"expires_in={event.get('deadline_steps', 'N/A')} steps)"
        )

    # `.get(key, default)` only covers a missing key; `or` also covers null.
    dc = obs.get("day_context") or {}
    deadlines = dc.get("pending_deadlines") or []
    dl_str = ", ".join(
        f"{d['task']} (due step {d['due_step']})"
        for d in deadlines if not d.get("completed")
    ) or "None"

    energy = dc.get("energy_level")
    if energy is None:
        energy = 1.0

    # A falsy value here is either null or zero, and both format as zero.
    last_reward = obs.get("last_action_reward") or 0.0
    reasoning_score = obs.get("reasoning_quality_score") or 0.0

    return f"""=== STEP {step} OBSERVATION ===

SESSION STATE:
  Phase            : {obs['current_phase']}
  Time remaining   : {obs['time_remaining_seconds']//60}m {obs['time_remaining_seconds']%60}s
  Sessions done    : {obs['sessions_completed']}
  Focus score      : {obs['focus_score']:.3f}

ENVIRONMENT:
  Active distractions : {', '.join(obs['active_distractions'] or []) or 'None'}
  Blocked apps        : {', '.join(obs['blocked_apps'] or []) or 'None'}

COGNITIVE & ENERGY:
  Cognitive load    : {obs['cognitive_load']:.2f}  {'⚠ HIGH' if obs['cognitive_load'] > 0.75 else '✓ OK'}
  Energy level      : {energy:.2f}
  Deadline pressure : {obs['deadline_pressure']:.2f}  {'⚠ URGENT' if obs['deadline_pressure'] > 0.7 else '✓ OK'}

PENDING EVENT:
  {event_str}

PENDING DEADLINES:
  {dl_str}

LAST FEEDBACK:
  {obs['last_action_feedback']}
  Last reward      : {last_reward:.4f}
  Reasoning score  : {reasoning_score:.2f}

Choose your next action and provide clear reasoning."""


def call_llm(prompt: str) -> dict:
    """Call the LLM and parse its reply into an action dict.

    The request uses JSON mode and spells out the keys the rest of this
    script reads (``action_type``, ``reasoning``) together with the optional
    fields named in SYSTEM_PROMPT, so the model does not have to guess them.

    Args:
        prompt: Observation text for the current step.

    Returns:
        The decoded JSON object, guaranteed to be a dict whose
        ``action_type`` is a non-empty string.

    Raises:
        ValueError: If the reply is empty, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), is not a JSON
            object, or lacks a usable ``action_type``.
        Exception: Whatever the client raises for a failed request is
            propagated unchanged.
    """
    format_instructions = (
        "\n\nRespond ONLY with valid JSON matching FocusAction schema."
        ' Required keys: "action_type" (one of the available actions) and'
        ' "reasoning" (string); add "app_name", "response_text" or "day_plan"'
        " when the chosen action needs them."
    )
    response = client.chat.completions.create(
        model=MODEL_NAME,
        temperature=0.3,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user",   "content": prompt + format_instructions},
        ],
        response_format={"type": "json_object"},
    )
    raw = response.choices[0].message.content
    if not raw:
        # A reply with no content would otherwise surface as an opaque TypeError.
        raise ValueError("LLM returned an empty response")

    action = json.loads(raw)
    if not isinstance(action, dict):
        raise ValueError(f"LLM response is not a JSON object: {raw!r}")

    action_type = action.get("action_type")
    if not isinstance(action_type, str) or not action_type.strip():
        raise ValueError(f"LLM response has no usable 'action_type': {raw!r}")
    return action


def run_episode(task_id: str, episode: int) -> dict:
    """Run a single episode. Returns episode stats.

    Resets the environment for ``task_id`` and then loops: build a prompt
    from the latest observation, ask the LLM for an action, post it to
    ``/step`` and accumulate the reported reward. The loop ends when the
    environment reports ``done`` or a step request fails.

    Args:
        task_id: Task identifier sent to the environment's ``/reset``.
        episode: Zero-based episode index (printed and returned one-based).

    Returns:
        Dict with ``episode``, ``total_reward`` (rounded to 4 places),
        ``steps`` (steps attempted, including one that failed),
        ``success`` and ``reward_history``.

    Raises:
        requests.RequestException: If the reset request fails or times out.
    """
    # requests waits indefinitely by default; bound every call so a stalled
    # environment server cannot hang the run.
    request_timeout = 30
    default_reasoning = "Staying focused to complete the session efficiently."

    r = requests.post(
        f"{ENV_URL}/reset", params={"task_id": task_id}, timeout=request_timeout
    )
    r.raise_for_status()
    obs = r.json()

    step           = 0
    total_reward   = 0.0
    reward_history = []
    done           = False
    result         = {}

    print(f"\n{'='*60}")
    print(f"EPISODE {episode+1} | Task: {task_id}")
    print(f"{'='*60}")

    while not done:
        step += 1
        prompt = obs_to_prompt(obs, step)

        try:
            action = call_llm(prompt)
        except Exception as e:
            print(f"  [LLM ERROR] {e} — defaulting to focus")
            action = {"action_type": "focus", "reasoning": "LLM call failed, defaulting to focus."}

        # The model's JSON is untrusted: it may be a list or scalar, or omit
        # the key that the progress line below indexes.
        if not isinstance(action, dict) or not isinstance(action.get("action_type"), str):
            print(f"  [LLM ERROR] malformed action {action!r} — defaulting to focus")
            action = {"action_type": "focus", "reasoning": "LLM call failed, defaulting to focus."}

        reasoning = action.get("reasoning")
        if not isinstance(reasoning, str) or len(reasoning.strip()) < 10:
            action["reasoning"] = default_reasoning

        try:
            resp = requests.post(f"{ENV_URL}/step", json=action, timeout=request_timeout)
            resp.raise_for_status()
            payload = resp.json()
            if not isinstance(payload, dict):
                raise ValueError(f"unexpected /step payload: {payload!r}")
        except Exception as e:
            print(f"  [ENV ERROR] {e}")
            break

        result = payload
        obs    = result
        # `or 0.0` also covers an explicit null reward in the response.
        reward = result.get("last_action_reward") or 0.0
        done   = result.get("done", False)

        total_reward += reward
        reward_history.append(reward)

        print(
            f"  Step {step:3d} | action={action['action_type']:<18} "
            f"reward={reward:+.4f} | cumulative={total_reward:.4f} | "
            f"reasoning_q={result.get('reasoning_quality_score') or 0:.2f}"
        )

        time.sleep(0.1)   # rate limit

    # `result` is still {} when the very first step failed.
    success = result.get("success", False)
    print(f"\n  Episode {episode+1} done. Total reward: {total_reward:.4f} | Success: {success}")

    return {
        "episode":        episode + 1,
        "total_reward":   round(total_reward, 4),
        "steps":          step,
        "success":        success,
        "reward_history": reward_history,
    }


def main():
    """Run MAX_EPISODES episodes of TASK_ID and write the reward log.

    Prints the configuration and the environment's ``/health`` response,
    runs the episodes in sequence, prints a summary, and saves every
    episode's stats to ``reward_log.json`` in the working directory.

    Raises:
        requests.RequestException: If the health check fails or times out,
            or if an episode's reset request fails.
    """
    print(f"FocusFlow Agent | Model: {MODEL_NAME} | Task: {TASK_ID}")
    print(f"Environment: {ENV_URL}")
    print()

    # Bounded so an unreachable environment fails fast instead of hanging.
    h = requests.get(f"{ENV_URL}/health", timeout=10)
    print(f"Health: {h.json()}\n")

    all_stats = []
    for ep in range(MAX_EPISODES):
        stats = run_episode(TASK_ID, ep)
        all_stats.append(stats)

    rewards = [s["total_reward"] for s in all_stats]
    print(f"\n{'='*60}")
    print(f"SUMMARY over {MAX_EPISODES} episodes:")
    print(f"  Rewards : {[f'{r:.3f}' for r in rewards]}")
    if rewards:
        print(f"  Mean    : {sum(rewards)/len(rewards):.4f}")
        print(f"  Best    : {max(rewards):.4f}")
    else:
        # MAX_EPISODES <= 0: the mean would divide by zero and max() of an
        # empty list raises, so report the absence instead.
        print("  Mean    : n/a (no episodes run)")
        print("  Best    : n/a (no episodes run)")
    print(f"  Success : {sum(s['success'] for s in all_stats)}/{MAX_EPISODES}")

    with open("reward_log.json", "w", encoding="utf-8") as f:
        json.dump(all_stats, f, indent=2)
    print("\nReward log saved to reward_log.json")


if __name__ == "__main__":
    main()