Commit 2305b9f · Parent(s): 4719066
added minimal ui and all 4 apps+workflows

Changed files:
- client.py +39 -33
- inference.py +245 -97
- models.py +21 -13
- server/app.py +181 -79
- server/apps/__init__.py +8 -0
- server/apps/base_app.py +43 -4
- server/apps/jira.py +243 -0
- server/apps/salesforce.py +198 -0
- server/apps/workday.py +195 -0
- server/apps/zendesk.py +238 -0
- server/business_rules.py +92 -25
- server/data_generator.py +229 -53
- server/environment.py +170 -63
- server/schema_drift.py +65 -22
- server/workflow_engine.py +161 -35
- training/grpo_orgos.ipynb +550 -0
- ui/index.html +651 -0
client.py
CHANGED
@@ -1,105 +1,111 @@
"""
Synchronous HTTP client for the OrgOS OpenEnv environment.

Usage
-----
    from client import OrgOSEnvClient
    from models import OrgOSAction

    client = OrgOSEnvClient(base_url="http://localhost:8000")

    # Start a new episode (workflow_id "A"/"B"/"C" or None for round-robin)
    result = client.reset(workflow_id="A")
    print(result.observation.workflow_goal)

    # Take a step
    action = OrgOSAction(
        app="zendesk",
        operation="acknowledge_ticket",
        args={"ticket_number": "ZD-001"},
    )
    result = client.step(action)
    print(result.observation.current_score, result.reward, result.done)

    # Inspect state
    state = client.state()
    print(state.episode_id, state.workflow_completion)
"""

from typing import Optional

import httpx
from pydantic import BaseModel

from models import OrgOSAction, OrgOSObservation, OrgOSState


class StepResult(BaseModel):
    """Returned by reset() and step()."""
    observation: OrgOSObservation
    reward: float
    done: bool
    info: dict = {}


class OrgOSEnvClient:
    """
    Thin synchronous wrapper around the OrgOS HTTP API.

    All methods raise httpx.HTTPStatusError on non-2xx responses.
    """

    def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 30.0):
        self.base_url = base_url.rstrip("/")
        self._client = httpx.Client(base_url=self.base_url, timeout=timeout)

    # ------------------------------------------------------------------
    # Core API
    # ------------------------------------------------------------------

    def reset(self, workflow_id: Optional[str] = None) -> StepResult:
        """
        Start a new episode.

        Parameters
        ----------
        workflow_id : str | None
            "A" = Customer Bug Fix (support role)
            "B" = Employee Onboarding (manager role)
            "C" = Churn Risk Alert (support role)
            None = round-robin (A → B → C → A …)
        """
        payload = {"workflow_id": workflow_id} if workflow_id is not None else {}
        resp = self._client.post("/reset", json=payload)
        resp.raise_for_status()
        return StepResult(**resp.json())

    def step(self, action: OrgOSAction) -> StepResult:
        """
        Take one action in the environment.

        Parameters
        ----------
        action : OrgOSAction
            app : str – "jira" | "zendesk" | "salesforce" | "workday"
            operation : str – app-specific operation name
            args : dict – operation arguments
        """
        resp = self._client.post("/step", json=action.model_dump())
        resp.raise_for_status()
        return StepResult(**resp.json())

    def state(self) -> OrgOSState:
        """Return current episode metadata without modifying state."""
        resp = self._client.get("/state")
        resp.raise_for_status()
        return OrgOSState(**resp.json())

    def health(self) -> dict:
        """Ping the server. Returns {"status": "healthy"} if healthy."""
        resp = self._client.get("/health")
        resp.raise_for_status()
        return resp.json()

    def app_schemas(self) -> dict:
        """Return per-app operation catalogue."""
        resp = self._client.get("/schema/apps")
        resp.raise_for_status()
        return resp.json()

    # ------------------------------------------------------------------
    # Context manager support
    # ------------------------------------------------------------------
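The hunk stops at the "Context manager support" header, so the methods themselves are not visible in this preview. A minimal sketch of what such support typically looks like for an httpx.Client wrapper (assumed, not the committed code):

    # Hypothetical continuation of OrgOSEnvClient; illustrative only.
    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self._client.close()

    def __enter__(self) -> "OrgOSEnvClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

which lets callers write `with OrgOSEnvClient() as client: ...` and have the connection pool closed automatically.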
inference.py
CHANGED
@@ -1,17 +1,17 @@
"""
Baseline inference script for the OrgOS OpenEnv environment.
Runs all three workflows (A / B / C) and reports scores.

Required environment variables:
    API_BASE_URL — LLM API endpoint (OpenAI-compatible)
    MODEL_NAME   — model identifier (default: gpt-4o-mini)
    HF_TOKEN     — API key for the LLM endpoint
    ENV_URL      — environment server URL (default: http://localhost:8000)

STDOUT FORMAT (OpenEnv spec):
    [START] task=<workflow_name> env=orgos-openenv model=<model>
    [STEP] step=<n> action=<json> reward=<0.00> done=<true|false> error=<msg|null>
    [END] task=<workflow_name> score=<0.00> steps=<n>
"""

import json

@@ -19,7 +19,8 @@ import os
import re
import sys
import time
from typing import AsyncGenerator, Dict, List, Optional

import httpx
from openai import OpenAI

@@ -30,72 +31,86 @@ from openai import OpenAI
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
HF_TOKEN = os.environ.get("HF_TOKEN", "")
ENV_URL = os.environ.get("ENV_URL", "http://localhost:8000")

if not HF_TOKEN:
    print("[WARNING] HF_TOKEN is not set — LLM calls may fail.", file=sys.stderr)

llm_client = OpenAI(api_key=HF_TOKEN or "sk-placeholder", base_url=API_BASE_URL)

# ------------------------------------------------------------------
# System prompt
# ------------------------------------------------------------------

SYSTEM_PROMPT = """\
You are OrgOS Agent — an enterprise workflow automation agent.
You operate across four SaaS applications: Jira, Zendesk, Salesforce, and Workday.

Each turn you receive a JSON observation with:
  - workflow_goal   : the task you must complete
  - pending_steps   : remaining steps in the workflow
  - app_states      : current state of each app
  - schema_hints    : field renames in effect this episode (e.g. {"jira.priority": "severity"})
  - active_rules    : current SLA / approval thresholds
  - message         : feedback from the last action
  - current_score   : your cumulative score (0.001–0.999)

Respond ONLY with a valid JSON object — no markdown, no explanation.

Action format:
  {"app": "<app>", "operation": "<op>", "args": {...}}

Available apps and key operations:
  jira:       get_issue, create_issue, update_status, set_priority, assign_owner,
              add_label, link_zendesk_ticket, close_issue, list_issues
  zendesk:    get_ticket, acknowledge_ticket, set_urgency, assign_agent,
              escalate_to_jira, resolve_ticket, add_note, list_tickets
  salesforce: get_account, list_accounts, update_deal_stage, flag_churn_risk,
              assign_account_owner, log_interaction, get_opportunity
  workday:    get_employee, list_employees, provision_access, log_sla_event,
              request_budget_approval, create_onboarding_task, complete_task

CRITICAL RULES:
1. Read schema_hints FIRST — if "jira.priority" → "severity", use "severity" not "priority" in args.
2. Complete ALL pending_steps in order.
3. Do not repeat a successful action.
4. If an operation fails, read the message carefully and adapt.
5. Use list_* operations to discover record IDs when needed.
6. Stop when pending_steps is empty or done=true.

Example actions:
  {"app": "zendesk", "operation": "acknowledge_ticket", "args": {"ticket_number": "ZD-001"}}
  {"app": "jira", "operation": "create_issue", "args": {"title": "Bug fix for ACME-001", "linked_zendesk": "ZD-001"}}
  {"app": "salesforce", "operation": "get_account", "args": {"account_id": "ACME-001"}}
  {"app": "workday", "operation": "log_sla_event", "args": {"ticket_id": "ZD-001", "sla_met": true}}
"""

WORKFLOW_NAMES = {
    "A": "workflow-a-bug-fix",
    "B": "workflow-b-onboarding",
    "C": "workflow-c-churn-alert",
}

# ------------------------------------------------------------------
# OpenEnv stdout logging helpers
# ------------------------------------------------------------------

def log_start(task: str, env_name: str, model: str) -> None:
    print(f"[START] task={task} env={env_name} model={model}", flush=True)


def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    error_val = error if error else "null"
    print(
        f"[STEP] step={step} action={action} reward={reward:.4f} "
        f"done={str(done).lower()} error={error_val}",
        flush=True,
    )


def log_end(task_name: str, score: float, steps: int) -> None:
    safe_score = max(0.001, min(0.999, float(score)))
    print(f"[END] task={task_name} score={safe_score:.4f} steps={steps}", flush=True)


# ------------------------------------------------------------------

@@ -117,60 +132,80 @@ def api_get(path: str) -> dict:

# ------------------------------------------------------------------
# Observation formatter
# ------------------------------------------------------------------

def obs_to_text(obs: dict) -> str:
    lines = [
        f"current_score: {obs['current_score']}",
        f"step_count: {obs['step_count']}",
        f"workflow_id: {obs['workflow_id']}",
        "",
        "=== WORKFLOW GOAL ===",
        obs["workflow_goal"],
        "",
        "=== PENDING STEPS ===",
        "\n".join(f"  - {s}" for s in obs["pending_steps"]) or "  (all steps complete!)",
        "",
        "=== SCHEMA HINTS (use these field names) ===",
        json.dumps(obs["schema_hints"], indent=2) if obs["schema_hints"] else "  (no drift — use canonical names)",
        "",
        "=== ACTIVE RULES ===",
        json.dumps(obs["active_rules"], indent=2),
        "",
        "=== LAST MESSAGE ===",
        obs["message"],
        "",
        "=== APP STATES ===",
    ]
    for app_name, view in obs.get("app_states", {}).items():
        lines.append(f"  [{app_name.upper()}]")
        lines.append(f"  {view}")
        lines.append("")
    if obs.get("rule_violations"):
        lines.append("=== RULE VIOLATIONS (fix these!) ===")
        for v in obs["rule_violations"]:
            lines.append(f"  ⚠ {v}")
        lines.append("")
    return "\n".join(lines)


# ------------------------------------------------------------------
# Single-workflow inference loop
# ------------------------------------------------------------------

def run_workflow(workflow_id: str) -> float:
    task_name = WORKFLOW_NAMES.get(workflow_id, f"workflow-{workflow_id.lower()}")

    # Human-readable header (stderr so it doesn't interfere with stdout format)
    print(f"\n{'='*60}", file=sys.stderr)
    print(f"  Running Workflow {workflow_id}", file=sys.stderr)
    print(f"{'='*60}", file=sys.stderr)

    result = api_post("/reset", {"workflow_id": workflow_id})
    obs = result["observation"]
    history: List[dict] = []
    steps_taken = 0

    log_start(task=task_name, env_name="orgos-openenv", model=MODEL_NAME)

    try:
        for step_num in range(1, 60):
            if obs["done"]:
                break

            obs_text = obs_to_text(obs)
            history.append({"role": "user", "content": obs_text})

            # Trim history to avoid context overflow
            if len(history) > 20:
                history = history[-20:]

            try:
                response = llm_client.chat.completions.create(
                    model = MODEL_NAME,
                    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + history,
                    temperature = 0.0,
                    max_tokens = 300,
                )
                action_str = response.choices[0].message.content.strip()
            except Exception as exc:

@@ -193,13 +228,13 @@ def run_task(task_id: int) -> float:
                    pass

            if action is None:
                print(f"  Step {step_num}: Could not parse action JSON.", file=sys.stderr)
                log_step(step_num, action_str, -0.05, False, "json_parse_error")
                break

            action_label = json.dumps(action, separators=(",", ":"))
            print(
                f"  Step {step_num:2d} | score={obs['current_score']:.4f} | {action_label}",
                file=sys.stderr,
            )

@@ -207,13 +242,15 @@ def run_task(task_id: int) -> float:
            obs = result["observation"]
            step_reward = result["reward"]
            done = result["done"]
            error_msg = (
                obs["message"]
                if obs.get("rule_violations") or step_reward < 0
                else None
            )

            print(f"    → {obs['message']}", file=sys.stderr)

            steps_taken = step_num
            log_step(
                step = step_num,
                action = action_label,

@@ -223,49 +260,161 @@
            )

            if done:
                break

            time.sleep(0.2)

    finally:
        final = obs.get("current_score", 0.001) if isinstance(obs, dict) else 0.001
        log_end(task_name=task_name, score=final, steps=steps_taken)

    final_score = obs["current_score"]
    wf_done = not obs.get("pending_steps")
    print(
        f"\n  Workflow {workflow_id} final score: {final_score:.4f} "
        f"steps: {obs['step_count']} completed: {wf_done}",
        file=sys.stderr,
    )
    return final_score


# ------------------------------------------------------------------
# Async generator for SSE streaming from the UI
# ------------------------------------------------------------------

async def run_workflow_generator(
    workflow_id: str = "A",
    env_ref=None,
) -> AsyncGenerator[dict, None]:
    """
    Async generator that runs one inference episode and yields
    SSE-friendly event dicts for the dashboard UI.

    Each yielded dict has a "type" key:
        "reset" — episode started
        "step"  — one action taken
        "done"  — episode ended
        "error" — something went wrong
    """
    import asyncio

    if env_ref is None:
        # Fall back to HTTP if no direct env reference
        result = api_post("/reset", {"workflow_id": workflow_id})
    else:
        from models import OrgOSAction as _Action
        obs_obj = env_ref.reset(workflow_id=workflow_id)
        result = {"observation": obs_obj.model_dump(), "reward": obs_obj.reward, "done": False}

    obs = result["observation"]
    history: List[dict] = []

    yield {"type": "reset", "observation": obs, "workflow_id": workflow_id}
    await asyncio.sleep(0)

    for step_num in range(1, 60):
        if obs["done"]:
            break

        obs_text = obs_to_text(obs)
        history.append({"role": "user", "content": obs_text})
        if len(history) > 20:
            history = history[-20:]

        try:
            response = llm_client.chat.completions.create(
                model = MODEL_NAME,
                messages = [{"role": "system", "content": SYSTEM_PROMPT}] + history,
                temperature = 0.0,
                max_tokens = 300,
            )
            action_str = response.choices[0].message.content.strip()
        except Exception as exc:
            yield {"type": "error", "step": step_num, "message": str(exc)}
            break

        history.append({"role": "assistant", "content": action_str})

        action = None
        try:
            action = json.loads(action_str)
        except json.JSONDecodeError:
            m = re.search(r"\{.*\}", action_str, re.DOTALL)
            if m:
                try:
                    action = json.loads(m.group())
                except Exception:
                    pass

        if action is None:
            yield {"type": "error", "step": step_num, "message": "JSON parse error"}
            break

        if env_ref is None:
            result = api_post("/step", action)
        else:
            from models import OrgOSAction as _Action
            try:
                act = _Action(**action)
                obs_obj = env_ref.step(act)
                result = {
                    "observation": obs_obj.model_dump(),
                    "reward": obs_obj.reward,
                    "done": obs_obj.done,
                }
            except Exception as exc:
                yield {"type": "error", "step": step_num, "message": str(exc)}
                break

        obs = result["observation"]
        step_reward = result["reward"]
        done = result["done"]

        yield {
            "type": "step",
            "step": step_num,
            "action": action,
            "observation": obs,
            "reward": step_reward,
            "done": done,
        }
        await asyncio.sleep(0)

        if done:
            break

    yield {
        "type": "done",
        "final_score": obs.get("current_score", 0.001),
        "steps": obs.get("step_count", step_num),
        "completed": not obs.get("pending_steps"),
    }


# ------------------------------------------------------------------
# Main — run all three workflows sequentially
# ------------------------------------------------------------------

def main():
    print("OrgOS OpenEnv — Baseline Inference", file=sys.stderr)
    print(f"Model : {MODEL_NAME}", file=sys.stderr)
    print(f"Env   : {ENV_URL}", file=sys.stderr)

    # Smoke-test health endpoint
    try:
        health = api_get("/health")
        assert health.get("status") in ("ok", "healthy"), f"Unexpected status: {health}"
        print("Health check: OK\n", file=sys.stderr)
    except Exception as exc:
        print(f"[ERROR] Environment not reachable at {ENV_URL}: {exc}", file=sys.stderr)
        print("[ERROR] Make sure the server is running and ENV_URL is correct.", file=sys.stderr)
        sys.exit(1)

    scores: Dict[str, float] = {}
    for wf_id in ["A", "B", "C"]:
        try:
            scores[f"workflow_{wf_id}"] = run_workflow(wf_id)
        except Exception as exc:
            print(f"[ERROR] Workflow {wf_id} failed: {exc}", file=sys.stderr)
            scores[f"workflow_{wf_id}"] = 0.001

    print("\n" + "="*60, file=sys.stderr)
    print("  BASELINE RESULTS", file=sys.stderr)

@@ -276,11 +425,10 @@ def main():
    print(f"  average: {avg:.4f}", file=sys.stderr)
    print("="*60, file=sys.stderr)

    # Write scores to file for automated validators
    with open("baseline_scores.json", "w") as f:
        json.dump({"scores": scores, "average": avg}, f, indent=2)
    print("\nScores written to baseline_scores.json", file=sys.stderr)


if __name__ == "__main__":
    main()
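Since the [START]/[STEP]/[END] lines above are the OpenEnv stdout contract, downstream tooling can score a run by parsing them. A small, hypothetical parser (not part of this commit) that recovers per-task scores from the [END] lines:

    import re
    from typing import Dict

    # Matches the documented format: [END] task=<name> score=<0.00> steps=<n>
    END_RE = re.compile(r"\[END\] task=(\S+) score=([\d.]+) steps=(\d+)")

    def parse_end_lines(stdout_text: str) -> Dict[str, float]:
        """Map task name -> final score from OpenEnv [END] log lines."""
        scores: Dict[str, float] = {}
        for line in stdout_text.splitlines():
            m = END_RE.match(line.strip())
            if m:
                scores[m.group(1)] = float(m.group(2))
        return scores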
models.py
CHANGED
@@ -1,38 +1,46 @@
# models.py
"""Pydantic models for the OrgOS OpenEnv environment."""

from typing import Any, Dict, List
from pydantic import BaseModel


class OrgOSAction(BaseModel):
    app: str                            # "jira" | "zendesk" | "salesforce" | "workday"
    operation: str                      # app-specific operation name
    args: Dict[str, Any] = {}


class RewardBreakdown(BaseModel):
    workflow_completion: float = 0.0    # 0.30 weight
    rule_compliance: float = 0.0        # 0.25 weight
    schema_adaptation: float = 0.0      # 0.20 weight
    efficiency: float = 0.0             # 0.15 weight
    policy_drift_handling: float = 0.0  # 0.10 weight


class OrgOSObservation(BaseModel):
    done: bool
    reward: float
    current_score: float
    workflow_id: str                    # "A", "B", or "C"
    step_count: int
    # Per-app state views (what the agent sees)
    app_states: Dict[str, str]          # app_name → string preview
    # Workflow progress
    workflow_goal: str
    completed_steps: List[str]
    pending_steps: List[str]
    # Schema drift info (partial — agent must probe to discover rest)
    schema_hints: Dict[str, str]        # e.g. {"jira.priority": "severity"}
    # Business rules in effect this episode
    active_rules: Dict[str, Any]        # {"sla_p0_minutes": 15, ...}
    # Per-step feedback
    rule_violations: List[str]
    reward_breakdown: RewardBreakdown
    message: str


class OrgOSState(BaseModel):
    episode_id: str
    workflow_id: str

@@ -42,4 +50,4 @@
    rule_violation_count: int
    workflow_completion: float
    rule_compliance_rate: float
    policy_drift_active: bool
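For orientation, a short example of how these models compose. The action values and the weighted-sum arithmetic below are illustrative only; the weights come from the comments on RewardBreakdown above, while the environment's actual scoring lives in the server code.

    from models import OrgOSAction, RewardBreakdown

    action = OrgOSAction(
        app="zendesk",
        operation="acknowledge_ticket",
        args={"ticket_number": "ZD-001"},
    )

    breakdown = RewardBreakdown(workflow_completion=0.5, rule_compliance=1.0)
    # Hypothetical blended score using the documented weights (0.30/0.25/0.20/0.15/0.10).
    blended = (0.30 * breakdown.workflow_completion
               + 0.25 * breakdown.rule_compliance
               + 0.20 * breakdown.schema_adaptation
               + 0.15 * breakdown.efficiency
               + 0.10 * breakdown.policy_drift_handling)
    print(action.model_dump(), round(blended, 3))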
server/app.py
CHANGED
@@ -1,63 +1,110 @@
"""
FastAPI application — OrgOS OpenEnv HTTP API.

Endpoints (OpenEnv-compatible):
    GET  /health        — liveness probe
    GET  /metadata      — env description
    GET  /schema        — action / observation schema
    POST /reset         — start new episode
    POST /step          — take one action
    GET  /state         — current episode metadata
    POST /state         — same (backward compat)
    GET  /schema/apps   — per-app operation catalogue (used by UI)
    GET  /              — serve the demo dashboard UI
    GET  /ui/run-agent  — SSE stream of one inference episode (for UI)
"""

import json
import os
from typing import Any, Dict, Optional

import uvicorn
from fastapi import Body, FastAPI, HTTPException
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

from models import OrgOSAction, OrgOSObservation, OrgOSState
from server.environment import OrgOSEnvironment


# ------------------------------------------------------------------
# App setup
# ------------------------------------------------------------------

app = FastAPI(
    title="OrgOS — Multi-App Enterprise RL Environment",
    description=(
        "A Salesforce + Zendesk + Jira + Workday simulator for training agents "
        "that handle real enterprise workflows under schema drift and policy changes."
    ),
    version="2.0.0",
)

# Mount static assets (JS, CSS) if the ui/ directory exists
_UI_STATIC = os.path.join(os.path.dirname(__file__), "..", "ui", "static")
if os.path.isdir(_UI_STATIC):
    app.mount("/static", StaticFiles(directory=_UI_STATIC), name="static")

# Single shared environment instance (stateful per-process)
env = OrgOSEnvironment()


# ------------------------------------------------------------------
# Request / response helpers
# ------------------------------------------------------------------

class ResetRequest(BaseModel):
    workflow_id: Optional[str] = None  # "A", "B", "C", or None for round-robin


class StepResponse(BaseModel):
    observation: OrgOSObservation
    reward: float
    done: bool
    info: dict = {}


# ------------------------------------------------------------------
# Core OpenEnv routes
# ------------------------------------------------------------------

@app.get("/health")
def health():
    return {"status": "healthy", "env": "orgos", "version": "2.0.0"}


@app.get("/metadata")
def metadata():
    return {
        "name": "orgos-openenv",
        "description": (
            "OrgOS: multi-app enterprise RL environment. "
            "The agent completes cross-app business workflows (triage, onboarding, churn) "
            "across Jira, Zendesk, Salesforce, and Workday simulators. "
            "Schema drift and policy changes challenge the agent to generalise."
        ),
        "version": "2.0.0",
        "tags": ["openenv", "enterprise", "multi-app", "schema-drift", "rl"],
        "workflows": [
            {
                "id": "A",
                "name": "Customer Bug Fix",
                "difficulty": "medium",
                "apps": ["zendesk", "jira", "salesforce", "workday"],
            },
            {
                "id": "B",
                "name": "Employee Onboarding",
                "difficulty": "medium",
                "apps": ["workday", "salesforce", "zendesk"],
            },
            {
                "id": "C",
                "name": "Churn Risk Alert",
                "difficulty": "hard",
                "apps": ["salesforce", "zendesk", "jira"],
            },
        ],
    }

@@ -68,58 +115,54 @@ def schema():
        "action": {
            "type": "object",
            "properties": {
                "app": {"type": "string", "enum": ["jira", "zendesk", "salesforce", "workday"]},
                "operation": {"type": "string", "description": "App-specific operation name"},
                "args": {"type": "object", "description": "Operation arguments"},
            },
            "required": ["app", "operation"],
        },
        "observation": {
            "type": "object",
            "properties": {
                "done": {"type": "boolean"},
                "reward": {"type": "number"},
                "current_score": {"type": "number"},
                "workflow_id": {"type": "string"},
                "step_count": {"type": "integer"},
                "app_states": {"type": "object"},
                "workflow_goal": {"type": "string"},
                "completed_steps": {"type": "array"},
                "pending_steps": {"type": "array"},
                "schema_hints": {"type": "object"},
                "active_rules": {"type": "object"},
                "rule_violations": {"type": "array"},
                "reward_breakdown":{"type": "object"},
                "message": {"type": "string"},
            },
        },
        "state": {
            "type": "object",
            "properties": {
                "episode_id": {"type": "string"},
                "workflow_id": {"type": "string"},
                "schema_versions": {"type": "object"},
                "step_count": {"type": "integer"},
                "max_steps": {"type": "integer"},
                "rule_violation_count": {"type": "integer"},
                "workflow_completion": {"type": "number"},
                "rule_compliance_rate": {"type": "number"},
                "policy_drift_active": {"type": "boolean"},
            },
        },
    }


@app.post("/reset", response_model=StepResponse)
def reset(req: ResetRequest = Body(default=ResetRequest())):
    try:
        obs = env.reset(workflow_id=req.workflow_id)
    except (ValueError, KeyError) as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return StepResponse(observation=obs, reward=obs.reward, done=False)

@@ -127,49 +170,108 @@ def reset(req: ResetRequest = ResetRequest()):
async def step(body: Dict[str, Any] = Body(...)):
    """
    Accept both openenv-core wrapped format:
        {"action": {"app": "...", "operation": "...", "args": {...}}, "timeout_s": 15}
    and direct format:
        {"app": "...", "operation": "...", "args": {...}}
    """
    action_data = body.get("action", body)
    try:
        action = OrgOSAction(**action_data)
        obs = env.step(action)
    except (TypeError, KeyError, Exception) as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return StepResponse(observation=obs, reward=obs.reward, done=obs.done)


@app.get("/state", response_model=OrgOSState)
def state_get():
    """GET /state — openenv-core spec."""
    return env.state()


@app.post("/state", response_model=OrgOSState)
def state_post():
    """POST /state — backward compatibility."""
    return env.state()


# ------------------------------------------------------------------
# UI helper routes
# ------------------------------------------------------------------

@app.get("/schema/apps")
def app_schemas():
    """Return per-app operation catalogue. Used by the dashboard UI."""
    from server.apps.jira import JiraApp
    from server.apps.zendesk import ZendeskApp
    from server.apps.salesforce import SalesforceApp
    from server.apps.workday import WorkdayApp
    return {
        "jira": {"operations": JiraApp.OPERATIONS},
        "zendesk": {"operations": ZendeskApp.OPERATIONS},
        "salesforce": {"operations": SalesforceApp.OPERATIONS},
        "workday": {"operations": WorkdayApp.OPERATIONS},
    }


@app.get("/ui/run-agent")
async def run_agent_sse(workflow_id: str = "A", model: str = "gpt-4o-mini"):
    """
    Server-Sent Events stream.
    Runs one inference episode and streams step events to the UI.
    Each event is: data: <json>\n\n
    """
    import asyncio

    async def _event_stream():
        import json as _json
        from inference import run_workflow_generator
        try:
            async for event in run_workflow_generator(workflow_id=workflow_id, env_ref=env):
                yield f"data: {_json.dumps(event)}\n\n"
                await asyncio.sleep(0)  # yield control
        except Exception as exc:
            yield f"data: {_json.dumps({'type': 'error', 'message': str(exc)})}\n\n"
        yield "data: {\"type\": \"done\"}\n\n"

    return StreamingResponse(
        _event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
        },
    )


@app.get("/", response_class=HTMLResponse)
def ui():
    """Serve the OrgOS demo dashboard."""
    ui_path = os.path.join(os.path.dirname(__file__), "..", "ui", "index.html")
    if os.path.exists(ui_path):
        return FileResponse(ui_path, media_type="text/html")
    # Minimal inline fallback if ui/ hasn't been built yet
    return HTMLResponse(content="""
    <!DOCTYPE html>
    <html lang="en">
    <head><meta charset="UTF-8"><title>OrgOS Dashboard</title>
    <style>body{font-family:monospace;background:#0f172a;color:#94a3b8;padding:2rem}
    h1{color:#38bdf8}a{color:#38bdf8}</style></head>
    <body>
      <h1>OrgOS — Enterprise RL Environment</h1>
      <p>The full dashboard UI is at <code>ui/index.html</code>.</p>
      <p>API docs: <a href="/docs">/docs</a> |
         Health: <a href="/health">/health</a></p>
    </body></html>
    """)


# ------------------------------------------------------------------
# Entry point
# ------------------------------------------------------------------

def main():
    uvicorn.run("server.app:app", host="0.0.0.0", port=8000, reload=False)


if __name__ == "__main__":
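The /ui/run-agent route streams "data: <json>" events terminated by a blank line, so any SSE-capable client can watch an episode live. A minimal consumer sketch using httpx (the endpoint path and event shapes come from the code above; this snippet itself is not part of the commit):

    import json
    import httpx

    # Stream one episode of Workflow A from a locally running server.
    with httpx.stream("GET", "http://localhost:8000/ui/run-agent",
                      params={"workflow_id": "A"}, timeout=None) as resp:
        for line in resp.iter_lines():
            if not line.startswith("data: "):
                continue  # skip blank separators between events
            event = json.loads(line[len("data: "):])
            print(event["type"], event.get("step", ""), event.get("reward", ""))
            if event["type"] == "done":
                break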
server/apps/__init__.py
ADDED
@@ -0,0 +1,8 @@
"""OrgOS app modules — 4 mock enterprise applications."""

from server.apps.jira import JiraApp
from server.apps.zendesk import ZendeskApp
from server.apps.salesforce import SalesforceApp
from server.apps.workday import WorkdayApp

__all__ = ["JiraApp", "ZendeskApp", "SalesforceApp", "WorkdayApp"]
server/apps/base_app.py
CHANGED
@@ -1,19 +1,58 @@
"""Abstract base class for all OrgOS app modules."""

from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Tuple

from server.schema_drift import SchemaDriftEngine


class BaseApp(ABC):
    APP_NAME: str = ""

    def __init__(self, drift: SchemaDriftEngine):
        self._drift = drift

    # ------------------------------------------------------------------
    # Core interface — every app must implement these
    # ------------------------------------------------------------------

    @abstractmethod
    def initialize(self, records: List[Dict]) -> None:
        """Load synthetic records into in-memory state."""

    @abstractmethod
    def execute(self, operation: str, args: Dict) -> Dict:
        """
        Execute an operation.
        Returns dict with at minimum:
            {"success": bool, "message": str}
        May also include:
            {"data": ..., "schema_error": str, "schema_adapted": bool, "ticket": dict}
        """

    @abstractmethod
    def get_state_view(self, max_rows: int = 5) -> str:
        """Return agent-visible snapshot as a compact multi-line string."""

    @abstractmethod
    def count_open_items(self) -> int:
        """Count pending/open work items (used by grader)."""

    # ------------------------------------------------------------------
    # Shared helpers available to all concrete apps
    # ------------------------------------------------------------------

    def _check_schema_drift(self, args: Dict) -> Tuple[Optional[str], bool]:
        """
        Delegate to the drift engine to check if args use stale canonical names.
        Returns (schema_error_field_or_None, schema_adapted_bool).
        """
        return self._drift.check_args_for_drift(args, self.APP_NAME)

    def _to_agent_view(self, record: Dict) -> Dict:
        """Translate a canonical record to the agent-visible drifted representation."""
        return self._drift.translate_record(record, self.APP_NAME)

    def _compact(self, record: Dict, fields: List[str]) -> Dict:
        """Return only the specified fields from a (possibly drifted) record."""
        return {k: v for k, v in record.items() if k in fields and v is not None}
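As a usage illustration of the interface above, here is a minimal hypothetical app module, not one of the four shipped apps, that satisfies BaseApp using only the abstract methods it defines. A real module would also add per-operation handlers and schema-drift checks, as JiraApp does below.

    from typing import Dict, List

    from server.apps.base_app import BaseApp


    class NotesApp(BaseApp):
        """Toy example app: a flat list of notes. Illustrative only."""

        APP_NAME = "notes"
        OPERATIONS = ["add_note", "list_notes"]

        def initialize(self, records: List[Dict]) -> None:
            self._notes: List[Dict] = list(records)

        def execute(self, operation: str, args: Dict) -> Dict:
            if operation == "add_note":
                self._notes.append({"text": args.get("text", "")})
                return {"success": True, "message": "Note added."}
            if operation == "list_notes":
                return {"success": True, "message": f"{len(self._notes)} notes.", "data": self._notes}
            return {"success": False, "message": f"Unknown operation '{operation}'."}

        def get_state_view(self, max_rows: int = 5) -> str:
            return "\n".join(n["text"] for n in self._notes[:max_rows]) or "No notes."

        def count_open_items(self) -> int:
            return len(self._notes)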
server/apps/jira.py
ADDED
@@ -0,0 +1,243 @@
"""Jira-like app — engineering ticket management."""

from typing import Dict, List, Optional
from server.apps.base_app import BaseApp
from server.schema_drift import SchemaDriftEngine


class JiraApp(BaseApp):
    APP_NAME = "jira"

    OPERATIONS = [
        "get_issue", "create_issue", "update_status", "set_priority",
        "assign_owner", "add_label", "link_zendesk_ticket", "close_issue", "list_issues",
    ]

    def __init__(self, drift: SchemaDriftEngine):
        super().__init__(drift)
        self._records: Dict[str, Dict] = {}
        # Workflow completion state tracking
        self._linked_issues: set = set()      # issue_ids linked to a Zendesk ticket
        self._assigned_issues: set = set()    # issue_ids with a non-null assignee
        self._bugs_checked: bool = False      # list_issues was called (Workflow C)

    # ------------------------------------------------------------------
    # BaseApp interface
    # ------------------------------------------------------------------

    def initialize(self, records: List[Dict]) -> None:
        self._records = {r["issue_id"]: r for r in records}
        self._linked_issues.clear()
        self._assigned_issues.clear()
        self._bugs_checked = False
        # Seed state from loaded data
        for issue_id, rec in self._records.items():
            if rec.get("assignee"):
                self._assigned_issues.add(issue_id)
            if rec.get("linked_zendesk"):
                self._linked_issues.add(issue_id)

    def execute(self, operation: str, args: Dict) -> Dict:
        method = getattr(self, f"_op_{operation}", None)
        if method is None:
            return {
                "success": False,
                "message": f"Unknown operation '{operation}'. Available: {', '.join(self.OPERATIONS)}",
            }
        try:
            return method(**args)
        except TypeError as exc:
            return {"success": False, "message": f"Bad args for '{operation}': {exc}"}

    def get_state_view(self, max_rows: int = 5) -> str:
        open_issues = [r for r in self._records.values()
                       if r.get("status") not in ("closed",)][:max_rows]
        if not open_issues:
            return "No open issues."
        lines = []
        for rec in open_issues:
|
| 59 |
+
view = self._to_agent_view(rec)
|
| 60 |
+
keep = ["issue_id", "title",
|
| 61 |
+
"priority", "severity", "urgency_level",
|
| 62 |
+
"assignee", "owner", "assigned_to",
|
| 63 |
+
"status", "state", "current_state",
|
| 64 |
+
"customer_id", "linked_zendesk"]
|
| 65 |
+
compact = {k: v for k, v in view.items() if k in keep and v is not None}
|
| 66 |
+
lines.append(str(compact))
|
| 67 |
+
return "\n".join(lines)
|
| 68 |
+
|
| 69 |
+
def count_open_items(self) -> int:
|
| 70 |
+
return sum(1 for r in self._records.values() if r.get("status") != "closed")
|
| 71 |
+
|
| 72 |
+
# ------------------------------------------------------------------
|
| 73 |
+
# Workflow completion state checks
|
| 74 |
+
# ------------------------------------------------------------------
|
| 75 |
+
|
| 76 |
+
def has_linked_issue(self) -> bool:
|
| 77 |
+
"""True once any issue is linked to a Zendesk ticket (Workflow A step A2)."""
|
| 78 |
+
return len(self._linked_issues) > 0
|
| 79 |
+
|
| 80 |
+
def issue_assigned(self) -> bool:
|
| 81 |
+
"""True once JIRA-001 (primary bug) has an assignee (Workflow A step A4)."""
|
| 82 |
+
return bool(self._records.get("JIRA-001", {}).get("assignee"))
|
| 83 |
+
|
| 84 |
+
def bugs_checked(self) -> bool:
|
| 85 |
+
"""True once list_issues has been called (Workflow C step C3)."""
|
| 86 |
+
return self._bugs_checked
|
| 87 |
+
|
| 88 |
+
# ------------------------------------------------------------------
|
| 89 |
+
# Operations
|
| 90 |
+
# ------------------------------------------------------------------
|
| 91 |
+
|
| 92 |
+
def _op_get_issue(self, issue_id: str) -> Dict:
|
| 93 |
+
rec = self._records.get(issue_id)
|
| 94 |
+
if not rec:
|
| 95 |
+
return {"success": False, "message": f"Issue {issue_id} not found. Use list_issues to browse."}
|
| 96 |
+
return {"success": True, "data": self._to_agent_view(rec),
|
| 97 |
+
"message": f"Retrieved {issue_id}"}
|
| 98 |
+
|
| 99 |
+
def _op_create_issue(self, title: str, **kwargs) -> Dict:
|
| 100 |
+
schema_error, schema_adapted = self._check_schema_drift(kwargs)
|
| 101 |
+
if schema_error:
|
| 102 |
+
return {
|
| 103 |
+
"success": False,
|
| 104 |
+
"schema_error": schema_error,
|
| 105 |
+
"message": (f"Schema error: field '{schema_error}' is not in the current schema. "
|
| 106 |
+
f"Check schema_hints for the correct field name."),
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
issue_id = f"JIRA-{len(self._records) + 1:03d}"
|
| 110 |
+
# Accept both canonical and drifted names for priority / assignee
|
| 111 |
+
priority = (kwargs.get("priority") or kwargs.get("severity")
|
| 112 |
+
or kwargs.get("urgency_level", "p2"))
|
| 113 |
+
linked = kwargs.get("linked_zendesk") or kwargs.get("zendesk_ticket")
|
| 114 |
+
|
| 115 |
+
rec = {
|
| 116 |
+
"issue_id": issue_id,
|
| 117 |
+
"title": title,
|
| 118 |
+
"priority": priority,
|
| 119 |
+
"assignee": kwargs.get("assignee") or kwargs.get("owner") or kwargs.get("assigned_to"),
|
| 120 |
+
"status": "open",
|
| 121 |
+
"reporter": kwargs.get("reporter", "agent"),
|
| 122 |
+
"customer_id": kwargs.get("customer_id"),
|
| 123 |
+
"linked_zendesk": linked,
|
| 124 |
+
"labels": [],
|
| 125 |
+
"created_at": "2026-04-21T09:00:00",
|
| 126 |
+
}
|
| 127 |
+
self._records[issue_id] = rec
|
| 128 |
+
|
| 129 |
+
if linked:
|
| 130 |
+
self._linked_issues.add(issue_id)
|
| 131 |
+
if rec["assignee"]:
|
| 132 |
+
self._assigned_issues.add(issue_id)
|
| 133 |
+
|
| 134 |
+
return {
|
| 135 |
+
"success": True,
|
| 136 |
+
"data": {"issue_id": issue_id},
|
| 137 |
+
"schema_adapted": schema_adapted,
|
| 138 |
+
"message": f"Created {issue_id}: '{title}'"
|
| 139 |
+
+ (f" linked to {linked}" if linked else ""),
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
def _op_update_status(self, issue_id: str, **kwargs) -> Dict:
|
| 143 |
+
schema_error, schema_adapted = self._check_schema_drift(kwargs)
|
| 144 |
+
if schema_error:
|
| 145 |
+
return {"success": False, "schema_error": schema_error,
|
| 146 |
+
"message": f"Schema error: use current field name, not '{schema_error}'"}
|
| 147 |
+
|
| 148 |
+
rec = self._records.get(issue_id)
|
| 149 |
+
if not rec:
|
| 150 |
+
return {"success": False, "message": f"Issue {issue_id} not found"}
|
| 151 |
+
|
| 152 |
+
new_status = (kwargs.get("status") or kwargs.get("state")
|
| 153 |
+
or kwargs.get("current_state"))
|
| 154 |
+
if not new_status:
|
| 155 |
+
return {"success": False, "message": "Provide status/state/current_state value"}
|
| 156 |
+
|
| 157 |
+
rec["status"] = new_status
|
| 158 |
+
return {"success": True, "schema_adapted": schema_adapted,
|
| 159 |
+
"message": f"{issue_id} status → '{new_status}'"}
|
| 160 |
+
|
| 161 |
+
def _op_set_priority(self, issue_id: str, **kwargs) -> Dict:
|
| 162 |
+
schema_error, schema_adapted = self._check_schema_drift(kwargs)
|
| 163 |
+
if schema_error:
|
| 164 |
+
return {"success": False, "schema_error": schema_error,
|
| 165 |
+
"message": f"Schema error: '{schema_error}' is a stale field name"}
|
| 166 |
+
|
| 167 |
+
rec = self._records.get(issue_id)
|
| 168 |
+
if not rec:
|
| 169 |
+
return {"success": False, "message": f"Issue {issue_id} not found"}
|
| 170 |
+
|
| 171 |
+
new_priority = (kwargs.get("priority") or kwargs.get("severity")
|
| 172 |
+
or kwargs.get("urgency_level"))
|
| 173 |
+
if not new_priority:
|
| 174 |
+
return {"success": False,
|
| 175 |
+
"message": "Provide priority / severity / urgency_level value"}
|
| 176 |
+
|
| 177 |
+
rec["priority"] = new_priority
|
| 178 |
+
return {"success": True, "schema_adapted": schema_adapted,
|
| 179 |
+
"message": f"{issue_id} priority → '{new_priority}'"}
|
| 180 |
+
|
| 181 |
+
def _op_assign_owner(self, issue_id: str, **kwargs) -> Dict:
|
| 182 |
+
schema_error, schema_adapted = self._check_schema_drift(kwargs)
|
| 183 |
+
if schema_error:
|
| 184 |
+
hint = self._drift.translate_field("assignee", self.APP_NAME)
|
| 185 |
+
return {"success": False, "schema_error": schema_error,
|
| 186 |
+
"message": f"Schema error: use '{hint}' instead of '{schema_error}'"}
|
| 187 |
+
|
| 188 |
+
rec = self._records.get(issue_id)
|
| 189 |
+
if not rec:
|
| 190 |
+
return {"success": False, "message": f"Issue {issue_id} not found"}
|
| 191 |
+
|
| 192 |
+
assignee = (kwargs.get("assignee") or kwargs.get("owner")
|
| 193 |
+
or kwargs.get("assigned_to"))
|
| 194 |
+
if not assignee:
|
| 195 |
+
return {"success": False,
|
| 196 |
+
"message": "Provide assignee / owner / assigned_to value"}
|
| 197 |
+
|
| 198 |
+
rec["assignee"] = assignee
|
| 199 |
+
self._assigned_issues.add(issue_id)
|
| 200 |
+
return {"success": True, "schema_adapted": schema_adapted,
|
| 201 |
+
"message": f"{issue_id} assigned to '{assignee}'"}
|
| 202 |
+
|
| 203 |
+
def _op_add_label(self, issue_id: str, label: str) -> Dict:
|
| 204 |
+
rec = self._records.get(issue_id)
|
| 205 |
+
if not rec:
|
| 206 |
+
return {"success": False, "message": f"Issue {issue_id} not found"}
|
| 207 |
+
rec.setdefault("labels", []).append(label)
|
| 208 |
+
return {"success": True, "message": f"Added label '{label}' to {issue_id}"}
|
| 209 |
+
|
| 210 |
+
def _op_link_zendesk_ticket(self, issue_id: str, zendesk_ticket_number: str) -> Dict:
|
| 211 |
+
rec = self._records.get(issue_id)
|
| 212 |
+
if not rec:
|
| 213 |
+
return {"success": False, "message": f"Issue {issue_id} not found"}
|
| 214 |
+
rec["linked_zendesk"] = zendesk_ticket_number
|
| 215 |
+
self._linked_issues.add(issue_id)
|
| 216 |
+
return {"success": True,
|
| 217 |
+
"message": f"Linked {issue_id} ↔ Zendesk {zendesk_ticket_number}"}
|
| 218 |
+
|
| 219 |
+
def _op_close_issue(self, issue_id: str) -> Dict:
|
| 220 |
+
rec = self._records.get(issue_id)
|
| 221 |
+
if not rec:
|
| 222 |
+
return {"success": False, "message": f"Issue {issue_id} not found"}
|
| 223 |
+
rec["status"] = "closed"
|
| 224 |
+
return {"success": True, "message": f"Closed {issue_id}"}
|
| 225 |
+
|
| 226 |
+
def _op_list_issues(self, status: str = "open", customer_id: Optional[str] = None,
|
| 227 |
+
limit: int = 10) -> Dict:
|
| 228 |
+
self._bugs_checked = True
|
| 229 |
+
matching = [
|
| 230 |
+
r for r in self._records.values()
|
| 231 |
+
if (status == "all" or r.get("status") == status)
|
| 232 |
+
and (customer_id is None or r.get("customer_id") == customer_id)
|
| 233 |
+
][:limit]
|
| 234 |
+
drifted = [self._to_agent_view(r) for r in matching]
|
| 235 |
+
keep = ["issue_id", "title", "priority", "severity", "urgency_level",
|
| 236 |
+
"assignee", "owner", "assigned_to",
|
| 237 |
+
"status", "state", "current_state",
|
| 238 |
+
"customer_id", "linked_zendesk"]
|
| 239 |
+
compact = [{k: v for k, v in r.items() if k in keep and v is not None}
|
| 240 |
+
for r in drifted]
|
| 241 |
+
return {"success": True, "data": compact,
|
| 242 |
+
"message": f"Found {len(compact)} {status} issues"
|
| 243 |
+
+ (f" for {customer_id}" if customer_id else "")}
|
server/apps/salesforce.py
ADDED
@@ -0,0 +1,198 @@

"""Salesforce-like app — CRM account and pipeline management."""

from typing import Dict, List, Optional
from server.apps.base_app import BaseApp
from server.schema_drift import SchemaDriftEngine


class SalesforceApp(BaseApp):
    APP_NAME = "salesforce"

    OPERATIONS = [
        "get_account", "list_accounts", "update_deal_stage", "flag_churn_risk",
        "assign_account_owner", "log_interaction", "get_opportunity",
    ]

    def __init__(self, drift: SchemaDriftEngine):
        super().__init__(drift)
        self._records: Dict[str, Dict] = {}

    # ------------------------------------------------------------------
    # BaseApp interface
    # ------------------------------------------------------------------

    def initialize(self, records: List[Dict]) -> None:
        self._records = {r["account_id"]: r for r in records}

    def execute(self, operation: str, args: Dict) -> Dict:
        method = getattr(self, f"_op_{operation}", None)
        if method is None:
            return {
                "success": False,
                "message": f"Unknown operation '{operation}'. Available: {', '.join(self.OPERATIONS)}",
            }
        try:
            return method(**args)
        except TypeError as exc:
            return {"success": False, "message": f"Bad args for '{operation}': {exc}"}

    def get_state_view(self, max_rows: int = 5) -> str:
        at_risk = [r for r in self._records.values()
                   if r.get("health") in ("red", "yellow")][:max_rows]
        sample = at_risk or list(self._records.values())[:max_rows]
        if not sample:
            return "No accounts loaded."
        lines = []
        for rec in sample:
            view = self._to_agent_view(rec)
            keep = ["account_id", "company_name",
                    "deal_stage", "pipeline_stage", "stage",
                    "health", "account_health", "risk_score",
                    "owner", "owner_name", "account_owner", "rep_email",
                    "arr", "annual_recurring_revenue",
                    "is_paying", "territory"]
            compact = {k: v for k, v in view.items() if k in keep and v is not None}
            lines.append(str(compact))
        return "\n".join(lines)

    def count_open_items(self) -> int:
        return sum(1 for r in self._records.values()
                   if r.get("health") in ("red", "yellow") or
                   r.get("deal_stage") in ("prospect", "qualification", "negotiation"))

    # ------------------------------------------------------------------
    # Workflow completion state checks
    # ------------------------------------------------------------------

    def account_checked(self) -> bool:
        """True once get_account was called for ACME-001 (Workflow A step A3)."""
        return bool(self._records.get("ACME-001", {}).get("_account_checked"))

    def churn_flagged(self) -> bool:
        """True once flag_churn_risk was called for ACME-003 (Workflow C step C1)."""
        return bool(self._records.get("ACME-003", {}).get("_churn_flagged"))

    def team_assigned(self) -> bool:
        """True once assign_account_owner was called (Workflow B step B3)."""
        return any(r.get("_team_assigned") for r in self._records.values())

    def intervention_assigned(self) -> bool:
        """True once assign_account_owner called on ACME-003 (Workflow C step C4)."""
        return bool(self._records.get("ACME-003", {}).get("_intervention_assigned"))

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _op_get_account(self, account_id: str) -> Dict:
        rec = self._records.get(account_id)
        if not rec:
            return {"success": False,
                    "message": f"Account {account_id} not found. Use list_accounts to browse."}
        rec["_account_checked"] = True
        return {"success": True, "data": self._to_agent_view(rec),
                "message": f"Retrieved account {account_id} ({rec.get('company_name', '')})"}

    def _op_list_accounts(self, health: Optional[str] = None,
                          territory: Optional[str] = None,
                          limit: int = 10) -> Dict:
        matching = [
            r for r in self._records.values()
            if (health is None or r.get("health") == health)
            and (territory is None or r.get("territory") == territory)
        ][:limit]
        drifted = [self._to_agent_view(r) for r in matching]
        keep = ["account_id", "company_name",
                "deal_stage", "pipeline_stage", "stage",
                "health", "account_health", "risk_score",
                "owner", "owner_name", "account_owner", "rep_email",
                "arr", "annual_recurring_revenue",
                "is_paying", "territory"]
        compact = [{k: v for k, v in r.items() if k in keep and v is not None}
                   for r in drifted]
        return {"success": True, "data": compact,
                "message": f"Found {len(compact)} accounts"
                           + (f" (health={health})" if health else "")}

    def _op_update_deal_stage(self, account_id: str, amount: float = 0, **kwargs) -> Dict:
        """Note: requires manager approval if amount > threshold (checked by BusinessRuleEngine)."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("deal_stage", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' not '{schema_error}'"}

        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}

        new_stage = (kwargs.get("deal_stage") or kwargs.get("pipeline_stage")
                     or kwargs.get("stage"))
        if not new_stage:
            return {"success": False,
                    "message": "Provide deal_stage / pipeline_stage / stage value"}

        rec["deal_stage"] = new_stage
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{account_id} deal stage → '{new_stage}'"}

    def _op_flag_churn_risk(self, account_id: str, reason: Optional[str] = None) -> Dict:
        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}
        rec["_churn_flagged"] = True
        rec["health"] = "red"
        return {
            "success": True,
            "message": f"Flagged {account_id} ({rec.get('company_name', '')}) as churn risk"
                       + (f": {reason}" if reason else ""),
        }

    def _op_assign_account_owner(self, account_id: str, **kwargs) -> Dict:
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("owner", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' not '{schema_error}'"}

        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}

        new_owner = (kwargs.get("owner") or kwargs.get("owner_name")
                     or kwargs.get("account_owner") or kwargs.get("rep_email"))
        if not new_owner:
            return {"success": False,
                    "message": "Provide owner / owner_name / account_owner / rep_email"}

        rec["owner"] = new_owner
        rec["_team_assigned"] = True
        if account_id == "ACME-003":
            rec["_intervention_assigned"] = True

        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{account_id} owner → '{new_owner}'"}

    def _op_log_interaction(self, account_id: str, note: str = "") -> Dict:
        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}
        rec["_interaction_logged"] = True
        rec.setdefault("interactions", []).append(note)
        return {"success": True,
                "message": f"Logged interaction for {account_id}"}

    def _op_get_opportunity(self, account_id: str) -> Dict:
        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}
        opp = {
            "account_id": account_id,
            "company_name": rec.get("company_name"),
            "arr": rec.get("arr"),
            "deal_stage": rec.get("deal_stage"),
            "health": rec.get("health"),
            "is_paying": rec.get("is_paying"),
        }
        return {"success": True, "data": self._to_agent_view(opp),
                "message": f"Retrieved opportunity for {account_id}"}

server/apps/workday.py
ADDED
@@ -0,0 +1,195 @@

"""Workday-like app — HR and people operations."""

from typing import Dict, List, Optional
from server.apps.base_app import BaseApp
from server.schema_drift import SchemaDriftEngine


class WorkdayApp(BaseApp):
    APP_NAME = "workday"

    OPERATIONS = [
        "get_employee", "list_employees", "provision_access",
        "log_sla_event", "request_budget_approval",
        "create_onboarding_task", "complete_task",
    ]

    def __init__(self, drift: SchemaDriftEngine):
        super().__init__(drift)
        self._records: Dict[str, Dict] = {}

    # ------------------------------------------------------------------
    # BaseApp interface
    # ------------------------------------------------------------------

    def initialize(self, records: List[Dict]) -> None:
        self._records = {r["employee_id"]: r for r in records}

    def execute(self, operation: str, args: Dict) -> Dict:
        method = getattr(self, f"_op_{operation}", None)
        if method is None:
            return {
                "success": False,
                "message": f"Unknown operation '{operation}'. Available: {', '.join(self.OPERATIONS)}",
            }
        try:
            return method(**args)
        except TypeError as exc:
            return {"success": False, "message": f"Bad args for '{operation}': {exc}"}

    def get_state_view(self, max_rows: int = 5) -> str:
        pending = [r for r in self._records.values()
                   if r.get("status") == "pending"][:max_rows]
        sample = pending or list(self._records.values())[:max_rows]
        if not sample:
            return "No employee records loaded."
        lines = []
        for rec in sample:
            view = self._to_agent_view(rec)
            keep = ["employee_id", "name",
                    "level", "job_level", "seniority",
                    "manager_id", "reports_to", "direct_manager",
                    "status", "request_status", "approval_state",
                    "department", "territory", "email"]
            compact = {k: v for k, v in view.items() if k in keep and v is not None}
            lines.append(str(compact))
        return "\n".join(lines)

    def count_open_items(self) -> int:
        return sum(1 for r in self._records.values()
                   if r.get("status") == "pending")

    # ------------------------------------------------------------------
    # Workflow completion state checks
    # ------------------------------------------------------------------

    def sla_logged(self) -> bool:
        """True once log_sla_event was called (Workflow A step A5)."""
        return any(r.get("_sla_logged") for r in self._records.values())

    def employee_created(self) -> bool:
        """True once create_onboarding_task was called for EMP-NEW-001 (Workflow B step B1)."""
        return bool(self._records.get("EMP-NEW-001", {}).get("_onboarding_created"))

    def access_provisioned(self, app_name: str) -> bool:
        """True once provision_access was called for the given app (Workflow B step B2)."""
        return any(
            r.get("_access_provisioned", {}).get(app_name)
            for r in self._records.values()
        )

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _op_get_employee(self, employee_id: str) -> Dict:
        rec = self._records.get(employee_id)
        if not rec:
            return {"success": False,
                    "message": f"Employee {employee_id} not found. Use list_employees to browse."}
        return {"success": True, "data": self._to_agent_view(rec),
                "message": f"Retrieved employee {employee_id} ({rec.get('name', '')})"}

    def _op_list_employees(self, department: Optional[str] = None,
                           status: Optional[str] = None,
                           limit: int = 10) -> Dict:
        matching = [
            r for r in self._records.values()
            if (department is None or r.get("department") == department)
            and (status is None or r.get("status") == status)
        ][:limit]
        drifted = [self._to_agent_view(r) for r in matching]
        keep = ["employee_id", "name",
                "level", "job_level", "seniority",
                "manager_id", "reports_to", "direct_manager",
                "status", "request_status", "approval_state",
                "department", "territory"]
        compact = [{k: v for k, v in r.items() if k in keep and v is not None}
                   for r in drifted]
        return {"success": True, "data": compact,
                "message": f"Found {len(compact)} employees"
                           + (f" in {department}" if department else "")}

    def _op_provision_access(self, employee_id: str, app_name: str,
                             **kwargs) -> Dict:
        """Grant app access to an employee (Workflow B step B2)."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use current field name, not '{schema_error}'"}

        rec = self._records.get(employee_id)
        if not rec:
            return {"success": False, "message": f"Employee {employee_id} not found"}

        rec.setdefault("_access_provisioned", {})[app_name] = True
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"Provisioned {app_name} access for {employee_id} ({rec.get('name', '')})"}

    def _op_log_sla_event(self, ticket_id: str, sla_met: bool = True,
                          elapsed_minutes: Optional[float] = None) -> Dict:
        """Log an SLA compliance event (Workflow A step A5)."""
        # Find an employee record to attach the log to
        first = next(iter(self._records.values()), None)
        if first is None:
            return {"success": False, "message": "No Workday records loaded"}

        first["_sla_logged"] = True
        status = "MET" if sla_met else "BREACHED"
        detail = (f" ({elapsed_minutes:.1f} min elapsed)" if elapsed_minutes else "")
        return {
            "success": True,
            "message": f"SLA event logged for {ticket_id}: {status}{detail}",
        }

    def _op_request_budget_approval(self, employee_id: str,
                                    amount: float = 0, reason: str = "") -> Dict:
        """Request budget approval (triggers RBAC / approval threshold check upstream)."""
        rec = self._records.get(employee_id)
        if not rec:
            return {"success": False, "message": f"Employee {employee_id} not found"}
        return {
            "success": True,
            "message": f"Budget approval request submitted for {employee_id}: ${amount:,.0f}",
        }

    def _op_create_onboarding_task(self, employee_id: str, **kwargs) -> Dict:
        """Create onboarding record for a new employee (Workflow B step B1)."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use current field name, not '{schema_error}'"}

        rec = self._records.get(employee_id)
        if not rec:
            # Auto-create a stub record if it doesn't exist yet
            rec = {
                "employee_id": employee_id,
                "name": kwargs.get("name", "New Employee"),
                "level": kwargs.get("level") or kwargs.get("job_level") or kwargs.get("seniority", "IC1"),
                "manager_id": kwargs.get("manager_id") or kwargs.get("reports_to") or kwargs.get("direct_manager"),
                "status": "pending",
                "department": kwargs.get("department", "support"),
                "territory": kwargs.get("territory", "west"),
                "email": kwargs.get("email", f"{employee_id.lower()}@company.com"),
                "_access_provisioned": {},
                "_sla_logged": False,
                "_onboarding_created": True,
            }
            self._records[employee_id] = rec
        else:
            rec["_onboarding_created"] = True

        rec.setdefault("_onboarding_tasks", []).append("onboarding_checklist")
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"Onboarding task created for {employee_id} ({rec.get('name', '')})"}

    def _op_complete_task(self, employee_id: str, task: str) -> Dict:
        rec = self._records.get(employee_id)
        if not rec:
            return {"success": False, "message": f"Employee {employee_id} not found"}
        tasks = rec.get("_onboarding_tasks", [])
        if task in tasks:
            tasks.remove(task)
        return {"success": True,
                "message": f"Completed task '{task}' for {employee_id}"}

server/apps/zendesk.py
ADDED
@@ -0,0 +1,238 @@

"""Zendesk-like app — customer support ticket management."""

from typing import Dict, List, Optional
from server.apps.base_app import BaseApp
from server.schema_drift import SchemaDriftEngine


class ZendeskApp(BaseApp):
    APP_NAME = "zendesk"

    OPERATIONS = [
        "get_ticket", "acknowledge_ticket", "set_urgency", "assign_agent",
        "escalate_to_jira", "resolve_ticket", "add_note", "list_tickets",
    ]

    def __init__(self, drift: SchemaDriftEngine):
        super().__init__(drift)
        self._records: Dict[str, Dict] = {}

    # ------------------------------------------------------------------
    # BaseApp interface
    # ------------------------------------------------------------------

    def initialize(self, records: List[Dict]) -> None:
        self._records = {r["ticket_number"]: r for r in records}

    def execute(self, operation: str, args: Dict) -> Dict:
        method = getattr(self, f"_op_{operation}", None)
        if method is None:
            return {
                "success": False,
                "message": f"Unknown operation '{operation}'. Available: {', '.join(self.OPERATIONS)}",
            }
        try:
            return method(**args)
        except TypeError as exc:
            return {"success": False, "message": f"Bad args for '{operation}': {exc}"}

    def get_state_view(self, max_rows: int = 5) -> str:
        open_tickets = [r for r in self._records.values()
                        if r.get("state") not in ("resolved", "closed")][:max_rows]
        if not open_tickets:
            return "No open tickets."
        lines = []
        for rec in open_tickets:
            view = self._to_agent_view(rec)
            keep = ["ticket_number", "title",
                    "urgency", "priority", "impact_level",
                    "agent_email", "handler", "assigned_agent",
                    "state", "ticket_state", "resolution_status",
                    "customer_id"]
            compact = {k: v for k, v in view.items() if k in keep and v is not None}
            lines.append(str(compact))
        return "\n".join(lines)

    def count_open_items(self) -> int:
        return sum(1 for r in self._records.values()
                   if r.get("state") not in ("resolved", "closed"))

    # ------------------------------------------------------------------
    # Workflow completion state checks
    # ------------------------------------------------------------------

    def ticket_acknowledged(self) -> bool:
        """True once ZD-001 has been acknowledged (Workflow A step A1)."""
        return bool(self._records.get("ZD-001", {}).get("_acknowledged"))

    def support_queried(self, account_id: str) -> bool:
        """True once tickets for account_id were listed (Workflow C step C2)."""
        return account_id in self._records.get("ZD-001", {}).get("_queried_accounts", []) or \
            any(account_id in r.get("_queried_accounts", []) for r in self._records.values())

    def profile_created(self) -> bool:
        """True once a new agent profile was created (Workflow B step B4)."""
        return any(r.get("_profile_created") for r in self._records.values())

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _op_get_ticket(self, ticket_number: str, customer_id: Optional[str] = None) -> Dict:
        # If customer_id provided, look up all tickets for that customer
        if customer_id:
            matching = [r for r in self._records.values()
                        if r.get("customer_id") == customer_id]
            # Mark as queried for Workflow C
            for r in matching:
                r.setdefault("_queried_accounts", [])
                if customer_id not in r["_queried_accounts"]:
                    r["_queried_accounts"].append(customer_id)
            if not matching:
                return {"success": True, "data": [],
                        "message": f"No tickets found for customer {customer_id}"}
            return {
                "success": True,
                "data": [self._to_agent_view(r) for r in matching[:5]],
                "message": f"Found {len(matching)} tickets for {customer_id}",
            }

        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False,
                    "message": f"Ticket {ticket_number} not found. Use list_tickets to browse."}
        rec.setdefault("_queried_accounts", [])
        cid = rec.get("customer_id")
        if cid and cid not in rec["_queried_accounts"]:
            rec["_queried_accounts"].append(cid)

        return {"success": True, "data": self._to_agent_view(rec),
                "ticket": rec,
                "message": f"Retrieved {ticket_number}"}

    def _op_acknowledge_ticket(self, ticket_number: str) -> Dict:
        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}
        rec["_acknowledged"] = True
        if rec.get("state") == "new":
            rec["state"] = "open"
        return {"success": True, "ticket": rec,
                "message": f"Acknowledged {ticket_number} — status → open"}

    def _op_set_urgency(self, ticket_number: str, **kwargs) -> Dict:
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("urgency", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' not '{schema_error}'"}

        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}

        new_urgency = (kwargs.get("urgency") or kwargs.get("priority")
                       or kwargs.get("impact_level"))
        if not new_urgency:
            return {"success": False,
                    "message": "Provide urgency / priority / impact_level value"}

        rec["urgency"] = new_urgency
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{ticket_number} urgency → '{new_urgency}'"}

    def _op_assign_agent(self, ticket_number: str, **kwargs) -> Dict:
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("agent_email", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' not '{schema_error}'"}

        rec = self._records.get(ticket_number)
        # For Workflow B profile creation: allow creating a new agent entry
        if not rec:
            # Create a minimal profile record for the new agent
            email = (kwargs.get("agent_email") or kwargs.get("handler")
                     or kwargs.get("assigned_agent"))
            if not email:
                return {"success": False, "message": f"Ticket {ticket_number} not found"}
            # Create a synthetic profile ticket
            profile_rec = {
                "ticket_number": ticket_number,
                "title": "Agent profile",
                "urgency": "p3",
                "agent_email": email,
                "state": "closed",
                "customer_id": None,
                "_acknowledged": False,
                "_queried_accounts": [],
                "_profile_created": True,
            }
            self._records[ticket_number] = profile_rec
            return {"success": True, "schema_adapted": schema_adapted,
                    "message": f"Created Zendesk profile for agent '{email}'"}

        email = (kwargs.get("agent_email") or kwargs.get("handler")
                 or kwargs.get("assigned_agent"))
        if not email:
            return {"success": False,
                    "message": "Provide agent_email / handler / assigned_agent value"}

        rec["agent_email"] = email
        rec["_profile_created"] = True
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{ticket_number} assigned to '{email}'"}

    def _op_escalate_to_jira(self, ticket_number: str,
                             jira_issue_id: Optional[str] = None) -> Dict:
        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}
        rec["state"] = "pending"
        rec["escalated_to_jira"] = jira_issue_id or "pending"
        return {"success": True,
                "message": f"{ticket_number} escalated to Jira"
                           + (f" ({jira_issue_id})" if jira_issue_id else "")}

    def _op_resolve_ticket(self, ticket_number: str) -> Dict:
        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}
        rec["state"] = "resolved"
        return {"success": True, "message": f"{ticket_number} resolved"}

    def _op_add_note(self, ticket_number: str, note: str) -> Dict:
        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}
        rec.setdefault("notes", []).append(note)
        return {"success": True, "message": f"Note added to {ticket_number}"}

    def _op_list_tickets(self, state: str = "open", customer_id: Optional[str] = None,
                         limit: int = 10) -> Dict:
        matching = [
            r for r in self._records.values()
            if (state == "all" or r.get("state") == state)
            and (customer_id is None or r.get("customer_id") == customer_id)
        ][:limit]
        # Mark accounts as queried
        if customer_id:
            for r in matching:
                r.setdefault("_queried_accounts", [])
                if customer_id not in r["_queried_accounts"]:
                    r["_queried_accounts"].append(customer_id)

        drifted = [self._to_agent_view(r) for r in matching]
        keep = ["ticket_number", "title",
                "urgency", "priority", "impact_level",
                "agent_email", "handler", "assigned_agent",
                "state", "ticket_state", "resolution_status",
                "customer_id"]
        compact = [{k: v for k, v in r.items() if k in keep and v is not None}
                   for r in drifted]
        return {
            "success": True,
            "data": compact,
            "message": f"Found {len(compact)} {state} tickets"
                       + (f" for {customer_id}" if customer_id else ""),
        }

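An illustrative run of the start of Workflow A against this module, using the NoDrift stand-in from the BaseApp note and one made-up P0 ticket:

from server.apps.zendesk import ZendeskApp

zd = ZendeskApp(NoDrift())
zd.initialize([{
    "ticket_number": "ZD-001", "title": "Checkout page returns 500", "urgency": "p0",
    "agent_email": None, "state": "new", "customer_id": "ACME-001",
}])

# Workflow A, step A1: acknowledge the P0, then hand it off to engineering
print(zd.execute("acknowledge_ticket", {"ticket_number": "ZD-001"}))
print(zd.execute("escalate_to_jira", {"ticket_number": "ZD-001", "jira_issue_id": "JIRA-001"}))
print(zd.ticket_acknowledged(), zd.count_open_items())   # -> True 1  (pending still counts as open)
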
server/business_rules.py
CHANGED
@@ -1,62 +1,129 @@

"""Business rule engine — RBAC, SLA checks, approval thresholds, policy drift."""

from typing import Dict, List, Tuple

from models import OrgOSAction


DEFAULT_RULES: Dict = {
    "sla_p0_minutes": 30,          # P0 tickets: acknowledge within 30 min
    "sla_p1_hours": 4,             # P1 tickets: first response within 4 h
    "approval_threshold": 10_000,  # $ above which manager approval is needed
    "max_tickets_per_agent": 10,   # RBAC: agent capacity cap
    "gdpr_max_days": 30,           # GDPR ticket resolution SLA
    "rbac": {
        # Support engineers — can complete Workflows A and C
        "support": {
            "zendesk": ["*"],      # full ticket lifecycle
            "jira": ["*"],         # full issue lifecycle
            "salesforce": [
                "get_account", "list_accounts", "get_opportunity",
                "log_interaction", "flag_churn_risk", "assign_account_owner",
            ],
            "workday": [
                "get_employee", "list_employees", "log_sla_event",
            ],
        },
        # Engineers — focused on Jira + limited Zendesk/Salesforce reads
        "engineer": {
            "jira": ["*"],
            "zendesk": ["get_ticket", "list_tickets", "add_note", "resolve_ticket"],
            "salesforce": ["get_account", "list_accounts"],
            "workday": ["get_employee"],
        },
        # Managers — full access to all apps (Workflow B)
        "manager": {"*": ["*"]},
    },
}

POLICY_DRIFT_EVENTS: Dict = {
    "sla_tighten": {"sla_p0_minutes": 15, "sla_p1_hours": 2},
    "approval_tighten": {"approval_threshold": 5_000},
    "gdpr_expedite": {"gdpr_max_days": 7},
}


class BusinessRuleEngine:
    def __init__(self):
        import copy
        self.rules = copy.deepcopy(DEFAULT_RULES)
        self._violation_log: List[str] = []

    # ------------------------------------------------------------------
    # Policy drift
    # ------------------------------------------------------------------

    def apply_policy_drift(self, event: str) -> None:
        """Called mid-episode or at episode start to change rules."""
        if event in POLICY_DRIFT_EVENTS:
            self.rules.update(POLICY_DRIFT_EVENTS[event])

    # ------------------------------------------------------------------
    # Action validation
    # ------------------------------------------------------------------

    def check_action(self, action: OrgOSAction, context: Dict) -> Tuple[bool, str, float]:
        """
        Returns (allowed, reason, penalty).

        penalty values:
            -0.25  RBAC violation
            -0.10  approval threshold exceeded without manager approval
        """
        role = context.get("agent_role", "support")
        app_perms = self.rules["rbac"].get(role, {})

        # Wildcard role (manager) → always allowed
        if "*" in app_perms and "*" in app_perms.get("*", []):
            pass  # fall through to approval check
        else:
            allowed_ops = app_perms.get(action.app, app_perms.get("*", []))
            if "*" not in allowed_ops and action.operation not in allowed_ops:
                reason = f"RBAC: '{role}' cannot run '{action.operation}' on '{action.app}'"
                self._violation_log.append(reason)
                return False, reason, -0.25

        # Approval threshold check
        if action.operation in ("request_budget_approval", "update_deal_stage"):
            amount = action.args.get("amount", 0)
            if amount > self.rules["approval_threshold"] and not context.get("manager_approved"):
                reason = (
                    f"Approval required: ${amount:,.0f} exceeds "
                    f"${self.rules['approval_threshold']:,.0f} threshold"
                )
                self._violation_log.append(reason)
                return False, reason, -0.10

        return True, "", 0.0

    # ------------------------------------------------------------------
    # SLA checks
    # ------------------------------------------------------------------

    def check_sla(self, ticket: Dict, elapsed_minutes: float) -> Tuple[bool, float]:
        """Returns (sla_met, penalty)."""
        priority = ticket.get("priority", ticket.get("urgency", "p2"))
        if priority in ("p0", "critical") and elapsed_minutes > self.rules["sla_p0_minutes"]:
            return False, -0.15
        if priority in ("p1", "high") and elapsed_minutes > self.rules["sla_p1_hours"] * 60:
            return False, -0.10
        return True, 0.0

    # ------------------------------------------------------------------
    # Violation log
    # ------------------------------------------------------------------

    def get_violations_this_step(self) -> List[str]:
        """Return and clear the per-step violation log."""
        v = self._violation_log.copy()
        self._violation_log.clear()
        return v

    def get_active_rules_summary(self) -> Dict:
        """Return scalar rules for inclusion in observation."""
        return {
            "sla_p0_minutes": self.rules["sla_p0_minutes"],
            "sla_p1_hours": self.rules["sla_p1_hours"],
            "approval_threshold": self.rules["approval_threshold"],
            "gdpr_max_days": self.rules["gdpr_max_days"],
        }

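A short illustrative check of the two penalty paths. The SimpleNamespace object below is a stand-in that carries only the attributes check_action reads (app, operation, args); the real OrgOSAction pydantic model may require more fields:

from types import SimpleNamespace
from server.business_rules import BusinessRuleEngine

engine = BusinessRuleEngine()

deal = SimpleNamespace(app="salesforce", operation="update_deal_stage",
                       args={"amount": 25_000, "deal_stage": "closed_won"})

# A support agent is outside its RBAC grant for this operation -> penalty -0.25
print(engine.check_action(deal, {"agent_role": "support"}))

# A manager clears RBAC but still trips the $10,000 approval threshold -> penalty -0.10
print(engine.check_action(deal, {"agent_role": "manager"}))

# With explicit approval the action goes through
print(engine.check_action(deal, {"agent_role": "manager", "manager_approved": True}))

engine.apply_policy_drift("approval_tighten")   # mid-episode drift: threshold drops to $5,000
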
server/data_generator.py
CHANGED
@@ -6,9 +6,20 @@ All datasets are generated purely from numpy/random — no external downloads.
 import random
 import numpy as np
 import pandas as pd

 SEED = 42

 # ---------------------------------------------------------------------------
 # Task 1 — Employee records with missing values
@@ -21,17 +32,12 @@ def generate_task1_datasets():

     n = 100
     departments = ["Engineering", "Marketing", "Sales", "HR", "Finance"]
-    names = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(n)]
-    ages = rng.integers(22, 60, size=n).astype(float)
-    salaries = rng.integers(40_000, 120_000, size=n).astype(float)
-    depts = rng.choice(departments, size=n)
-    experience = rng.integers(0, 30, size=n).astype(float)

     clean_df = pd.DataFrame({
         "name": names,
@@ -42,8 +48,6 @@ def generate_task1_datasets():
     })

     dirty_df = clean_df.copy()
-    # Inject ~20 % NaN into age, salary, department
     for col, frac in [("age", 0.20), ("salary", 0.20), ("department", 0.10)]:
         idx = rng.choice(n, size=int(n * frac), replace=False)
         dirty_df.loc[idx, col] = np.nan
@@ -59,11 +63,11 @@ def _scramble_phone(phone: str, rng) -> str:
     digits = phone.replace("-", "")
     fmt = rng.integers(0, 3)
     if fmt == 0:
-        return digits
     elif fmt == 1:
-        return f"({digits[:3]}){digits[3:]}"
     else:
-        return phone


 def _scramble_date(date_str: str, rng) -> str:
@@ -85,16 +89,16 @@ def generate_task2_datasets():
     n = 200
     categories = ["Electronics", "Clothing", "Food", "Books", "Toys"]

-    product_ids
-    product_names
-    prices
     categories_col = rng.choice(categories, size=n)
-    phones
         f"{rng.integers(100,999)}-{rng.integers(100,999)}-{rng.integers(1000,9999)}"
         for _ in range(n)
     ]
-    days_offset
-    dates
         (pd.Timestamp("2020-01-01") + pd.Timedelta(days=int(d))).strftime("%Y-%m-%d")
         for d in days_offset
     ]
@@ -110,19 +114,16 @@ def generate_task2_datasets():

     dirty_df = clean_df.copy()

-    # Scramble ~60 % of phone formats
     phone_idx = rng.choice(n, size=int(n * 0.6), replace=False)
     dirty_df.loc[phone_idx, "phone"] = [
         _scramble_phone(dirty_df.loc[i, "phone"], rng) for i in phone_idx
     ]

-    # Scramble ~60 % of date formats
     date_idx = rng.choice(n, size=int(n * 0.6), replace=False)
     dirty_df.loc[date_idx, "listed_date"] = [
         _scramble_date(dirty_df.loc[i, "listed_date"], rng) for i in date_idx
     ]

-    # Add 15 duplicate rows
     dup_idx = rng.choice(n, size=15, replace=False)
     dup_rows = dirty_df.iloc[dup_idx].copy()
     dirty_df = pd.concat([dirty_df, dup_rows], ignore_index=True)
@@ -140,18 +141,15 @@ def generate_task3_datasets():
     random.seed(SEED)

     n = 300
-    countries
-    emails = [f"user{i}@example.com" for i in range(1, n + 1)]
-    days_offset = rng.integers(0, 730, size=n)
-    signup_dates = [
         (pd.Timestamp("2022-01-01") + pd.Timedelta(days=int(d))).strftime("%Y-%m-%d")
         for d in days_offset
     ]
@@ -167,29 +165,24 @@ def generate_task3_datasets():

     dirty_df = clean_df.copy()

-    # Missing values (~15 % in age, purchase_amount, country, signup_date)
     for col, frac in [("age", 0.15), ("purchase_amount", 0.15),
                       ("country", 0.10), ("signup_date", 0.10)]:
         idx = rng.choice(n, size=int(n * frac), replace=False)
         dirty_df.loc[idx, col] = np.nan

-    # Outliers in purchase_amount (~3 %)
     out_idx = rng.choice(n, size=int(n * 0.03), replace=False)
     dirty_df.loc[out_idx, "purchase_amount"] = (
         dirty_df.loc[out_idx, "purchase_amount"] * 10
     )

-    # Mixed case in country (~40 %)
     case_idx = rng.choice(n, size=int(n * 0.40), replace=False)
     dirty_df.loc[case_idx, "country"] = dirty_df.loc[case_idx, "country"].str.lower()

-    # Mixed date formats (~50 %) — only scramble non-null entries
     date_idx = rng.choice(n, size=int(n * 0.50), replace=False)
     valid_date_idx = [i for i in date_idx if pd.notna(dirty_df.loc[i, "signup_date"])]
     for i in valid_date_idx:
         dirty_df.loc[i, "signup_date"] = _scramble_date(dirty_df.loc[i, "signup_date"], rng)

-    # 20 duplicate rows
     dup_idx = rng.choice(n, size=20, replace=False)
     dup_rows = dirty_df.iloc[dup_idx].copy()
     dirty_df = pd.concat([dirty_df, dup_rows], ignore_index=True)
@@ -197,18 +190,201 @@ def generate_task3_datasets():
     return dirty_df.reset_index(drop=True), clean_df.reset_index(drop=True)

-def
-"""
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import random
|
| 7 |
import numpy as np
|
| 8 |
import pandas as pd
|
| 9 |
+
from typing import Dict, List
|
| 10 |
|
| 11 |
SEED = 42
|
| 12 |
|
| 13 |
+
# ---------------------------------------------------------------------------
|
| 14 |
+
# Shared name pools (cross-referenced across apps)
|
| 15 |
+
# ---------------------------------------------------------------------------
|
| 16 |
+
|
| 17 |
+
FIRST_NAMES = ["Alice", "Bob", "Carol", "David", "Eve", "Frank", "Grace",
|
| 18 |
+
"Heidi", "Ivan", "Judy", "Karl", "Laura", "Mallory", "Niaj",
|
| 19 |
+
"Oscar", "Peggy", "Quinn", "Romeo", "Sybil", "Trent"]
|
| 20 |
+
LAST_NAMES = ["Smith", "Jones", "Brown", "Taylor", "Wilson", "Davis",
|
| 21 |
+
"Miller", "Anderson", "Thomas", "Jackson"]
|
| 22 |
+
|
| 23 |
|
| 24 |
# ---------------------------------------------------------------------------
|
| 25 |
# Task 1 — Employee records with missing values
|
|
|
|
| 32 |
|
| 33 |
n = 100
|
| 34 |
departments = ["Engineering", "Marketing", "Sales", "HR", "Finance"]
|
| 35 |
+
|
| 36 |
+
names = [f"{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}" for _ in range(n)]
|
| 37 |
+
ages = rng.integers(22, 60, size=n).astype(float)
|
| 38 |
+
salaries = rng.integers(40_000, 120_000, size=n).astype(float)
|
| 39 |
+
depts = rng.choice(departments, size=n)
|
| 40 |
+
experience = rng.integers(0, 30, size=n).astype(float)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
clean_df = pd.DataFrame({
|
| 43 |
"name": names,
|
|
|
|
| 48 |
})
|
| 49 |
|
| 50 |
dirty_df = clean_df.copy()
|
|
|
|
|
|
|
| 51 |
for col, frac in [("age", 0.20), ("salary", 0.20), ("department", 0.10)]:
|
| 52 |
idx = rng.choice(n, size=int(n * frac), replace=False)
|
| 53 |
dirty_df.loc[idx, col] = np.nan
|
|
|
|
| 63 |
digits = phone.replace("-", "")
|
| 64 |
fmt = rng.integers(0, 3)
|
| 65 |
if fmt == 0:
|
| 66 |
+
return digits
|
| 67 |
elif fmt == 1:
|
| 68 |
+
return f"({digits[:3]}){digits[3:]}"
|
| 69 |
else:
|
| 70 |
+
return phone
|
| 71 |
|
| 72 |
|
| 73 |
def _scramble_date(date_str: str, rng) -> str:
|
|
|
|
| 89 |
n = 200
|
| 90 |
categories = ["Electronics", "Clothing", "Food", "Books", "Toys"]
|
| 91 |
|
| 92 |
+
product_ids = [f"P{str(i).zfill(4)}" for i in range(1, n + 1)]
|
| 93 |
+
product_names = [f"Product_{i}" for i in range(1, n + 1)]
|
| 94 |
+
prices = np.round(rng.uniform(5.0, 500.0, size=n), 2)
|
| 95 |
categories_col = rng.choice(categories, size=n)
|
| 96 |
+
phones = [
|
| 97 |
f"{rng.integers(100,999)}-{rng.integers(100,999)}-{rng.integers(1000,9999)}"
|
| 98 |
for _ in range(n)
|
| 99 |
]
|
| 100 |
+
days_offset = rng.integers(0, 1000, size=n)
|
| 101 |
+
dates = [
|
| 102 |
(pd.Timestamp("2020-01-01") + pd.Timedelta(days=int(d))).strftime("%Y-%m-%d")
|
| 103 |
for d in days_offset
|
| 104 |
]
|
|
|
|
| 114 |
|
| 115 |
dirty_df = clean_df.copy()
|
| 116 |
|
|
|
|
| 117 |
phone_idx = rng.choice(n, size=int(n * 0.6), replace=False)
|
| 118 |
dirty_df.loc[phone_idx, "phone"] = [
|
| 119 |
_scramble_phone(dirty_df.loc[i, "phone"], rng) for i in phone_idx
|
| 120 |
]
|
| 121 |
|
|
|
|
| 122 |
date_idx = rng.choice(n, size=int(n * 0.6), replace=False)
|
| 123 |
dirty_df.loc[date_idx, "listed_date"] = [
|
| 124 |
_scramble_date(dirty_df.loc[i, "listed_date"], rng) for i in date_idx
|
| 125 |
]
|
| 126 |
|
|
|
|
| 127 |
dup_idx = rng.choice(n, size=15, replace=False)
|
| 128 |
dup_rows = dirty_df.iloc[dup_idx].copy()
|
| 129 |
dirty_df = pd.concat([dirty_df, dup_rows], ignore_index=True)
|
|
|
|
| 141 |
random.seed(SEED)
|
| 142 |
|
| 143 |
n = 300
|
| 144 |
+
countries = ["USA", "UK", "Canada", "Australia", "Germany"]
|
| 145 |
+
|
| 146 |
+
names = [f"{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}" for _ in range(n)]
|
| 147 |
+
ages = rng.integers(18, 75, size=n).astype(float)
|
| 148 |
+
purchase_amounts = np.round(rng.uniform(10.0, 500.0, size=n), 2)
|
| 149 |
+
countries_col = rng.choice(countries, size=n)
|
| 150 |
+
emails = [f"user{i}@example.com" for i in range(1, n + 1)]
|
| 151 |
+
days_offset = rng.integers(0, 730, size=n)
|
| 152 |
+
signup_dates = [
|
|
|
|
|
|
|
|
|
|
| 153 |
(pd.Timestamp("2022-01-01") + pd.Timedelta(days=int(d))).strftime("%Y-%m-%d")
|
| 154 |
for d in days_offset
|
| 155 |
]
|
|
|
|
| 165 |
|
| 166 |
dirty_df = clean_df.copy()
|
| 167 |
|
|
|
|
| 168 |
for col, frac in [("age", 0.15), ("purchase_amount", 0.15),
|
| 169 |
("country", 0.10), ("signup_date", 0.10)]:
|
| 170 |
idx = rng.choice(n, size=int(n * frac), replace=False)
|
| 171 |
dirty_df.loc[idx, col] = np.nan
|
| 172 |
|
|
|
|
| 173 |
out_idx = rng.choice(n, size=int(n * 0.03), replace=False)
|
| 174 |
dirty_df.loc[out_idx, "purchase_amount"] = (
|
| 175 |
dirty_df.loc[out_idx, "purchase_amount"] * 10
|
| 176 |
)
|
| 177 |
|
|
|
|
| 178 |
case_idx = rng.choice(n, size=int(n * 0.40), replace=False)
|
| 179 |
dirty_df.loc[case_idx, "country"] = dirty_df.loc[case_idx, "country"].str.lower()
|
| 180 |
|
|
|
|
| 181 |
date_idx = rng.choice(n, size=int(n * 0.50), replace=False)
|
| 182 |
valid_date_idx = [i for i in date_idx if pd.notna(dirty_df.loc[i, "signup_date"])]
|
| 183 |
for i in valid_date_idx:
|
| 184 |
dirty_df.loc[i, "signup_date"] = _scramble_date(dirty_df.loc[i, "signup_date"], rng)
|
| 185 |
|
|
|
|
| 186 |
dup_idx = rng.choice(n, size=20, replace=False)
|
| 187 |
dup_rows = dirty_df.iloc[dup_idx].copy()
|
| 188 |
dirty_df = pd.concat([dirty_df, dup_rows], ignore_index=True)
|
|
|
|
| 190 |
return dirty_df.reset_index(drop=True), clean_df.reset_index(drop=True)
|
| 191 |
|
| 192 |
|
| 193 |
+
# ---------------------------------------------------------------------------
|
| 194 |
+
# OrgOS App Data Generators
|
| 195 |
+
# ---------------------------------------------------------------------------
|
| 196 |
|
| 197 |
+
def generate_jira_records(n: int = 50, seed: int = SEED) -> List[Dict]:
|
| 198 |
+
"""Generate synthetic Jira-like engineering tickets (canonical field names)."""
|
| 199 |
+
random.seed(seed)
|
| 200 |
+
priorities = ["p0", "p1", "p2", "p3"]
|
| 201 |
+
statuses = ["open", "in_progress", "in_review", "closed"]
|
| 202 |
+
employees = [f"EMP-{i:03d}" for i in range(1, 21)]
|
| 203 |
+
accounts = [f"ACME-{i:03d}" for i in range(1, 31)]
|
| 204 |
+
titles = [
|
| 205 |
+
"Login fails intermittently", "API timeout on checkout",
|
| 206 |
+
"Dashboard charts not rendering", "Email notifications delayed",
|
| 207 |
+
"Password reset broken", "Search returns no results",
|
| 208 |
+
"Import fails for large files", "Session expires too quickly",
|
| 209 |
+
"Reports missing data", "Webhook delivery failures",
|
| 210 |
+
]
|
| 211 |
|
| 212 |
+
records = []
|
| 213 |
+
for i in range(1, n + 1):
|
| 214 |
+
records.append({
|
| 215 |
+
"issue_id": f"JIRA-{i:03d}",
|
| 216 |
+
"title": f"{random.choice(titles)} #{i}",
|
| 217 |
+
"priority": random.choices(priorities, weights=[5, 15, 50, 30])[0],
|
| 218 |
+
"assignee": random.choice(employees) if random.random() > 0.3 else None,
|
| 219 |
+
"status": random.choices(statuses, weights=[30, 40, 15, 15])[0],
|
| 220 |
+
"reporter": random.choice(employees),
|
| 221 |
+
"customer_id": random.choice(accounts),
|
| 222 |
+
"linked_zendesk": None,
|
| 223 |
+
"labels": random.sample(["bug", "urgent", "customer-reported"], k=random.randint(0, 2)),
|
| 224 |
+
"created_at": "2026-04-20T09:00:00",
|
| 225 |
+
})
|
| 226 |
+
|
| 227 |
+
# Workflow A primary issue: JIRA-001 is unassigned, linked to ACME-001
|
| 228 |
+
records[0].update({
|
| 229 |
+
"title": "Customer login fails intermittently",
|
| 230 |
+
"priority": "p1",
|
| 231 |
+
"status": "open",
|
| 232 |
+
"customer_id": "ACME-001",
|
| 233 |
+
"assignee": None,
|
| 234 |
+
"linked_zendesk": None,
|
| 235 |
+
})
|
| 236 |
|
| 237 |
+
return records
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def generate_zendesk_records(n: int = 40, seed: int = SEED) -> List[Dict]:
|
| 241 |
+
"""Generate synthetic Zendesk-like support tickets (canonical field names)."""
|
| 242 |
+
random.seed(seed)
|
| 243 |
+
urgencies = ["p0", "p1", "p2", "p3"]
|
| 244 |
+
states = ["new", "open", "pending", "resolved", "closed"]
|
| 245 |
+
accounts = [f"ACME-{i:03d}" for i in range(1, 31)]
|
| 246 |
+
agents = [f"agent{i}@company.com" for i in range(1, 6)]
|
| 247 |
+
|
| 248 |
+
records = []
|
| 249 |
+
for i in range(1, n + 1):
|
| 250 |
+
records.append({
|
| 251 |
+
"ticket_number": f"ZD-{i:03d}",
|
| 252 |
+
"title": f"Support request #{i}",
|
| 253 |
+
"urgency": random.choices(urgencies, weights=[3, 12, 55, 30])[0],
|
| 254 |
+
"agent_email": random.choice(agents) if random.random() > 0.4 else None,
|
| 255 |
+
"state": random.choices(states, weights=[20, 35, 20, 15, 10])[0],
|
| 256 |
+
"customer_id": random.choice(accounts),
|
| 257 |
+
"channel": random.choice(["email", "chat", "phone", "web"]),
|
| 258 |
+
"created_at": "2026-04-20T08:00:00",
|
| 259 |
+
# Internal state tracking — stripped before agent sees record
|
| 260 |
+
"_acknowledged": False,
|
| 261 |
+
"_queried_accounts": [],
|
| 262 |
+
"_profile_created": False,
|
| 263 |
+
})
|
| 264 |
+
|
| 265 |
+
# Workflow A primary: ZD-001 is unacknowledged, from ACME-001
|
| 266 |
+
records[0].update({
|
| 267 |
+
"title": "Login issue — cannot access my account",
|
| 268 |
+
"urgency": "p1",
|
| 269 |
+
"state": "new",
|
| 270 |
+
"customer_id": "ACME-001",
|
| 271 |
+
"_acknowledged": False,
|
| 272 |
+
})
|
| 273 |
+
|
| 274 |
+
# Workflow C: several tickets from ACME-003
|
| 275 |
+
for i in [4, 11, 17]:
|
| 276 |
+
if i < len(records):
|
| 277 |
+
records[i]["customer_id"] = "ACME-003"
|
| 278 |
+
|
| 279 |
+
return records
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def generate_salesforce_records(n: int = 30, seed: int = SEED) -> List[Dict]:
|
| 283 |
+
"""Generate synthetic Salesforce-like CRM accounts (canonical field names)."""
|
| 284 |
+
random.seed(seed)
|
| 285 |
+
deal_stages = ["prospect", "qualification", "negotiation", "closed_won", "closed_lost"]
|
| 286 |
+
healths = ["green", "yellow", "red"]
|
| 287 |
+
territories = ["west", "east", "central", "apac", "emea"]
|
| 288 |
+
employees = [f"EMP-{i:03d}" for i in range(1, 21)]
|
| 289 |
+
companies = [
|
| 290 |
+
"Acme Corporation", "Globex Systems", "Initech Ltd", "Umbrella Corp",
|
| 291 |
+
"Stark Industries", "Wayne Enterprises", "Hooli Inc", "Pied Piper",
|
| 292 |
+
"Bluth Company", "Vandelay Industries",
|
| 293 |
+
]
|
| 294 |
+
|
| 295 |
+
records = []
|
| 296 |
+
for i in range(1, n + 1):
|
| 297 |
+
records.append({
|
| 298 |
+
"account_id": f"ACME-{i:03d}",
|
| 299 |
+
"company_name": f"{companies[(i-1) % len(companies)]} {i}",
|
| 300 |
+
"deal_stage": random.choice(deal_stages),
|
| 301 |
+
"health": random.choices(healths, weights=[60, 30, 10])[0],
|
| 302 |
+
"owner": random.choice(employees),
|
| 303 |
+
"arr": random.randint(5_000, 200_000),
|
| 304 |
+
"is_paying": random.random() > 0.3,
|
| 305 |
+
"territory": random.choice(territories),
|
| 306 |
+
"industry": random.choice(["tech", "finance", "healthcare", "retail"]),
|
| 307 |
+
# Internal state tracking
|
| 308 |
+
"_account_checked": False,
|
| 309 |
+
"_churn_flagged": False,
|
| 310 |
+
"_team_assigned": False,
|
| 311 |
+
"_intervention_assigned": False,
|
| 312 |
+
})
|
| 313 |
+
|
| 314 |
+
# Workflow A: ACME-001 is a paying customer with yellow health
|
| 315 |
+
records[0].update({
|
| 316 |
+
"company_name": "Acme Corporation",
|
| 317 |
+
"deal_stage": "closed_won",
|
| 318 |
+
"health": "yellow",
|
| 319 |
+
"is_paying": True,
|
| 320 |
+
"arr": 50_000,
|
| 321 |
+
"territory": "west",
|
| 322 |
+
})
|
| 323 |
+
|
| 324 |
+
# Workflow C: ACME-003 is at churn risk
|
| 325 |
+
records[2].update({
|
| 326 |
+
"company_name": "Globex Systems",
|
| 327 |
+
"health": "red",
|
| 328 |
+
"deal_stage": "negotiation",
|
| 329 |
+
"is_paying": True,
|
| 330 |
+
"arr": 30_000,
|
| 331 |
+
"_churn_flagged": False,
|
| 332 |
+
})
|
| 333 |
+
|
| 334 |
+
return records
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def generate_workday_records(n: int = 20, seed: int = SEED) -> List[Dict]:
|
| 338 |
+
"""Generate synthetic Workday-like HR records (canonical field names)."""
|
| 339 |
+
random.seed(seed)
|
| 340 |
+
levels = ["IC1", "IC2", "IC3", "IC4", "M1", "M2"]
|
| 341 |
+
departments = ["engineering", "support", "sales", "hr", "data"]
|
| 342 |
+
territories = ["west", "east", "central", "apac", "emea"]
|
| 343 |
+
|
| 344 |
+
records = []
|
| 345 |
+
for i in range(1, n + 1):
|
| 346 |
+
records.append({
|
| 347 |
+
"employee_id": f"EMP-{i:03d}",
|
| 348 |
+
"name": f"{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}",
|
| 349 |
+
"level": random.choice(levels),
|
| 350 |
+
"manager_id": f"EMP-{random.randint(1, min(i, 5)):03d}" if i > 1 else None,
|
| 351 |
+
"status": random.choices(["active", "pending"], weights=[90, 10])[0],
|
| 352 |
+
"department": random.choice(departments),
|
| 353 |
+
"territory": random.choice(territories),
|
| 354 |
+
"email": f"emp{i}@company.com",
|
| 355 |
+
# Internal state tracking
|
| 356 |
+
"_access_provisioned": {}, # app_name → bool
|
| 357 |
+
"_sla_logged": False,
|
| 358 |
+
"_onboarding_created": False,
|
| 359 |
+
})
|
| 360 |
+
|
| 361 |
+
# Workflow B: one pending new hire to onboard
|
| 362 |
+
records.append({
|
| 363 |
+
"employee_id": "EMP-NEW-001",
|
| 364 |
+
"name": "Jordan Riley",
|
| 365 |
+
"level": "IC2",
|
| 366 |
+
"manager_id": "EMP-001",
|
| 367 |
+
"status": "pending",
|
| 368 |
+
"department": "support",
|
| 369 |
+
"territory": "west",
|
| 370 |
+
"email": "jordan.riley@company.com",
|
| 371 |
+
"_access_provisioned": {},
|
| 372 |
+
"_sla_logged": False,
|
| 373 |
+
"_onboarding_created": False,
|
| 374 |
+
})
|
| 375 |
|
| 376 |
+
return records
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def generate_episode_data(workflow_id: str, seed: int = SEED) -> Dict[str, List[Dict]]:
|
| 380 |
+
"""
|
| 381 |
+
Generate correlated data for a full episode across all 4 apps.
|
| 382 |
+
Cross-references are maintained: Zendesk customer_ids match Salesforce account_ids,
|
| 383 |
+
Jira reporters are Workday employees, etc.
|
| 384 |
+
"""
|
| 385 |
+
return {
|
| 386 |
+
"jira": generate_jira_records(n=50, seed=seed),
|
| 387 |
+
"zendesk": generate_zendesk_records(n=40, seed=seed),
|
| 388 |
+
"salesforce": generate_salesforce_records(n=30, seed=seed),
|
| 389 |
+
"workday": generate_workday_records(n=20, seed=seed),
|
| 390 |
+
}
|
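A quick usage sketch of the episode generator above. Record counts follow the defaults shown, and the Workday list carries one extra pending new-hire record appended for Workflow B.

```python
from server.data_generator import generate_episode_data

data = generate_episode_data(workflow_id="A", seed=42)

print(sorted(data))                                    # ['jira', 'salesforce', 'workday', 'zendesk']
print(len(data["jira"]), len(data["zendesk"]))         # 50 40
print(len(data["salesforce"]), len(data["workday"]))   # 30 21 (20 plus EMP-NEW-001)

# Cross-references line up: the seeded Zendesk ticket ZD-001 points at
# Salesforce account ACME-001, which is a paying customer with yellow health.
zd1, sf1 = data["zendesk"][0], data["salesforce"][0]
print(zd1["ticket_number"], zd1["customer_id"])        # ZD-001 ACME-001
print(sf1["account_id"], sf1["health"])                # ACME-001 yellow
```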
server/environment.py
CHANGED
@@ -1,41 +1,61 @@
"""OrgOS environment — the single stateful RL environment object."""

import uuid
from typing import Dict, Optional

from models import OrgOSAction, OrgOSObservation, OrgOSState, RewardBreakdown
from server.apps.jira import JiraApp
from server.apps.zendesk import ZendeskApp
from server.apps.salesforce import SalesforceApp
from server.apps.workday import WorkdayApp
from server.business_rules import BusinessRuleEngine
from server.data_generator import generate_episode_data
from server.schema_drift import SchemaDriftEngine
from server.workflow_engine import WorkflowEngine


class OrgOSEnvironment:
    MAX_STEPS = {"A": 15, "B": 20, "C": 18}
    WORKFLOWS = ["A", "B", "C"]

    def __init__(self):
        self._drift = SchemaDriftEngine(seed=42)
        self._rules = BusinessRuleEngine()
        self._workflow = WorkflowEngine()
        self._apps: Dict[str, object] = {
            "jira": JiraApp(self._drift),
            "zendesk": ZendeskApp(self._drift),
            "salesforce": SalesforceApp(self._drift),
            "workday": WorkdayApp(self._drift),
        }
        self._episode_num = 0
        self._episode_id = ""
        self._workflow_id = "A"
        self._step_count = 0
        self._last_score = 0.001
        self._policy_drift_applied = False

        # Reward component trackers
        self._wf_score = 0.0      # workflow completion
        self._rule_score = 1.0    # compliance (starts perfect, penalized on violation)
        self._schema_score = 0.0  # schema adaptation successes
        self._efficiency = 1.0    # degrades with failed/no-op actions
        self._policy_score = 0.0  # policy drift handling bonus

    # ------------------------------------------------------------------
    # OpenEnv core API
    # ------------------------------------------------------------------

    def reset(self, workflow_id: Optional[str] = None) -> OrgOSObservation:
        self._episode_num += 1
        self._episode_id = str(uuid.uuid4())
        self._workflow_id = workflow_id or self.WORKFLOWS[(self._episode_num - 1) % 3]
        self._step_count = 0
        self._last_score = 0.001
        self._rule_score = 1.0
        self._wf_score = 0.0
        self._schema_score = 0.0
        self._efficiency = 1.0
        self._policy_score = 0.0
        self._policy_drift_applied = False
@@ -56,70 +76,122 @@ class OrgOSEnvironment:
        # Start workflow tracking
        self._workflow.start(self._workflow_id)

        return self._build_obs(
            reward=0.001,
            done=False,
            message="Episode started. Study the workflow goal and schema hints before acting.",
        )

    def step(self, action: OrgOSAction) -> OrgOSObservation:
        self._step_count += 1
        old_score = self._last_score
        extra_penalty = 0.0

        # 1. Validate app exists
        if action.app not in self._apps:
            return self._build_obs(
                reward=old_score - 0.05,
                done=False,
                message=f"Unknown app '{action.app}'. Valid apps: {list(self._apps)}",
            )

        # 2. Business rule check (RBAC, approvals)
        agent_role = self._workflow.get_role()
        ctx = {"agent_role": agent_role, "manager_approved": False}
        allowed, reason, rule_penalty = self._rules.check_action(action, ctx)
        if not allowed:
            self._rule_score = max(0.0, self._rule_score - 0.08)
            extra_penalty = rule_penalty
            return self._build_obs(
                reward=max(-0.25, old_score + extra_penalty),
                done=False,
                message=f"Rule violation: {reason}",
            )

        # 3. Execute on app
        result = self._apps[action.app].execute(action.operation, action.args)

        # 4. Check schema drift FIRST — apps return success:False when schema_error is set
        if result.get("schema_error"):
            self._efficiency -= 0.02
            return self._build_obs(
                reward=old_score - 0.20,
                done=False,
                message=(
                    f"Stale schema: field '{result['schema_error']}' is no longer valid. "
                    "Check schema_hints for the current field name. "
                    f"Hint: {result.get('message', '')}"
                ),
            )

        if not result.get("success"):
            self._efficiency -= 0.02  # penalize failed/no-op actions
            return self._build_obs(
                reward=old_score - 0.01,
                done=False,
                message=result.get("message", "Operation failed"),
            )

        # Schema adaptation bonus (agent used correct drifted field name)
        if result.get("schema_adapted"):
            self._schema_score = min(1.0, self._schema_score + 0.10)
            self._policy_score = min(1.0, self._policy_score + 0.05)

        # 5. Re-evaluate workflow completion
        self._wf_score = self._workflow.evaluate(self._apps)

        # 6. SLA check (only if a ticket was touched)
        sla_ok, sla_pen = self._rules.check_sla(
            result.get("ticket", {}),
            self._step_count * 2.5,  # approximate 2.5 min per step
        )
        if not sla_ok:
            extra_penalty += sla_pen
            self._rule_score = max(0.0, self._rule_score - 0.05)

        # 7. Compute composite score
        new_score = self._compute_score()
        delta = new_score - old_score + extra_penalty
        self._last_score = max(0.001, min(0.999, new_score))

        # 8. Terminal condition
        done = (
            self._wf_score >= 0.95
            or self._step_count >= self.MAX_STEPS[self._workflow_id]
        )
        if done and self._wf_score >= 0.95:
            delta += 0.20  # terminal completion bonus

        return self._build_obs(
            reward=delta,
            done=done,
            message=result.get("message", "OK"),
        )

    # ------------------------------------------------------------------
    # State endpoint
    # ------------------------------------------------------------------

    def state(self) -> OrgOSState:
        return OrgOSState(
            episode_id = self._episode_id,
            workflow_id = self._workflow_id,
            schema_versions = self._drift._versions,
            step_count = self._step_count,
            max_steps = self.MAX_STEPS.get(self._workflow_id, 15),
            rule_violation_count = len(self._rules._violation_log),
            workflow_completion = self._wf_score,
            rule_compliance_rate = self._rule_score,
            policy_drift_active = self._policy_drift_applied,
        )

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _compute_score(self) -> float:
        raw = (
            0.30 * self._wf_score +
            0.25 * self._rule_score +
            0.20 * self._schema_score +
            0.15 * self._efficiency +
@@ -127,15 +199,50 @@ class OrgOSEnvironment:
        )
        return max(0.001, min(0.999, raw))

    def _build_obs(self, reward: float, done: bool, message: str) -> OrgOSObservation:
        """Construct a fully-populated observation from current environment state."""
        # Per-app state previews
        app_states = {
            name: app.get_state_view(max_rows=3)
            for name, app in self._apps.items()
        }

        # Schema hints (partial — agent must probe to discover full mapping)
        schema_hints = self._drift.get_all_changes()
        # Flatten to dot-notation: {"jira.priority": "severity", ...}
        flat_hints: Dict[str, str] = {}
        for app_name, field_map in schema_hints.items():
            for canonical, drifted in field_map.items():
                if canonical != drifted:
                    flat_hints[f"{app_name}.{canonical}"] = drifted

        # Workflow progress
        completed_steps = self._workflow.get_completed()
        pending_steps = self._workflow.get_pending()
        workflow_goal = self._workflow.get_goal()

        # Reward breakdown snapshot
        breakdown = RewardBreakdown(
            workflow_completion = self._wf_score,
            rule_compliance = self._rule_score,
            schema_adaptation = self._schema_score,
            efficiency = self._efficiency,
            policy_drift_handling = self._policy_score,
        )

        return OrgOSObservation(
            done = done,
            reward = round(float(reward), 6),
            current_score = float(self._last_score),
            workflow_id = self._workflow_id,
            step_count = self._step_count,
            app_states = app_states,
            workflow_goal = workflow_goal,
            completed_steps = completed_steps,
            pending_steps = pending_steps,
            schema_hints = flat_hints,
            active_rules = self._rules.get_active_rules_summary(),
            rule_violations = self._rules.get_violations_this_step(),
            reward_breakdown = breakdown,
            message = message,
        )
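For reference, a minimal driving loop against this environment, run in-process rather than over HTTP. The observation fields used here follow _build_obs above, but the args passed to acknowledge_ticket are an illustrative guess; the real operation signatures live in server/apps/zendesk.py.

```python
# Sketch only: the args dict below is a hypothetical shape for acknowledge_ticket.
from models import OrgOSAction
from server.environment import OrgOSEnvironment

env = OrgOSEnvironment()
obs = env.reset(workflow_id="A")
print(obs.workflow_goal)   # natural-language goal for Workflow A
print(obs.schema_hints)    # e.g. {"jira.priority": "severity", ...} when drift is active

action = OrgOSAction(app="zendesk", operation="acknowledge_ticket",
                     args={"ticket_number": "ZD-001"})
obs = env.step(action)
print(obs.reward, obs.done, obs.message)
# If the acknowledge succeeded, step A1 of Workflow A's five steps is done:
print(obs.reward_breakdown.workflow_completion)   # expected ~0.2
```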
server/schema_drift.py
CHANGED
@@ -1,33 +1,39 @@
"""Schema drift engine — manages per-episode field-name versioning across all 4 apps."""

import random
from typing import Dict, Optional

# Canonical field → actual field name, per app, per schema version
SCHEMA_MAP = {
    "jira": {
        "v1": {"priority": "priority", "assignee": "assignee", "status": "status"},
        "v2": {"priority": "severity", "assignee": "owner", "status": "state"},
        "v3": {"priority": "urgency_level", "assignee": "assigned_to", "status": "current_state",
               "sla_deadline": "due_by"},
    },
    "zendesk": {
        "v1": {"urgency": "urgency", "agent_email": "agent_email", "state": "state"},
        "v2": {"urgency": "priority", "agent_email": "handler", "state": "ticket_state"},
        "v3": {"urgency": "impact_level", "agent_email": "assigned_agent", "state": "resolution_status"},
    },
    "salesforce": {
        "v1": {"deal_stage": "deal_stage", "health": "health", "owner": "owner_name"},
        "v2": {"deal_stage": "pipeline_stage", "health": "account_health", "owner": "account_owner"},
        "v3": {"deal_stage": "stage", "health": "risk_score", "owner": "rep_email",
               "arr": "annual_recurring_revenue"},
    },
    "workday": {
        "v1": {"level": "level", "manager_id": "manager_id", "status": "resolution"},
        "v2": {"level": "job_level", "manager_id": "reports_to", "status": "request_status"},
        "v3": {"level": "seniority", "manager_id": "direct_manager", "status": "approval_state"},
    },
}


class SchemaDriftEngine:
    def __init__(self, seed: int = 42):
        self._seed = seed
        self._versions: Dict[str, str] = {app: "v1" for app in SCHEMA_MAP}

    def sample_for_episode(self, episode_num: int) -> None:
        """Sample schema versions deterministically per episode."""
@@ -35,21 +41,58 @@ class SchemaDriftEngine:
        self._versions = {app: rng.choice(["v1", "v2", "v3"]) for app in SCHEMA_MAP}

    def translate_record(self, record: Dict, app: str) -> Dict:
        """Rename canonical field names → current schema's field names (for output to agent)."""
        version = self._versions.get(app, "v1")
        mapping = SCHEMA_MAP.get(app, {}).get(version, {})
        return {mapping.get(k, k): v for k, v in record.items()
                if not k.startswith("_")}  # strip internal state-tracking fields

    def translate_field(self, canonical_field: str, app: str) -> str:
        """Get the current drifted name for a canonical field."""
        version = self._versions.get(app, "v1")
        mapping = SCHEMA_MAP.get(app, {}).get(version, {})
        return mapping.get(canonical_field, canonical_field)

    def check_args_for_drift(self, args: Dict, app: str):
        """
        Check whether action args use canonical (stale) vs drifted (correct) field names.
        Returns (schema_error: Optional[str], schema_adapted: bool).
        - schema_error: the canonical field name the agent incorrectly used, or None
        - schema_adapted: True if agent correctly used a drifted field name
        """
        version = self._versions.get(app, "v1")
        if version == "v1":
            return None, False  # v1 is canonical — no drift, no credit/penalty

        mapping = SCHEMA_MAP.get(app, {}).get(version, {})
        changed = {k: v for k, v in mapping.items() if k != v}  # canonical → drifted
        reverse = {v: k for k, v in changed.items()}            # drifted → canonical

        for key in args:
            if key in changed:
                return key, False   # Agent used old canonical name on drifted schema → error
            if key in reverse:
                return None, True   # Agent correctly used drifted name → adaptation bonus

        return None, False

    def get_hints(self) -> Dict[str, str]:
        """Return partial schema hints visible in observation.
        Reveals 1 changed field per app that has drift (agent must discover the rest)."""
        hints = {}
        rng = random.Random(self._seed)
        for app, version in self._versions.items():
            mapping = SCHEMA_MAP.get(app, {}).get(version, {})
            changed = {f"{app}.{k}": v for k, v in mapping.items() if k != v}
            if changed:
                key = rng.choice(list(changed.keys()))
                hints[key] = changed[key]
        return hints

    def get_all_changes(self) -> Dict[str, Dict[str, str]]:
        """Return all field changes for every app (used by UI schema drift viewer)."""
        result = {}
        for app, version in self._versions.items():
            mapping = SCHEMA_MAP.get(app, {}).get(version, {})
            result[app] = {k: v for k, v in mapping.items() if k != v}
        return result
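A small usage sketch of the drift check above, forcing a v2 Jira schema by hand (normally the version is picked by sample_for_episode):

```python
from server.schema_drift import SchemaDriftEngine

drift = SchemaDriftEngine(seed=42)
drift._versions["jira"] = "v2"   # force drift for the sake of the example

# Stale canonical field name: flagged as a schema error, no adaptation credit
print(drift.check_args_for_drift({"priority": "p1"}, "jira"))   # ('priority', False)

# Correct drifted field name: no error, adaptation bonus
print(drift.check_args_for_drift({"severity": "p1"}, "jira"))   # (None, True)

# Field untouched by drift: neutral
print(drift.check_args_for_drift({"title": "Bug"}, "jira"))     # (None, False)

print(drift.translate_field("assignee", "jira"))                # 'owner'
```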
server/workflow_engine.py
CHANGED
@@ -1,3 +1,9 @@
"""Workflow engine — defines and evaluates multi-app workflow completion."""

from dataclasses import dataclass
from typing import Callable, Dict, List


@dataclass
class WorkflowStep:
    step_id: str
@@ -5,59 +11,179 @@ class WorkflowStep:
    app: str
    operation: str
    # Callable that checks if this step was completed given the app states
    completion_check: Callable[[Dict], bool]


# ---------------------------------------------------------------------------
# Workflow A: Customer Bug Fix (Zendesk → Jira → Salesforce → Workday)
# Agent role: support
# ---------------------------------------------------------------------------
WORKFLOW_A_STEPS = [
    WorkflowStep(
        "A1", "Acknowledge the incoming Zendesk ticket (ZD-001)",
        "zendesk", "acknowledge_ticket",
        lambda apps: apps["zendesk"].ticket_acknowledged(),
    ),
    WorkflowStep(
        "A2", "Escalate to Jira — create a new issue linked to ZD-001",
        "jira", "create_issue",
        lambda apps: apps["jira"].has_linked_issue(),
    ),
    WorkflowStep(
        "A3", "Verify the customer's account status in Salesforce (ACME-001)",
        "salesforce", "get_account",
        lambda apps: apps["salesforce"].account_checked(),
    ),
    WorkflowStep(
        "A4", "Assign the Jira issue to an engineer (JIRA-001)",
        "jira", "assign_owner",
        lambda apps: apps["jira"].issue_assigned(),
    ),
    WorkflowStep(
        "A5", "Log the SLA compliance event in Workday",
        "workday", "log_sla_event",
        lambda apps: apps["workday"].sla_logged(),
    ),
]

# ---------------------------------------------------------------------------
# Workflow B: Employee Onboarding (Workday → Workday → Salesforce → Zendesk)
# Agent role: manager
# ---------------------------------------------------------------------------
WORKFLOW_B_STEPS = [
    WorkflowStep(
        "B1", "Create the new employee's onboarding record in Workday (EMP-NEW-001)",
        "workday", "create_onboarding_task",
        lambda apps: apps["workday"].employee_created(),
    ),
    WorkflowStep(
        "B2", "Provision Jira access for the new employee via Workday",
        "workday", "provision_access",
        lambda apps: apps["workday"].access_provisioned("jira"),
    ),
    WorkflowStep(
        "B3", "Assign the new employee to the correct Salesforce territory team",
        "salesforce", "assign_account_owner",
        lambda apps: apps["salesforce"].team_assigned(),
    ),
    WorkflowStep(
        "B4", "Create a Zendesk support agent profile for the new employee",
        "zendesk", "assign_agent",
        lambda apps: apps["zendesk"].profile_created(),
    ),
]

# ---------------------------------------------------------------------------
# Workflow C: Churn Risk Alert (Salesforce → Zendesk → Jira → Salesforce)
# Agent role: support
# ---------------------------------------------------------------------------
WORKFLOW_C_STEPS = [
    WorkflowStep(
        "C1", "Flag at-risk account ACME-003 as churn risk in Salesforce",
        "salesforce", "flag_churn_risk",
        lambda apps: apps["salesforce"].churn_flagged(),
    ),
    WorkflowStep(
        "C2", "Query recent support ticket volume for ACME-003 in Zendesk",
        "zendesk", "get_ticket",
        lambda apps: apps["zendesk"].support_queried("ACME-003"),
    ),
    WorkflowStep(
        "C3", "Check outstanding Jira bugs linked to ACME-003",
        "jira", "list_issues",
        lambda apps: apps["jira"].bugs_checked(),
    ),
    WorkflowStep(
        "C4", "Assign an intervention owner to ACME-003 in Salesforce",
        "salesforce", "assign_account_owner",
        lambda apps: apps["salesforce"].intervention_assigned(),
    ),
]

# ---------------------------------------------------------------------------
# Goal descriptions shown to the agent at reset
# ---------------------------------------------------------------------------
WORKFLOW_GOALS: Dict[str, str] = {
    "A": (
        "Workflow A — Customer Bug Fix: "
        "A P1 bug has been reported via Zendesk (ticket ZD-001) by customer ACME-001. "
        "Steps required: "
        "(1) acknowledge Zendesk ticket ZD-001, "
        "(2) create a new Jira issue linked to ZD-001, "
        "(3) verify ACME-001's account status in Salesforce, "
        "(4) assign the Jira issue (JIRA-001) to an engineer, "
        "(5) log the SLA compliance event in Workday. "
        "Use list operations if you need to discover record IDs."
    ),
    "B": (
        "Workflow B — Employee Onboarding: "
        "A new support engineer has joined the West team. "
        "Employee ID: EMP-NEW-001, Name: Alex Rivera, department: support, territory: west. "
        "Steps required: "
        "(1) create an onboarding record in Workday for EMP-NEW-001, "
        "(2) provision Jira access for EMP-NEW-001 via Workday, "
        "(3) assign EMP-NEW-001 to the correct Salesforce territory (use any ACME-* account in the west region), "
        "(4) create a Zendesk agent profile for EMP-NEW-001. "
        "You have manager-level access."
    ),
    "C": (
        "Workflow C — Churn Risk Alert: "
        "Account ACME-003 (GlobalTech) is showing churn signals. "
        "Steps required: "
        "(1) flag ACME-003 as a churn risk in Salesforce, "
        "(2) query recent support tickets for ACME-003 in Zendesk (use customer_id=ACME-003), "
        "(3) list open Jira bugs related to ACME-003, "
        "(4) assign an intervention owner to ACME-003 in Salesforce. "
        "Focus account: ACME-003."
    ),
}

# Role each workflow expects the agent to act as
WORKFLOW_ROLES: Dict[str, str] = {
    "A": "support",
    "B": "manager",
    "C": "support",
}


class WorkflowEngine:
    WORKFLOWS = {
        "A": WORKFLOW_A_STEPS,
        "B": WORKFLOW_B_STEPS,
        "C": WORKFLOW_C_STEPS,
    }

    def __init__(self):
        self._steps: List[WorkflowStep] = []
        self._completed: List[str] = []
        self._workflow_id: str = "A"

    def start(self, workflow_id: str) -> None:
        """Initialise engine for the given workflow."""
        self._workflow_id = workflow_id
        self._steps = self.WORKFLOWS[workflow_id].copy()
        self._completed = []

    def evaluate(self, apps: Dict) -> float:
        """Check all steps and return completion ratio (0.0–1.0)."""
        if not self._steps:
            return 0.0
        completed = sum(1 for s in self._steps if s.completion_check(apps))
        self._completed = [s.step_id for s in self._steps if s.completion_check(apps)]
        return completed / len(self._steps)

    def get_pending(self) -> List[str]:
        """Return descriptions of not-yet-completed steps."""
        return [s.description for s in self._steps if s.step_id not in self._completed]

    def get_completed(self) -> List[str]:
        """Return step IDs that have been completed."""
        return list(self._completed)

    def get_goal(self) -> str:
        """Return the natural-language goal description for the active workflow."""
        return WORKFLOW_GOALS.get(self._workflow_id, "Complete the assigned workflow.")

    def get_role(self) -> str:
        """Return the expected agent role for RBAC checks."""
        return WORKFLOW_ROLES.get(self._workflow_id, "support")
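A minimal sketch of how the engine scores progress, using SimpleNamespace stubs in place of the real JiraApp, ZendeskApp, SalesforceApp, and WorkdayApp objects:

```python
# Illustration only: throwaway stubs stand in for the real app classes.
from types import SimpleNamespace
from server.workflow_engine import WorkflowEngine

apps = {
    "zendesk":    SimpleNamespace(ticket_acknowledged=lambda: True,
                                  support_queried=lambda acct: False,
                                  profile_created=lambda: False),
    "jira":       SimpleNamespace(has_linked_issue=lambda: True,
                                  issue_assigned=lambda: False,
                                  bugs_checked=lambda: False),
    "salesforce": SimpleNamespace(account_checked=lambda: False,
                                  churn_flagged=lambda: False,
                                  team_assigned=lambda: False,
                                  intervention_assigned=lambda: False),
    "workday":    SimpleNamespace(sla_logged=lambda: False,
                                  employee_created=lambda: False,
                                  access_provisioned=lambda app: False),
}

engine = WorkflowEngine()
engine.start("A")
print(engine.evaluate(apps))       # 0.4, since steps A1 and A2 of 5 are satisfied
print(engine.get_completed())      # ['A1', 'A2']
print(len(engine.get_pending()))   # 3 remaining step descriptions
```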
training/grpo_orgos.ipynb
ADDED
@@ -0,0 +1,550 @@
{
  "nbformat": 4,
  "nbformat_minor": 5,
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.0"
    },
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "id": "title",
      "metadata": {},
      "source": [
        "# OrgOS GRPO Training Notebook\n",
        "\n",
        "**Environment:** OrgOS — Multi-App Enterprise RL Environment \n",
        "**Model:** `Qwen/Qwen2.5-3B-Instruct` (4-bit LoRA via Unsloth) \n",
        "**Algorithm:** GRPO (Group Relative Policy Optimization) via HuggingFace TRL \n",
        "**Hardware:** Colab T4 (free tier compatible) \n",
        "\n",
        "## What this notebook does\n",
        "1. Installs dependencies (Unsloth + TRL)\n",
        "2. Loads Qwen2.5-3B-Instruct with 4-bit LoRA\n",
        "3. Collects **baseline rollouts** (untrained model) on Workflows A & C\n",
        "4. Fine-tunes with **GRPOTrainer** using OrgOS dense rewards\n",
        "5. Collects **post-training rollouts** and computes score improvement\n",
        "6. Plots the **before/after reward curve** for the demo\n",
        "\n",
        "**Key training signal:** The schema drift mechanic creates a sharp signal gap —\n",
        "an untrained model uses stale canonical field names (−0.20 per step),\n",
        "while a GRPO-trained model learns to read `schema_hints` first (+reward).\n",
        "This produces a clear, visually compelling before/after improvement."
      ]
    },
    {
      "cell_type": "markdown",
      "id": "sec1",
      "metadata": {},
      "source": ["## 1. Install Dependencies"]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "install",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Install Unsloth (optimised 4-bit LLM training) + TRL (GRPO)\n",
        "!pip install -q unsloth[colab-new] trl>=0.9.0 peft accelerate bitsandbytes\n",
        "!pip install -q fastapi uvicorn httpx openai pydantic\n",
        "!pip install -q matplotlib numpy\n",
        "\n",
        "# Clone / mount the OrgOS repo\n",
        "import os\n",
        "if not os.path.exists('/content/openEnv'):\n",
        "    !git clone https://huggingface.co/spaces/YOUR_HF_USERNAME/orgos-openenv /content/openEnv\n",
        "    # Alternatively: upload the repo zip and unzip it here\n",
        "\n",
        "os.chdir('/content/openEnv')\n",
        "print('Working directory:', os.getcwd())"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "sec2",
      "metadata": {},
      "source": ["## 2. Load Model with Unsloth 4-bit LoRA"]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "load_model",
      "metadata": {},
      "outputs": [],
      "source": [
        "from unsloth import FastLanguageModel\n",
        "import torch\n",
        "\n",
        "MAX_SEQ_LEN = 2048\n",
        "MODEL_NAME = 'Qwen/Qwen2.5-3B-Instruct'\n",
        "\n",
        "model, tokenizer = FastLanguageModel.from_pretrained(\n",
        "    model_name = MODEL_NAME,\n",
        "    max_seq_length = MAX_SEQ_LEN,\n",
        "    dtype = None,  # auto-detect\n",
        "    load_in_4bit = True,\n",
        ")\n",
        "\n",
        "# Add LoRA adapters\n",
        "model = FastLanguageModel.get_peft_model(\n",
        "    model,\n",
        "    r = 16,\n",
        "    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj',\n",
        "                      'gate_proj', 'up_proj', 'down_proj'],\n",
        "    lora_alpha = 16,\n",
        "    lora_dropout = 0,\n",
        "    bias = 'none',\n",
        "    use_gradient_checkpointing = 'unsloth',\n",
        "    random_state = 42,\n",
        ")\n",
        "print(f'Model loaded — trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}')"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "sec3",
      "metadata": {},
      "source": ["## 3. Start the OrgOS Environment Server (subprocess)"]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "start_server",
      "metadata": {},
      "outputs": [],
      "source": [
        "import subprocess, time, httpx\n",
        "\n",
        "server_proc = subprocess.Popen(\n",
        "    ['python', '-m', 'uvicorn', 'server.app:app', '--host', '0.0.0.0', '--port', '8000'],\n",
        "    stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL\n",
        ")\n",
        "time.sleep(3)\n",
        "\n",
        "health = httpx.get('http://localhost:8000/health').json()\n",
        "assert health['status'] == 'healthy', f'Server not healthy: {health}'\n",
        "print('OrgOS server running — health:', health)"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "sec4",
+
"metadata": {},
|
| 146 |
+
"source": ["## 4. Rollout Harness (collect trajectories)"]
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"cell_type": "code",
|
| 150 |
+
"execution_count": null,
|
| 151 |
+
"id": "rollout_harness",
|
| 152 |
+
"metadata": {},
|
| 153 |
+
"outputs": [],
|
| 154 |
+
"source": [
|
| 155 |
+
"import json, re, sys\n",
|
| 156 |
+
"from typing import List, Dict, Tuple\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"SYSTEM_PROMPT = open('inference.py').read().split('SYSTEM_PROMPT = \\\"\\\"\\\"')[1].split('\\\"\\\"\\\"')[0]\n",
|
| 159 |
+
"\n",
|
| 160 |
+
"def obs_to_text(obs: dict) -> str:\n",
|
| 161 |
+
" \"\"\"Convert observation dict to text for the model.\"\"\"\n",
|
| 162 |
+
" hints = obs.get('schema_hints', {})\n",
|
| 163 |
+
" pending = obs.get('pending_steps', [])\n",
|
| 164 |
+
" return (\n",
|
| 165 |
+
" f\"current_score: {obs['current_score']}\\n\"\n",
|
| 166 |
+
" f\"step_count: {obs['step_count']}\\n\"\n",
|
| 167 |
+
" f\"workflow_id: {obs['workflow_id']}\\n\\n\"\n",
|
| 168 |
+
" f\"=== WORKFLOW GOAL ===\\n{obs['workflow_goal']}\\n\\n\"\n",
|
| 169 |
+
" f\"=== PENDING STEPS ===\\n\" + ('\\n'.join(f'- {s}' for s in pending) or '(done!)') + \"\\n\\n\"\n",
|
| 170 |
+
" f\"=== SCHEMA HINTS ===\\n{json.dumps(hints, indent=2)}\\n\\n\"\n",
|
| 171 |
+
" f\"=== ACTIVE RULES ===\\n{json.dumps(obs.get('active_rules', {}), indent=2)}\\n\\n\"\n",
|
| 172 |
+
" f\"=== LAST MESSAGE ===\\n{obs['message']}\\n\"\n",
|
| 173 |
+
" )\n",
|
| 174 |
+
"\n",
|
| 175 |
+
"def generate_action(prompt_messages: List[Dict], max_tokens=256) -> str:\n",
|
| 176 |
+
" \"\"\"Run the model to produce an action JSON string.\"\"\"\n",
|
| 177 |
+
" from transformers import GenerationConfig\n",
|
| 178 |
+
" # Format as chat\n",
|
| 179 |
+
" text = tokenizer.apply_chat_template(\n",
|
| 180 |
+
" prompt_messages, tokenize=False, add_generation_prompt=True\n",
|
| 181 |
+
" )\n",
|
| 182 |
+
" inputs = tokenizer(text, return_tensors='pt').to(model.device)\n",
|
| 183 |
+
" with torch.no_grad():\n",
|
| 184 |
+
" out = model.generate(\n",
|
| 185 |
+
" **inputs,\n",
|
| 186 |
+
" max_new_tokens = max_tokens,\n",
|
| 187 |
+
" temperature = 0.7,\n",
|
| 188 |
+
" do_sample = True,\n",
|
| 189 |
+
" pad_token_id = tokenizer.eos_token_id,\n",
|
| 190 |
+
" )\n",
|
| 191 |
+
" decoded = tokenizer.decode(out[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)\n",
|
| 192 |
+
" return decoded.strip()\n",
|
| 193 |
+
"\n",
|
| 194 |
+
"def run_episode(workflow_id: str, max_steps: int = 15) -> Tuple[List[dict], float]:\n",
|
| 195 |
+
" \"\"\"\n",
|
| 196 |
+
" Run one episode. Returns (trajectory, final_score).\n",
|
| 197 |
+
" trajectory = list of {'messages': [...], 'reward': float}\n",
|
| 198 |
+
" \"\"\"\n",
|
| 199 |
+
" resp = httpx.post('http://localhost:8000/reset', json={'workflow_id': workflow_id})\n",
|
| 200 |
+
" obs = resp.json()['observation']\n",
|
| 201 |
+
" history = []\n",
|
| 202 |
+
" trajectory = []\n",
|
| 203 |
+
" cumulative_reward = 0.0\n",
|
| 204 |
+
"\n",
|
| 205 |
+
" for step_i in range(max_steps):\n",
|
| 206 |
+
" if obs['done']:\n",
|
| 207 |
+
" break\n",
|
| 208 |
+
"\n",
|
| 209 |
+
" obs_text = obs_to_text(obs)\n",
|
| 210 |
+
" history.append({'role': 'user', 'content': obs_text})\n",
|
| 211 |
+
"\n",
|
| 212 |
+
" msgs = [{'role': 'system', 'content': SYSTEM_PROMPT}] + history[-10:]\n",
|
| 213 |
+
" action_str = generate_action(msgs)\n",
|
| 214 |
+
"\n",
|
| 215 |
+
" history.append({'role': 'assistant', 'content': action_str})\n",
|
| 216 |
+
"\n",
|
| 217 |
+
" # Parse action\n",
|
| 218 |
+
" action = None\n",
|
| 219 |
+
" try:\n",
|
| 220 |
+
" action = json.loads(action_str)\n",
|
| 221 |
+
" except:\n",
|
| 222 |
+
" m = re.search(r'\\{.*\\}', action_str, re.DOTALL)\n",
|
| 223 |
+
" if m:\n",
|
| 224 |
+
" try: action = json.loads(m.group())\n",
|
| 225 |
+
" except: pass\n",
|
| 226 |
+
"\n",
|
| 227 |
+
" if action is None:\n",
|
| 228 |
+
" cumulative_reward -= 0.05\n",
|
| 229 |
+
" break\n",
|
| 230 |
+
"\n",
|
| 231 |
+
" result = httpx.post('http://localhost:8000/step', json=action).json()\n",
|
| 232 |
+
" obs = result['observation']\n",
|
| 233 |
+
" reward = result['reward']\n",
|
| 234 |
+
" cumulative_reward += reward\n",
|
| 235 |
+
"\n",
|
| 236 |
+
" # Store step for GRPO\n",
|
| 237 |
+
" trajectory.append({\n",
|
| 238 |
+
" 'messages': msgs + [{'role': 'assistant', 'content': action_str}],\n",
|
| 239 |
+
" 'reward': reward,\n",
|
| 240 |
+
" })\n",
|
| 241 |
+
"\n",
|
| 242 |
+
" if obs['done']:\n",
|
| 243 |
+
" break\n",
|
| 244 |
+
"\n",
|
| 245 |
+
" return trajectory, obs.get('current_score', 0.001)\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"print('Rollout harness ready.')"
|
| 248 |
+
]
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"cell_type": "markdown",
|
| 252 |
+
"id": "sec5",
|
| 253 |
+
"metadata": {},
|
| 254 |
+
"source": ["## 5. Collect Baseline Rollouts (Pre-Training)"]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": null,
|
| 259 |
+
"id": "baseline_rollouts",
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"outputs": [],
|
| 262 |
+
"source": [
|
| 263 |
+
"import numpy as np\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"N_BASELINE = 30 # 30 episodes pre-training (10 per workflow)\n",
|
| 266 |
+
"\n",
|
| 267 |
+
"baseline_scores = {'A': [], 'B': [], 'C': []}\n",
|
| 268 |
+
"all_trajectories = []\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"print('Collecting baseline rollouts...')\n",
|
| 271 |
+
"for wf in ['A', 'B', 'C']:\n",
|
| 272 |
+
" for ep in range(N_BASELINE // 3):\n",
|
| 273 |
+
" traj, score = run_episode(wf)\n",
|
| 274 |
+
" baseline_scores[wf].append(score)\n",
|
| 275 |
+
" all_trajectories.extend(traj)\n",
|
| 276 |
+
" print(f' Workflow {wf} ep {ep+1}: score={score:.4f}', end='\\r')\n",
|
| 277 |
+
" print(f' Workflow {wf}: mean={np.mean(baseline_scores[wf]):.4f} ± {np.std(baseline_scores[wf]):.4f}')\n",
|
| 278 |
+
"\n",
|
| 279 |
+
"print(f'\\nTotal baseline episodes: {N_BASELINE}')\n",
|
| 280 |
+
"print(f'Total trajectory steps: {len(all_trajectories)}')\n",
|
| 281 |
+
"print(f'Overall baseline mean: {np.mean([s for v in baseline_scores.values() for s in v]):.4f}')"
|
| 282 |
+
]
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"cell_type": "markdown",
|
| 286 |
+
"id": "sec6",
|
| 287 |
+
"metadata": {},
|
| 288 |
+
"source": ["## 6. Build GRPO Dataset from Trajectories"]
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"cell_type": "code",
|
| 292 |
+
"execution_count": null,
|
| 293 |
+
"id": "build_dataset",
|
| 294 |
+
"metadata": {},
|
| 295 |
+
"outputs": [],
|
| 296 |
+
"source": [
|
| 297 |
+
"from datasets import Dataset\n",
|
| 298 |
+
"\n",
|
| 299 |
+
"def trajectories_to_dataset(trajectories: List[dict]) -> Dataset:\n",
|
| 300 |
+
" \"\"\"\n",
|
| 301 |
+
" Convert trajectory steps into a GRPO-compatible dataset.\n",
|
| 302 |
+
" Each row = one (prompt, completion, reward) triple.\n",
|
| 303 |
+
" \"\"\"\n",
|
| 304 |
+
" rows = []\n",
|
| 305 |
+
" for step in trajectories:\n",
|
| 306 |
+
" messages = step['messages']\n",
|
| 307 |
+
" reward = step['reward']\n",
|
| 308 |
+
" # Separate prompt (all but last assistant turn) from completion\n",
|
| 309 |
+
" prompt_msgs = messages[:-1]\n",
|
| 310 |
+
" completion = messages[-1]['content']\n",
|
| 311 |
+
" prompt_text = tokenizer.apply_chat_template(\n",
|
| 312 |
+
" prompt_msgs, tokenize=False, add_generation_prompt=True\n",
|
| 313 |
+
" )\n",
|
| 314 |
+
" rows.append({'prompt': prompt_text, 'completion': completion, 'reward': reward})\n",
|
| 315 |
+
" return Dataset.from_list(rows)\n",
|
| 316 |
+
"\n",
|
| 317 |
+
"train_dataset = trajectories_to_dataset(all_trajectories)\n",
|
| 318 |
+
"print(f'Training dataset: {len(train_dataset)} examples')\n",
|
| 319 |
+
"print(f'Reward range: [{min(train_dataset[\"reward\"]):.4f}, {max(train_dataset[\"reward\"]):.4f}]')\n",
|
| 320 |
+
"print(f'Mean reward: {np.mean(train_dataset[\"reward\"]):.4f}')\n",
|
| 321 |
+
"train_dataset[0]"
|
| 322 |
+
]
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"cell_type": "markdown",
|
| 326 |
+
"id": "sec7",
|
| 327 |
+
"metadata": {},
|
| 328 |
+
"source": ["## 7. GRPO Training"]
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"cell_type": "code",
|
| 332 |
+
"execution_count": null,
|
| 333 |
+
"id": "grpo_training",
|
| 334 |
+
"metadata": {},
|
| 335 |
+
"outputs": [],
|
| 336 |
+
"source": [
|
| 337 |
+
"from trl import GRPOConfig, GRPOTrainer\n",
|
| 338 |
+
"\n",
|
| 339 |
+
"# Reward function for GRPO: directly use the env's per-step reward\n",
|
| 340 |
+
"def reward_fn(completions: List[str], prompts: List[str], **kwargs) -> List[float]:\n",
|
| 341 |
+
" \"\"\"GRPO reward function — called on each group of completions.\"\"\"\n",
|
| 342 |
+
" # In GRPO the rewards come from rollouts; we pre-compute them above.\n",
|
| 343 |
+
" # This function returns the rewards already stored in the dataset.\n",
|
| 344 |
+
" return kwargs.get('reward', [0.0] * len(completions))\n",
|
| 345 |
+
"\n",
|
| 346 |
+
"grpo_config = GRPOConfig(\n",
|
| 347 |
+
" output_dir = './orgos_grpo_ckpt',\n",
|
| 348 |
+
" num_train_epochs = 3,\n",
|
| 349 |
+
" per_device_train_batch_size = 2,\n",
|
| 350 |
+
" gradient_accumulation_steps = 4,\n",
|
| 351 |
+
" learning_rate = 5e-5,\n",
|
| 352 |
+
" warmup_steps = 10,\n",
|
| 353 |
+
" logging_steps = 5,\n",
|
| 354 |
+
" save_steps = 50,\n",
|
| 355 |
+
" fp16 = not torch.cuda.is_bf16_supported(),\n",
|
| 356 |
+
" bf16 = torch.cuda.is_bf16_supported(),\n",
|
| 357 |
+
" max_grad_norm = 1.0,\n",
|
| 358 |
+
" # GRPO-specific\n",
|
| 359 |
+
" num_generations = 4, # group size G\n",
|
| 360 |
+
" max_new_tokens = 256,\n",
|
| 361 |
+
" temperature = 0.7,\n",
|
| 362 |
+
" beta = 0.04, # KL penalty\n",
|
| 363 |
+
" report_to = 'none',\n",
|
| 364 |
+
" seed = 42,\n",
|
| 365 |
+
")\n",
|
| 366 |
+
"\n",
|
| 367 |
+
"trainer = GRPOTrainer(\n",
|
| 368 |
+
" model = model,\n",
|
| 369 |
+
" args = grpo_config,\n",
|
| 370 |
+
" reward_funcs = reward_fn,\n",
|
| 371 |
+
" train_dataset = train_dataset,\n",
|
| 372 |
+
" tokenizer = tokenizer,\n",
|
| 373 |
+
")\n",
|
| 374 |
+
"\n",
|
| 375 |
+
"print('Starting GRPO training...')\n",
|
| 376 |
+
"train_result = trainer.train()\n",
|
| 377 |
+
"print('Training complete!')\n",
|
| 378 |
+
"print(train_result.metrics)"
|
| 379 |
+
]
|
| 380 |
+
},
|
| 381 |
+
{
|
| 382 |
+
"cell_type": "markdown",
|
| 383 |
+
"id": "sec8",
|
| 384 |
+
"metadata": {},
|
| 385 |
+
"source": ["## 8. Collect Post-Training Rollouts"]
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"cell_type": "code",
|
| 389 |
+
"execution_count": null,
|
| 390 |
+
"id": "posttraining_rollouts",
|
| 391 |
+
"metadata": {},
|
| 392 |
+
"outputs": [],
|
| 393 |
+
"source": [
|
| 394 |
+
"# Switch model to inference mode\n",
|
| 395 |
+
"FastLanguageModel.for_inference(model)\n",
|
| 396 |
+
"\n",
|
| 397 |
+
"N_EVAL = 30\n",
|
| 398 |
+
"post_scores = {'A': [], 'B': [], 'C': []}\n",
|
| 399 |
+
"\n",
|
| 400 |
+
"print('Collecting post-training rollouts...')\n",
|
| 401 |
+
"for wf in ['A', 'B', 'C']:\n",
|
| 402 |
+
" for ep in range(N_EVAL // 3):\n",
|
| 403 |
+
" _, score = run_episode(wf)\n",
|
| 404 |
+
" post_scores[wf].append(score)\n",
|
| 405 |
+
" print(f' Workflow {wf} ep {ep+1}: score={score:.4f}', end='\\r')\n",
|
| 406 |
+
" print(f' Workflow {wf}: mean={np.mean(post_scores[wf]):.4f} ± {np.std(post_scores[wf]):.4f}')\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"print(f'\\nOverall post-training mean: {np.mean([s for v in post_scores.values() for s in v]):.4f}')"
|
| 409 |
+
]
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"cell_type": "markdown",
|
| 413 |
+
"id": "sec9",
|
| 414 |
+
"metadata": {},
|
| 415 |
+
"source": ["## 9. Plot Before/After Reward Curves"]
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"cell_type": "code",
|
| 419 |
+
"execution_count": null,
|
| 420 |
+
"id": "plot_curves",
|
| 421 |
+
"metadata": {},
|
| 422 |
+
"outputs": [],
|
| 423 |
+
"source": [
|
| 424 |
+
"import matplotlib.pyplot as plt\n",
|
| 425 |
+
"import matplotlib.gridspec as gridspec\n",
|
| 426 |
+
"\n",
|
| 427 |
+
"fig = plt.figure(figsize=(14, 8), facecolor='#0f172a')\n",
|
| 428 |
+
"fig.suptitle('OrgOS: Before vs After GRPO Training', fontsize=15,\n",
|
| 429 |
+
" color='white', fontweight='bold', y=0.98)\n",
|
| 430 |
+
"\n",
|
| 431 |
+
"gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.45, wspace=0.35)\n",
|
| 432 |
+
"\n",
|
| 433 |
+
"COLORS = {'before': '#f87171', 'after': '#34d399', 'bg': '#1e293b', 'grid': '#334155'}\n",
|
| 434 |
+
"WF_LABELS = {'A': 'Workflow A\\nCustomer Bug Fix',\n",
|
| 435 |
+
" 'B': 'Workflow B\\nEmployee Onboarding',\n",
|
| 436 |
+
" 'C': 'Workflow C\\nChurn Risk Alert'}\n",
|
| 437 |
+
"\n",
|
| 438 |
+
"for col, wf in enumerate(['A', 'B', 'C']):\n",
|
| 439 |
+
" ax = fig.add_subplot(gs[0, col])\n",
|
| 440 |
+
" ax.set_facecolor(COLORS['bg'])\n",
|
| 441 |
+
" ax.grid(color=COLORS['grid'], linewidth=0.5, alpha=0.7)\n",
|
| 442 |
+
"\n",
|
| 443 |
+
" before = baseline_scores[wf]\n",
|
| 444 |
+
" after = post_scores[wf]\n",
|
| 445 |
+
"\n",
|
| 446 |
+
" ax.plot(before, color=COLORS['before'], linewidth=1.5, alpha=0.8, label='Before GRPO')\n",
|
| 447 |
+
" ax.plot(after, color=COLORS['after'], linewidth=1.5, alpha=0.8, label='After GRPO')\n",
|
| 448 |
+
"\n",
|
| 449 |
+
" ax.axhline(np.mean(before), color=COLORS['before'], linestyle='--', linewidth=1, alpha=0.5)\n",
|
| 450 |
+
" ax.axhline(np.mean(after), color=COLORS['after'], linestyle='--', linewidth=1, alpha=0.5)\n",
|
| 451 |
+
"\n",
|
| 452 |
+
" delta = np.mean(after) - np.mean(before)\n",
|
| 453 |
+
" ax.set_title(WF_LABELS[wf] + f'\\n(Δ = {delta:+.4f})', color='white', fontsize=9)\n",
|
| 454 |
+
" ax.set_xlabel('Episode', color='#94a3b8', fontsize=8)\n",
|
| 455 |
+
" ax.set_ylabel('Final Score', color='#94a3b8', fontsize=8)\n",
|
| 456 |
+
" ax.tick_params(colors='#64748b', labelsize=7)\n",
|
| 457 |
+
" ax.set_ylim(0, 1)\n",
|
| 458 |
+
" ax.legend(fontsize=7, facecolor='#1e293b', labelcolor='white',\n",
|
| 459 |
+
" edgecolor='#475569', framealpha=0.8)\n",
|
| 460 |
+
" for spine in ax.spines.values():\n",
|
| 461 |
+
" spine.set_edgecolor('#334155')\n",
|
| 462 |
+
"\n",
|
| 463 |
+
"# Bottom row: combined histogram\n",
|
| 464 |
+
"ax_hist = fig.add_subplot(gs[1, :])\n",
|
| 465 |
+
"ax_hist.set_facecolor(COLORS['bg'])\n",
|
| 466 |
+
"ax_hist.grid(color=COLORS['grid'], linewidth=0.5, alpha=0.5, axis='x')\n",
|
| 467 |
+
"\n",
|
| 468 |
+
"all_before = [s for v in baseline_scores.values() for s in v]\n",
|
| 469 |
+
"all_after = [s for v in post_scores.values() for s in v]\n",
|
| 470 |
+
"\n",
|
| 471 |
+
"bins = np.linspace(0, 1, 25)\n",
|
| 472 |
+
"ax_hist.hist(all_before, bins=bins, color=COLORS['before'], alpha=0.6, label=f'Before GRPO (mean={np.mean(all_before):.4f})', edgecolor='none')\n",
|
| 473 |
+
"ax_hist.hist(all_after, bins=bins, color=COLORS['after'], alpha=0.6, label=f'After GRPO (mean={np.mean(all_after):.4f})', edgecolor='none')\n",
|
| 474 |
+
"ax_hist.axvline(np.mean(all_before), color=COLORS['before'], linestyle='--', linewidth=1.5)\n",
|
| 475 |
+
"ax_hist.axvline(np.mean(all_after), color=COLORS['after'], linestyle='--', linewidth=1.5)\n",
|
| 476 |
+
"\n",
|
| 477 |
+
"ax_hist.set_title('Score Distribution Across All Workflows', color='white', fontsize=10)\n",
|
| 478 |
+
"ax_hist.set_xlabel('Final Score', color='#94a3b8', fontsize=9)\n",
|
| 479 |
+
"ax_hist.set_ylabel('Count', color='#94a3b8', fontsize=9)\n",
|
| 480 |
+
"ax_hist.tick_params(colors='#64748b', labelsize=8)\n",
|
| 481 |
+
"ax_hist.legend(fontsize=9, facecolor='#1e293b', labelcolor='white',\n",
|
| 482 |
+
" edgecolor='#475569', framealpha=0.9)\n",
|
| 483 |
+
"for spine in ax_hist.spines.values():\n",
|
| 484 |
+
" spine.set_edgecolor('#334155')\n",
|
| 485 |
+
"\n",
|
| 486 |
+
"plt.savefig('before_after_curves.png', dpi=150, bbox_inches='tight',\n",
|
| 487 |
+
" facecolor='#0f172a', edgecolor='none')\n",
|
| 488 |
+
"plt.show()\n",
|
| 489 |
+
"print('Saved: before_after_curves.png')"
|
| 490 |
+
]
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"cell_type": "markdown",
|
| 494 |
+
"id": "sec10",
|
| 495 |
+
"metadata": {},
|
| 496 |
+
"source": ["## 10. Save LoRA Adapter & Upload to HuggingFace"]
|
| 497 |
+
},
|
| 498 |
+
{
|
| 499 |
+
"cell_type": "code",
|
| 500 |
+
"execution_count": null,
|
| 501 |
+
"id": "save_model",
|
| 502 |
+
"metadata": {},
|
| 503 |
+
"outputs": [],
|
| 504 |
+
"source": [
|
| 505 |
+
"# Save LoRA adapter locally\n",
|
| 506 |
+
"model.save_pretrained('orgos_lora_adapter')\n",
|
| 507 |
+
"tokenizer.save_pretrained('orgos_lora_adapter')\n",
|
| 508 |
+
"print('LoRA adapter saved to ./orgos_lora_adapter')\n",
|
| 509 |
+
"\n",
|
| 510 |
+
"# Optionally push to HuggingFace Hub\n",
|
| 511 |
+
"# from huggingface_hub import login\n",
|
| 512 |
+
"# login(token=os.environ['HF_TOKEN'])\n",
|
| 513 |
+
"# model.push_to_hub('YOUR_HF_USERNAME/orgos-qwen25-3b-grpo-lora')\n",
|
| 514 |
+
"# tokenizer.push_to_hub('YOUR_HF_USERNAME/orgos-qwen25-3b-grpo-lora')\n",
|
| 515 |
+
"# print('Pushed to HuggingFace Hub!')"
|
| 516 |
+
]
|
| 517 |
+
},
|
| 518 |
+
{
|
| 519 |
+
"cell_type": "markdown",
|
| 520 |
+
"id": "sec11",
|
| 521 |
+
"metadata": {},
|
| 522 |
+
"source": [
|
| 523 |
+
"## 11. Summary\n",
|
| 524 |
+
"\n",
|
| 525 |
+
"```\n",
|
| 526 |
+
"OrgOS GRPO Training Summary\n",
|
| 527 |
+
"============================\n",
|
| 528 |
+
"Model: Qwen2.5-3B-Instruct + 4-bit LoRA\n",
|
| 529 |
+
"Algorithm: GRPO (Group Relative Policy Optimization)\n",
|
| 530 |
+
"Epochs: 3\n",
|
| 531 |
+
"Episodes: 30 baseline + 30 post-training\n",
|
| 532 |
+
"\n",
|
| 533 |
+
"Key result: The GRPO-trained model learns to:\n",
|
| 534 |
+
" 1. Read schema_hints before constructing action args\n",
|
| 535 |
+
" 2. Use drifted field names (e.g. 'severity' not 'priority')\n",
|
| 536 |
+
" 3. Complete workflow steps in the correct order\n",
|
| 537 |
+
" 4. Avoid RBAC violations by checking role constraints\n",
|
| 538 |
+
"\n",
|
| 539 |
+
"This produces a clear, measurable improvement visible in\n",
|
| 540 |
+
"before_after_curves.png — the core evidence for judging.\n",
|
| 541 |
+
"```\n",
|
| 542 |
+
"\n",
|
| 543 |
+
"**Artefacts produced:**\n",
|
| 544 |
+
"- `before_after_curves.png` — the money chart for the pitch\n",
|
| 545 |
+
"- `orgos_lora_adapter/` — the trained LoRA weights\n",
|
| 546 |
+
"- `baseline_scores.json` — raw score data"
|
| 547 |
+
]
|
| 548 |
+
}
|
| 549 |
+
]
|
| 550 |
+
}
|
ui/index.html
ADDED
|
@@ -0,0 +1,651 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en" x-data="orgos()" x-init="init()">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<title>OrgOS — Multi-App Enterprise RL Environment</title>
|
| 7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 8 |
+
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
| 9 |
+
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
|
| 10 |
+
<style>
|
| 11 |
+
[x-cloak] { display: none !important; }
|
| 12 |
+
|
| 13 |
+
body { font-family: 'JetBrains Mono', 'Fira Code', monospace; }
|
| 14 |
+
|
| 15 |
+
.app-tab.active { @apply border-b-2; }
|
| 16 |
+
.step-done { color: #22c55e; }
|
| 17 |
+
.step-active { color: #fbbf24; }
|
| 18 |
+
.step-pending{ color: #475569; }
|
| 19 |
+
|
| 20 |
+
/* Scrollbar styling */
|
| 21 |
+
::-webkit-scrollbar { width: 4px; height: 4px; }
|
| 22 |
+
::-webkit-scrollbar-track { background: #1e293b; }
|
| 23 |
+
::-webkit-scrollbar-thumb { background: #334155; border-radius: 2px; }
|
| 24 |
+
|
| 25 |
+
/* Log entry fade-in */
|
| 26 |
+
@keyframes fadeIn { from { opacity: 0; transform: translateY(4px); } to { opacity: 1; } }
|
| 27 |
+
.log-entry { animation: fadeIn 0.2s ease; }
|
| 28 |
+
|
| 29 |
+
/* Score pulse when updating */
|
| 30 |
+
@keyframes scorePulse {
|
| 31 |
+
0%, 100% { color: #38bdf8; }
|
| 32 |
+
50% { color: #7dd3fc; }
|
| 33 |
+
}
|
| 34 |
+
.score-updated { animation: scorePulse 0.4s ease; }
|
| 35 |
+
</style>
|
| 36 |
+
</head>
|
| 37 |
+
|
| 38 |
+
<body class="bg-slate-950 text-slate-300 min-h-screen">
|
| 39 |
+
|
| 40 |
+
<!-- ================================================================
|
| 41 |
+
TOP BAR
|
| 42 |
+
================================================================ -->
|
| 43 |
+
<header class="bg-slate-900 border-b border-slate-800 px-4 py-3 flex items-center gap-4">
|
| 44 |
+
<!-- Logo -->
|
| 45 |
+
<div class="flex items-center gap-2 mr-4">
|
| 46 |
+
<div class="w-7 h-7 rounded bg-sky-500 flex items-center justify-center text-white font-bold text-sm">O</div>
|
| 47 |
+
<span class="text-white font-semibold text-sm tracking-wide">OrgOS</span>
|
| 48 |
+
<span class="text-slate-500 text-xs">Enterprise RL Environment</span>
|
| 49 |
+
</div>
|
| 50 |
+
|
| 51 |
+
<!-- Workflow selector -->
|
| 52 |
+
<div class="flex items-center gap-2">
|
| 53 |
+
<label class="text-xs text-slate-500 uppercase tracking-widest">Workflow</label>
|
| 54 |
+
<select x-model="selectedWorkflow"
|
| 55 |
+
class="bg-slate-800 border border-slate-700 text-slate-200 text-xs rounded px-2 py-1 focus:outline-none focus:border-sky-500">
|
| 56 |
+
<option value="A">A — Customer Bug Fix</option>
|
| 57 |
+
<option value="B">B — Employee Onboarding</option>
|
| 58 |
+
<option value="C">C — Churn Risk Alert</option>
|
| 59 |
+
</select>
|
| 60 |
+
</div>
|
| 61 |
+
|
| 62 |
+
<!-- Run / Stop button -->
|
| 63 |
+
<button @click="isRunning ? stopAgent() : startAgent()"
|
| 64 |
+
:class="isRunning
|
| 65 |
+
? 'bg-red-600 hover:bg-red-500 text-white'
|
| 66 |
+
: 'bg-sky-600 hover:bg-sky-500 text-white'"
|
| 67 |
+
class="px-3 py-1.5 rounded text-xs font-medium transition-colors flex items-center gap-1.5">
|
| 68 |
+
<svg x-show="!isRunning" xmlns="http://www.w3.org/2000/svg" class="w-3 h-3" fill="currentColor" viewBox="0 0 16 16">
|
| 69 |
+
<path d="M11.596 8.697l-6.363 3.692c-.54.313-1.233-.066-1.233-.697V4.308c0-.63.692-1.01 1.233-.696l6.363 3.692a.802.802 0 0 1 0 1.393z"/>
|
| 70 |
+
</svg>
|
| 71 |
+
<svg x-show="isRunning" xmlns="http://www.w3.org/2000/svg" class="w-3 h-3" fill="currentColor" viewBox="0 0 16 16">
|
| 72 |
+
<path d="M5.5 3.5A1.5 1.5 0 0 1 7 5v6a1.5 1.5 0 0 1-3 0V5a1.5 1.5 0 0 1 1.5-1.5zm5 0A1.5 1.5 0 0 1 12 5v6a1.5 1.5 0 0 1-3 0V5a1.5 1.5 0 0 1 1.5-1.5z"/>
|
| 73 |
+
</svg>
|
| 74 |
+
<span x-text="isRunning ? 'Stop' : 'Run Agent'"></span>
|
| 75 |
+
</button>
|
| 76 |
+
|
| 77 |
+
<!-- Reset button -->
|
| 78 |
+
<button @click="resetEpisode()"
|
| 79 |
+
:disabled="isRunning"
|
| 80 |
+
class="px-3 py-1.5 rounded text-xs font-medium bg-slate-700 hover:bg-slate-600 text-slate-300 transition-colors disabled:opacity-40 disabled:cursor-not-allowed">
|
| 81 |
+
Reset
|
| 82 |
+
</button>
|
| 83 |
+
|
| 84 |
+
<!-- Status indicators -->
|
| 85 |
+
<div class="ml-auto flex items-center gap-4">
|
| 86 |
+
<!-- Score -->
|
| 87 |
+
<div class="text-right">
|
| 88 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest">Score</div>
|
| 89 |
+
<div class="text-sky-400 font-bold text-base tabular-nums"
|
| 90 |
+
:class="scoreUpdated ? 'score-updated' : ''"
|
| 91 |
+
x-text="currentScore.toFixed(4)"></div>
|
| 92 |
+
</div>
|
| 93 |
+
<!-- Steps -->
|
| 94 |
+
<div class="text-right">
|
| 95 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest">Step</div>
|
| 96 |
+
<div class="text-slate-200 font-bold text-base tabular-nums"
|
| 97 |
+
x-text="stepCount + ' / ' + maxSteps"></div>
|
| 98 |
+
</div>
|
| 99 |
+
<!-- Policy drift badge -->
|
| 100 |
+
<div x-show="policyDriftActive"
|
| 101 |
+
class="px-2 py-0.5 rounded-full text-xs bg-amber-900 text-amber-300 border border-amber-700">
|
| 102 |
+
Policy Drift
|
| 103 |
+
</div>
|
| 104 |
+
<!-- Health dot -->
|
| 105 |
+
<div class="flex items-center gap-1.5">
|
| 106 |
+
<div class="w-2 h-2 rounded-full"
|
| 107 |
+
:class="serverHealthy ? 'bg-green-500' : 'bg-red-500'"></div>
|
| 108 |
+
<span class="text-xs text-slate-500" x-text="serverHealthy ? 'Live' : 'Offline'"></span>
|
| 109 |
+
</div>
|
| 110 |
+
</div>
|
| 111 |
+
</header>
|
| 112 |
+
|
| 113 |
+
<!-- ================================================================
|
| 114 |
+
MAIN LAYOUT (3-column)
|
| 115 |
+
================================================================ -->
|
| 116 |
+
<div class="flex h-[calc(100vh-52px)]">
|
| 117 |
+
|
| 118 |
+
<!-- ============================================================
|
| 119 |
+
LEFT: Workflow Progress + Schema Hints + Rules
|
| 120 |
+
============================================================ -->
|
| 121 |
+
<aside class="w-72 flex-shrink-0 bg-slate-900 border-r border-slate-800 flex flex-col overflow-hidden">
|
| 122 |
+
|
| 123 |
+
<!-- Workflow goal -->
|
| 124 |
+
<div class="px-4 pt-4 pb-3 border-b border-slate-800">
|
| 125 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest mb-1">Goal</div>
|
| 126 |
+
<p class="text-slate-300 text-xs leading-relaxed" x-text="workflowGoal || 'Reset to start an episode.'"></p>
|
| 127 |
+
</div>
|
| 128 |
+
|
| 129 |
+
<!-- Step tracker -->
|
| 130 |
+
<div class="px-4 pt-3 pb-2 border-b border-slate-800 flex-shrink-0">
|
| 131 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest mb-2">
|
| 132 |
+
Workflow Steps
|
| 133 |
+
<span class="ml-1 text-sky-400 font-bold"
|
| 134 |
+
x-text="'(' + completedSteps.length + '/' + totalSteps + ')'"></span>
|
| 135 |
+
</div>
|
| 136 |
+
<template x-for="(step, i) in allSteps" :key="i">
|
| 137 |
+
<div class="flex items-start gap-2 py-1">
|
| 138 |
+
<!-- Icon -->
|
| 139 |
+
<div class="mt-0.5 w-4 h-4 flex-shrink-0">
|
| 140 |
+
<template x-if="completedSteps.includes(step.id)">
|
| 141 |
+
<svg class="w-4 h-4 text-green-500" fill="currentColor" viewBox="0 0 20 20">
|
| 142 |
+
<path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-9.293a1 1 0 00-1.414-1.414L9 10.586 7.707 9.293a1 1 0 00-1.414 1.414l2 2a1 1 0 001.414 0l4-4z" clip-rule="evenodd"/>
|
| 143 |
+
</svg>
|
| 144 |
+
</template>
|
| 145 |
+
<template x-if="!completedSteps.includes(step.id)">
|
| 146 |
+
<div class="w-4 h-4 rounded-full border border-slate-600 flex items-center justify-center">
|
| 147 |
+
<span class="text-[9px] text-slate-500" x-text="step.id"></span>
|
| 148 |
+
</div>
|
| 149 |
+
</template>
|
| 150 |
+
</div>
|
| 151 |
+
<!-- Description -->
|
| 152 |
+
<span class="text-xs leading-tight"
|
| 153 |
+
:class="completedSteps.includes(step.id)
|
| 154 |
+
? 'text-green-400 line-through decoration-green-600'
|
| 155 |
+
: 'text-slate-400'"
|
| 156 |
+
x-text="step.description"></span>
|
| 157 |
+
</div>
|
| 158 |
+
</template>
|
| 159 |
+
</div>
|
| 160 |
+
|
| 161 |
+
<!-- Schema hints -->
|
| 162 |
+
<div class="px-4 pt-3 pb-3 border-b border-slate-800 flex-shrink-0">
|
| 163 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Schema Hints</div>
|
| 164 |
+
<template x-if="Object.keys(schemaHints).length === 0">
|
| 165 |
+
<p class="text-xs text-slate-600">No drift — canonical names in effect.</p>
|
| 166 |
+
</template>
|
| 167 |
+
<template x-for="[field, drifted] in Object.entries(schemaHints)" :key="field">
|
| 168 |
+
<div class="flex items-center gap-1 py-0.5 font-mono text-[11px]">
|
| 169 |
+
<span class="text-red-400 line-through" x-text="field.split('.')[1] ?? field"></span>
|
| 170 |
+
<span class="text-slate-600">→</span>
|
| 171 |
+
<span class="text-green-400" x-text="drifted"></span>
|
| 172 |
+
<span class="text-slate-600 text-[10px]" x-text="'(' + (field.split('.')[0] ?? '') + ')'"></span>
|
| 173 |
+
</div>
|
| 174 |
+
</template>
|
| 175 |
+
</div>
|
| 176 |
+
|
| 177 |
+
<!-- Active rules -->
|
| 178 |
+
<div class="px-4 pt-3 pb-3 flex-shrink-0">
|
| 179 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Active Rules</div>
|
| 180 |
+
<template x-for="[key, val] in Object.entries(activeRules)" :key="key">
|
| 181 |
+
<div class="flex justify-between py-0.5 text-[11px]">
|
| 182 |
+
<span class="text-slate-500" x-text="key.replace(/_/g,' ')"></span>
|
| 183 |
+
<span class="text-slate-300 font-bold tabular-nums" x-text="val"></span>
|
| 184 |
+
</div>
|
| 185 |
+
</template>
|
| 186 |
+
</div>
|
| 187 |
+
</aside>
|
| 188 |
+
|
| 189 |
+
<!-- ============================================================
|
| 190 |
+
CENTER: App State Tabs + Agent Log
|
| 191 |
+
============================================================ -->
|
| 192 |
+
<main class="flex-1 flex flex-col overflow-hidden min-w-0">
|
| 193 |
+
|
| 194 |
+
<!-- App state tabs -->
|
| 195 |
+
<div class="bg-slate-900 border-b border-slate-800 flex-shrink-0">
|
| 196 |
+
<!-- Tab headers -->
|
| 197 |
+
<div class="flex">
|
| 198 |
+
<template x-for="tab in appTabs" :key="tab.id">
|
| 199 |
+
<button @click="activeAppTab = tab.id"
|
| 200 |
+
:class="activeAppTab === tab.id
|
| 201 |
+
? 'border-b-2 border-sky-500 text-sky-400 bg-slate-950'
|
| 202 |
+
: 'text-slate-500 hover:text-slate-300'"
|
| 203 |
+
class="px-4 py-2 text-xs font-medium transition-colors flex items-center gap-1.5">
|
| 204 |
+
<span x-text="tab.icon"></span>
|
| 205 |
+
<span x-text="tab.label"></span>
|
| 206 |
+
<!-- Open items badge -->
|
| 207 |
+
<template x-if="appOpenCounts[tab.id] > 0">
|
| 208 |
+
<span class="px-1.5 py-0.5 rounded-full text-[10px] font-bold"
|
| 209 |
+
:class="activeAppTab === tab.id ? 'bg-sky-900 text-sky-300' : 'bg-slate-700 text-slate-400'"
|
| 210 |
+
x-text="appOpenCounts[tab.id]"></span>
|
| 211 |
+
</template>
|
| 212 |
+
</button>
|
| 213 |
+
</template>
|
| 214 |
+
</div>
|
| 215 |
+
<!-- Tab content -->
|
| 216 |
+
<div class="p-3 max-h-48 overflow-y-auto">
|
| 217 |
+
<template x-for="tab in appTabs" :key="tab.id">
|
| 218 |
+
<pre x-show="activeAppTab === tab.id"
|
| 219 |
+
class="text-[11px] font-mono text-slate-300 whitespace-pre-wrap leading-relaxed"
|
| 220 |
+
x-text="appStates[tab.id] || 'No data yet — reset to load.'"></pre>
|
| 221 |
+
</template>
|
| 222 |
+
</div>
|
| 223 |
+
</div>
|
| 224 |
+
|
| 225 |
+
<!-- Agent action log -->
|
| 226 |
+
<div class="flex-1 overflow-hidden flex flex-col">
|
| 227 |
+
<div class="px-4 py-2 border-b border-slate-800 flex items-center justify-between bg-slate-900 flex-shrink-0">
|
| 228 |
+
<span class="text-xs text-slate-500 uppercase tracking-widest">Agent Log</span>
|
| 229 |
+
<button @click="actionLog = []" class="text-xs text-slate-600 hover:text-slate-400">Clear</button>
|
| 230 |
+
</div>
|
| 231 |
+
<div class="flex-1 overflow-y-auto px-4 py-3 space-y-1.5" id="log-scroll">
|
| 232 |
+
<template x-if="actionLog.length === 0">
|
| 233 |
+
<p class="text-slate-600 text-xs italic">Waiting for episode to start…</p>
|
| 234 |
+
</template>
|
| 235 |
+
<template x-for="(entry, i) in actionLog" :key="i">
|
| 236 |
+
<div class="log-entry flex gap-3 items-start text-xs font-mono py-1 border-b border-slate-800/50">
|
| 237 |
+
<!-- Step number -->
|
| 238 |
+
<span class="text-slate-600 w-8 text-right flex-shrink-0" x-text="'#' + entry.step"></span>
|
| 239 |
+
<!-- Color dot -->
|
| 240 |
+
<span class="w-2 h-2 rounded-full flex-shrink-0 mt-0.5"
|
| 241 |
+
:class="{
|
| 242 |
+
'bg-green-500': entry.type === 'success',
|
| 243 |
+
'bg-red-500': entry.type === 'error',
|
| 244 |
+
'bg-amber-500': entry.type === 'warning',
|
| 245 |
+
'bg-sky-500': entry.type === 'info',
|
| 246 |
+
'bg-slate-500': entry.type === 'reset',
|
| 247 |
+
}"></span>
|
| 248 |
+
<!-- Content -->
|
| 249 |
+
<div class="flex-1 min-w-0">
|
| 250 |
+
<div class="flex items-center gap-2 flex-wrap">
|
| 251 |
+
<template x-if="entry.app">
|
| 252 |
+
<span class="px-1.5 py-0.5 rounded text-[10px] font-bold uppercase"
|
| 253 |
+
:class="{
|
| 254 |
+
'bg-violet-900 text-violet-300': entry.app === 'jira',
|
| 255 |
+
'bg-emerald-900 text-emerald-300': entry.app === 'zendesk',
|
| 256 |
+
'bg-blue-900 text-blue-300': entry.app === 'salesforce',
|
| 257 |
+
'bg-orange-900 text-orange-300': entry.app === 'workday',
|
| 258 |
+
}"
|
| 259 |
+
x-text="entry.app"></span>
|
| 260 |
+
</template>
|
| 261 |
+
<template x-if="entry.operation">
|
| 262 |
+
<span class="text-sky-400" x-text="entry.operation"></span>
|
| 263 |
+
</template>
|
| 264 |
+
<template x-if="entry.reward !== undefined">
|
| 265 |
+
<span :class="entry.reward >= 0 ? 'text-green-400' : 'text-red-400'"
|
| 266 |
+
x-text="(entry.reward >= 0 ? '+' : '') + entry.reward.toFixed(4)"></span>
|
| 267 |
+
</template>
|
| 268 |
+
</div>
|
| 269 |
+
<div class="text-slate-400 text-[11px] mt-0.5 leading-snug" x-text="entry.message"></div>
|
| 270 |
+
<template x-if="entry.argsStr">
|
| 271 |
+
<div class="text-slate-600 text-[10px] mt-0.5 truncate" x-text="entry.argsStr"></div>
|
| 272 |
+
</template>
|
| 273 |
+
</div>
|
| 274 |
+
</div>
|
| 275 |
+
</template>
|
| 276 |
+
</div>
|
| 277 |
+
</div>
|
| 278 |
+
</main>
|
| 279 |
+
|
| 280 |
+
<!-- ============================================================
|
| 281 |
+
RIGHT: Metrics Panel
|
| 282 |
+
============================================================ -->
|
| 283 |
+
<aside class="w-64 flex-shrink-0 bg-slate-900 border-l border-slate-800 flex flex-col overflow-hidden">
|
| 284 |
+
|
| 285 |
+
<!-- Reward curve chart -->
|
| 286 |
+
<div class="p-4 border-b border-slate-800">
|
| 287 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Reward Curve</div>
|
| 288 |
+
<canvas id="rewardChart" class="w-full" style="max-height:120px"></canvas>
|
| 289 |
+
</div>
|
| 290 |
+
|
| 291 |
+
<!-- Score breakdown bars -->
|
| 292 |
+
<div class="p-4 border-b border-slate-800">
|
| 293 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest mb-3">Score Breakdown</div>
|
| 294 |
+
<template x-for="comp in rewardComponents" :key="comp.key">
|
| 295 |
+
<div class="mb-2">
|
| 296 |
+
<div class="flex justify-between text-[11px] mb-0.5">
|
| 297 |
+
<span class="text-slate-500" x-text="comp.label"></span>
|
| 298 |
+
<span class="text-slate-300 tabular-nums" x-text="(comp.value * 100).toFixed(0) + '%'"></span>
|
| 299 |
+
</div>
|
| 300 |
+
<div class="w-full bg-slate-800 rounded-full h-1.5">
|
| 301 |
+
<div class="h-1.5 rounded-full transition-all duration-300"
|
| 302 |
+
:class="comp.color"
|
| 303 |
+
:style="'width: ' + (comp.value * 100) + '%'"></div>
|
| 304 |
+
</div>
|
| 305 |
+
</div>
|
| 306 |
+
</template>
|
| 307 |
+
</div>
|
| 308 |
+
|
| 309 |
+
<!-- Episode stats -->
|
| 310 |
+
<div class="p-4 border-b border-slate-800">
|
| 311 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Episode Stats</div>
|
| 312 |
+
<div class="space-y-1">
|
| 313 |
+
<div class="flex justify-between text-[11px]">
|
| 314 |
+
<span class="text-slate-500">Violations</span>
|
| 315 |
+
<span :class="violationCount > 0 ? 'text-red-400' : 'text-green-400'"
|
| 316 |
+
x-text="violationCount" class="font-bold tabular-nums"></span>
|
| 317 |
+
</div>
|
| 318 |
+
<div class="flex justify-between text-[11px]">
|
| 319 |
+
<span class="text-slate-500">Schema boosts</span>
|
| 320 |
+
<span class="text-green-400 font-bold tabular-nums" x-text="schemaAdaptCount"></span>
|
| 321 |
+
</div>
|
| 322 |
+
<div class="flex justify-between text-[11px]">
|
| 323 |
+
<span class="text-slate-500">Schema errors</span>
|
| 324 |
+
<span :class="schemaErrorCount > 0 ? 'text-red-400' : 'text-slate-600'"
|
| 325 |
+
x-text="schemaErrorCount" class="font-bold tabular-nums"></span>
|
| 326 |
+
</div>
|
| 327 |
+
<div class="flex justify-between text-[11px]">
|
| 328 |
+
<span class="text-slate-500">Workflow ID</span>
|
| 329 |
+
<span class="text-sky-400 font-bold" x-text="workflowId || '—'"></span>
|
| 330 |
+
</div>
|
| 331 |
+
<div class="flex justify-between text-[11px]">
|
| 332 |
+
<span class="text-slate-500">Schema versions</span>
|
| 333 |
+
<template x-for="[app, ver] in Object.entries(schemaVersions)" :key="app">
|
| 334 |
+
<span class="text-slate-300 text-[10px] tabular-nums"
|
| 335 |
+
x-text="app[0].toUpperCase() + ':' + ver"></span>
|
| 336 |
+
</template>
|
| 337 |
+
</div>
|
| 338 |
+
</div>
|
| 339 |
+
</div>
|
| 340 |
+
|
| 341 |
+
<!-- Recent violations -->
|
| 342 |
+
<div class="p-4 flex-1 overflow-y-auto">
|
| 343 |
+
<div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Violations</div>
|
| 344 |
+
<template x-if="violations.length === 0">
|
| 345 |
+
<p class="text-slate-600 text-xs italic">None this episode.</p>
|
| 346 |
+
</template>
|
| 347 |
+
<template x-for="(v, i) in violations.slice(-8)" :key="i">
|
| 348 |
+
<div class="text-[10px] text-red-400 py-0.5 border-b border-slate-800/50 leading-snug"
|
| 349 |
+
x-text="v"></div>
|
| 350 |
+
</template>
|
| 351 |
+
</div>
|
| 352 |
+
</aside>
|
| 353 |
+
|
| 354 |
+
</div><!-- end main layout -->
|
| 355 |
+
|
| 356 |
+
<!-- ================================================================
|
| 357 |
+
ALPINE.JS + CHART.JS LOGIC
|
| 358 |
+
================================================================ -->
|
| 359 |
+
<script>
|
| 360 |
+
function orgos() {
|
| 361 |
+
return {
|
| 362 |
+
// ---- Config ----
|
| 363 |
+
envUrl: window.location.origin,
|
| 364 |
+
|
| 365 |
+
// ---- Episode state ----
|
| 366 |
+
selectedWorkflow: 'A',
|
| 367 |
+
workflowId: '',
|
| 368 |
+
workflowGoal: '',
|
| 369 |
+
currentScore: 0.001,
|
| 370 |
+
stepCount: 0,
|
| 371 |
+
maxSteps: 15,
|
| 372 |
+
isRunning: false,
|
| 373 |
+
policyDriftActive: false,
|
| 374 |
+
serverHealthy: false,
|
| 375 |
+
|
| 376 |
+
// ---- Step tracking ----
|
| 377 |
+
allSteps: [],
|
| 378 |
+
completedSteps: [],
|
| 379 |
+
totalSteps: 0,
|
| 380 |
+
|
| 381 |
+
// ---- App state ----
|
| 382 |
+
appTabs: [
|
| 383 |
+
{ id: 'zendesk', label: 'Zendesk', icon: '🎫' },
|
| 384 |
+
{ id: 'jira', label: 'Jira', icon: '🐛' },
|
| 385 |
+
{ id: 'salesforce', label: 'Salesforce', icon: '💼' },
|
| 386 |
+
{ id: 'workday', label: 'Workday', icon: '👥' },
|
| 387 |
+
],
|
| 388 |
+
activeAppTab: 'zendesk',
|
| 389 |
+
appStates: { zendesk: '', jira: '', salesforce: '', workday: '' },
|
| 390 |
+
appOpenCounts:{ zendesk: 0, jira: 0, salesforce: 0, workday: 0 },
|
| 391 |
+
|
| 392 |
+
// ---- Schema / Rules ----
|
| 393 |
+
schemaHints: {},
|
| 394 |
+
schemaVersions:{},
|
| 395 |
+
activeRules: {},
|
| 396 |
+
|
| 397 |
+
// ---- Metrics ----
|
| 398 |
+
rewardHistory: [],
|
| 399 |
+
rewardComponents: [
|
| 400 |
+
{ key: 'workflow_completion', label: 'Workflow', value: 0, color: 'bg-sky-500' },
|
| 401 |
+
{ key: 'rule_compliance', label: 'Compliance',value: 0, color: 'bg-green-500' },
|
| 402 |
+
{ key: 'schema_adaptation', label: 'Schema', value: 0, color: 'bg-violet-500' },
|
| 403 |
+
{ key: 'efficiency', label: 'Efficiency',value: 0, color: 'bg-amber-500' },
|
| 404 |
+
{ key: 'policy_drift_handling', label: 'Policy', value: 0, color: 'bg-pink-500' },
|
| 405 |
+
],
|
| 406 |
+
violationCount: 0,
|
| 407 |
+
schemaAdaptCount: 0,
|
| 408 |
+
schemaErrorCount: 0,
|
| 409 |
+
violations: [],
|
| 410 |
+
|
| 411 |
+
// ---- Log ----
|
| 412 |
+
actionLog: [],
|
| 413 |
+
|
| 414 |
+
// ---- SSE handle ----
|
| 415 |
+
_sse: null,
|
| 416 |
+
_chart: null,
|
| 417 |
+
scoreUpdated: false,
|
| 418 |
+
|
| 419 |
+
// ----------------------------------------------------------------
|
| 420 |
+
// Init
|
| 421 |
+
// ----------------------------------------------------------------
|
| 422 |
+
async init() {
|
| 423 |
+
await this.checkHealth();
|
| 424 |
+
this._chart = this._initChart();
|
| 425 |
+
// Poll health every 10s
|
| 426 |
+
setInterval(() => this.checkHealth(), 10_000);
|
| 427 |
+
},
|
| 428 |
+
|
| 429 |
+
async checkHealth() {
|
| 430 |
+
try {
|
| 431 |
+
const r = await fetch(this.envUrl + '/health');
|
| 432 |
+
this.serverHealthy = r.ok;
|
| 433 |
+
} catch { this.serverHealthy = false; }
|
| 434 |
+
},
|
| 435 |
+
|
| 436 |
+
// ----------------------------------------------------------------
|
| 437 |
+
// Reset
|
| 438 |
+
// ----------------------------------------------------------------
|
| 439 |
+
async resetEpisode() {
|
| 440 |
+
this.stopAgent();
|
| 441 |
+
try {
|
| 442 |
+
const r = await fetch(this.envUrl + '/reset', {
|
| 443 |
+
method: 'POST',
|
| 444 |
+
headers: { 'Content-Type': 'application/json' },
|
| 445 |
+
body: JSON.stringify({ workflow_id: this.selectedWorkflow }),
|
| 446 |
+
});
|
| 447 |
+
const data = await r.json();
|
| 448 |
+
this._applyObservation(data.observation, null, 0);
|
| 449 |
+
this.actionLog = [];
|
| 450 |
+
this.rewardHistory = [];
|
| 451 |
+
this.violationCount = 0;
|
| 452 |
+
this.schemaAdaptCount = 0;
|
| 453 |
+
this.schemaErrorCount = 0;
|
| 454 |
+
this.violations = [];
|
| 455 |
+
this._updateChart();
|
| 456 |
+
this._pushLog({ type: 'reset', step: 0, message: 'Episode reset. Ready to run agent.' });
|
| 457 |
+
// Fetch schema versions from /state
|
| 458 |
+
const st = await fetch(this.envUrl + '/state').then(r => r.json());
|
| 459 |
+
this.schemaVersions = st.schema_versions || {};
|
| 460 |
+
this.policyDriftActive = st.policy_drift_active || false;
|
| 461 |
+
} catch (e) {
|
| 462 |
+
this._pushLog({ type: 'error', step: 0, message: 'Reset failed: ' + e });
|
| 463 |
+
}
|
| 464 |
+
},
|
| 465 |
+
|
| 466 |
+
// ----------------------------------------------------------------
|
| 467 |
+
// Run agent via SSE
|
| 468 |
+
// ----------------------------------------------------------------
|
| 469 |
+
startAgent() {
|
| 470 |
+
if (this.isRunning) return;
|
| 471 |
+
this.isRunning = true;
|
| 472 |
+
const url = `${this.envUrl}/ui/run-agent?workflow_id=${this.selectedWorkflow}`;
|
| 473 |
+
this._sse = new EventSource(url);
|
| 474 |
+
this._sse.onmessage = (e) => {
|
| 475 |
+
try {
|
| 476 |
+
const evt = JSON.parse(e.data);
|
| 477 |
+
this._handleSSEEvent(evt);
|
| 478 |
+
} catch {}
|
| 479 |
+
};
|
| 480 |
+
this._sse.onerror = () => {
|
| 481 |
+
this.isRunning = false;
|
| 482 |
+
this._sse && this._sse.close();
|
| 483 |
+
this._pushLog({ type: 'error', step: this.stepCount, message: 'SSE connection error.' });
|
| 484 |
+
};
|
| 485 |
+
},
|
| 486 |
+
|
| 487 |
+
stopAgent() {
|
| 488 |
+
this.isRunning = false;
|
| 489 |
+
if (this._sse) { this._sse.close(); this._sse = null; }
|
| 490 |
+
},
|
| 491 |
+
|
| 492 |
+
_handleSSEEvent(evt) {
|
| 493 |
+
if (evt.type === 'reset') {
|
| 494 |
+
this.actionLog = [];
|
| 495 |
+
this.rewardHistory = [];
|
| 496 |
+
this.violationCount = 0;
|
| 497 |
+
this.violations = [];
|
| 498 |
+
this.schemaAdaptCount = 0;
|
| 499 |
+
this.schemaErrorCount = 0;
|
| 500 |
+
this._applyObservation(evt.observation, null, 0);
|
| 501 |
+
this._pushLog({ type: 'reset', step: 0, message: `Episode started — Workflow ${evt.workflow_id}` });
|
| 502 |
+
} else if (evt.type === 'step') {
|
| 503 |
+
const obs = evt.observation;
|
| 504 |
+
this._applyObservation(obs, evt.action, evt.reward);
|
| 505 |
+
// Detect schema adapt / error from message
|
| 506 |
+
if (obs.message && obs.message.includes('Stale schema')) this.schemaErrorCount++;
|
| 507 |
+
if (obs.reward > 0.05 && evt.action) this.schemaAdaptCount += (evt.action._adapted ? 1 : 0);
|
| 508 |
+
this.rewardHistory.push(evt.reward);
|
| 509 |
+
this._updateChart();
|
| 510 |
+
// Violations
|
| 511 |
+
if (obs.rule_violations && obs.rule_violations.length > 0) {
|
| 512 |
+
this.violations.push(...obs.rule_violations);
|
| 513 |
+
this.violationCount += obs.rule_violations.length;
|
| 514 |
+
}
|
| 515 |
+
this._pushLog({
|
| 516 |
+
type: evt.reward < 0 ? 'error' : (evt.reward > 0.05 ? 'success' : 'info'),
|
| 517 |
+
step: evt.step,
|
| 518 |
+
app: evt.action?.app,
|
| 519 |
+
operation: evt.action?.operation,
|
| 520 |
+
reward: evt.reward,
|
| 521 |
+
message: obs.message,
|
| 522 |
+
argsStr: evt.action?.args ? JSON.stringify(evt.action.args, null, 0).slice(0, 80) : '',
|
| 523 |
+
});
|
| 524 |
+
if (evt.done) { this.isRunning = false; }
|
| 525 |
+
} else if (evt.type === 'done') {
|
| 526 |
+
this.isRunning = false;
|
| 527 |
+
this._pushLog({
|
| 528 |
+
type: 'info', step: evt.steps,
|
| 529 |
+
message: `Episode done. Final score: ${(evt.final_score||0).toFixed(4)} | Workflow complete: ${evt.completed}`,
|
| 530 |
+
});
|
| 531 |
+
} else if (evt.type === 'error') {
|
| 532 |
+
this._pushLog({ type: 'error', step: evt.step || this.stepCount, message: evt.message });
|
| 533 |
+
}
|
| 534 |
+
},
|
| 535 |
+
|
| 536 |
+
// ----------------------------------------------------------------
|
| 537 |
+
// Apply observation to UI state
|
| 538 |
+
// ----------------------------------------------------------------
|
| 539 |
+
_applyObservation(obs, action, reward) {
|
| 540 |
+
this.workflowId = obs.workflow_id || '';
|
| 541 |
+
this.workflowGoal = obs.workflow_goal || '';
|
| 542 |
+
this.schemaHints = obs.schema_hints || {};
|
| 543 |
+
this.activeRules = obs.active_rules || {};
|
| 544 |
+
this.stepCount = obs.step_count || 0;
|
| 545 |
+
this.appStates = obs.app_states || this.appStates;
|
| 546 |
+
this.completedSteps= (obs.completed_steps || []).map(id => id);
|
| 547 |
+
this.policyDriftActive = obs.policy_drift_active || false;
|
| 548 |
+
|
| 549 |
+
// Score update with flash animation
|
| 550 |
+
const newScore = obs.current_score || 0.001;
|
| 551 |
+
if (newScore !== this.currentScore) {
|
| 552 |
+
this.currentScore = newScore;
|
| 553 |
+
this.scoreUpdated = true;
|
| 554 |
+
setTimeout(() => { this.scoreUpdated = false; }, 500);
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
// Workflow steps
|
| 558 |
+
const wfStepDefs = {
|
| 559 |
+
A: [
|
| 560 |
+
{ id: 'A1', description: 'Acknowledge ZD-001 in Zendesk' },
|
| 561 |
+
{ id: 'A2', description: 'Create linked Jira issue' },
|
| 562 |
+
{ id: 'A3', description: 'Verify ACME-001 in Salesforce' },
|
| 563 |
+
{ id: 'A4', description: 'Assign Jira issue to engineer' },
|
| 564 |
+
{ id: 'A5', description: 'Log SLA event in Workday' },
|
| 565 |
+
],
|
| 566 |
+
B: [
|
| 567 |
+
{ id: 'B1', description: 'Create Workday onboarding record' },
|
| 568 |
+
{ id: 'B2', description: 'Provision Jira access' },
|
| 569 |
+
{ id: 'B3', description: 'Assign to Salesforce territory team' },
|
| 570 |
+
{ id: 'B4', description: 'Create Zendesk agent profile' },
|
| 571 |
+
],
|
| 572 |
+
C: [
|
| 573 |
+
{ id: 'C1', description: 'Flag ACME-003 as churn risk' },
|
| 574 |
+
{ id: 'C2', description: 'Query Zendesk support volume' },
|
| 575 |
+
{ id: 'C3', description: 'Check Jira open bugs' },
|
| 576 |
+
{ id: 'C4', description: 'Assign intervention owner' },
|
| 577 |
+
],
|
| 578 |
+
};
|
| 579 |
+
const wfId = obs.workflow_id || this.selectedWorkflow;
|
| 580 |
+
this.allSteps = wfStepDefs[wfId] || [];
|
| 581 |
+
this.totalSteps= this.allSteps.length;
|
| 582 |
+
this.maxSteps = { A: 15, B: 20, C: 18 }[wfId] || 15;
|
| 583 |
+
|
| 584 |
+
// Reward breakdown
|
| 585 |
+
const rb = obs.reward_breakdown || {};
|
| 586 |
+
this.rewardComponents.forEach(c => {
|
| 587 |
+
c.value = rb[c.key] ?? 0;
|
| 588 |
+
});
|
| 589 |
+
},
|
| 590 |
+
|
| 591 |
+
// ----------------------------------------------------------------
|
| 592 |
+
// Log
|
| 593 |
+
// ----------------------------------------------------------------
|
| 594 |
+
_pushLog(entry) {
|
| 595 |
+
this.actionLog.push(entry);
|
| 596 |
+
// Auto-scroll to bottom
|
| 597 |
+
this.$nextTick(() => {
|
| 598 |
+
const el = document.getElementById('log-scroll');
|
| 599 |
+
if (el) el.scrollTop = el.scrollHeight;
|
| 600 |
+
});
|
| 601 |
+
},
|
| 602 |
+
|
| 603 |
+
// ----------------------------------------------------------------
|
| 604 |
+
// Chart
|
| 605 |
+
// ----------------------------------------------------------------
|
| 606 |
+
_initChart() {
|
| 607 |
+
const ctx = document.getElementById('rewardChart').getContext('2d');
|
| 608 |
+
return new Chart(ctx, {
|
| 609 |
+
type: 'line',
|
| 610 |
+
data: {
|
| 611 |
+
labels: [],
|
| 612 |
+
datasets: [{
|
| 613 |
+
data: [],
|
| 614 |
+
borderColor: '#38bdf8',
|
| 615 |
+
backgroundColor: 'rgba(56,189,248,0.08)',
|
| 616 |
+
borderWidth: 1.5,
|
| 617 |
+
pointRadius: 0,
|
| 618 |
+
tension: 0.3,
|
| 619 |
+
fill: true,
|
| 620 |
+
}],
|
| 621 |
+
},
|
| 622 |
+
options: {
|
| 623 |
+
animation: false,
|
| 624 |
+
responsive: true,
|
| 625 |
+
maintainAspectRatio: false,
|
| 626 |
+
plugins: { legend: { display: false }, tooltip: { enabled: false } },
|
| 627 |
+
scales: {
|
| 628 |
+
x: { display: false },
|
| 629 |
+
y: {
|
| 630 |
+
display: true,
|
| 631 |
+
grid: { color: 'rgba(255,255,255,0.04)' },
|
| 632 |
+
ticks: { color: '#475569', font: { size: 9 }, maxTicksLimit: 4 },
|
| 633 |
+
},
|
| 634 |
+
},
|
| 635 |
+
},
|
| 636 |
+
});
|
| 637 |
+
},
|
| 638 |
+
|
| 639 |
+
_updateChart() {
|
| 640 |
+
if (!this._chart) return;
|
| 641 |
+
const labels = this.rewardHistory.map((_, i) => i + 1);
|
| 642 |
+
this._chart.data.labels = labels;
|
| 643 |
+
this._chart.data.datasets[0].data = this.rewardHistory;
|
| 644 |
+
this._chart.update('none');
|
| 645 |
+
},
|
| 646 |
+
};
|
| 647 |
+
}
|
| 648 |
+
</script>
|
| 649 |
+
|
| 650 |
+
</body>
|
| 651 |
+
</html>
|