Taniieeee83 commited on
Commit
2305b9f
·
1 Parent(s): 4719066

added minimal ui and all 4 apps+workflows

Browse files
client.py CHANGED
@@ -1,105 +1,111 @@
1
  """
2
- Synchronous HTTP client for the Data Cleaning OpenEnv environment.
3
 
4
  Usage
5
  -----
6
- from client import DataCleaningEnvClient, DataCleaningAction
 
7
 
8
- client = DataCleaningEnvClient(base_url="http://localhost:8000")
9
 
10
- # Start a new episode (task_id 1/2/3 or omit for round-robin)
11
- result = client.reset(task_id=1)
12
- print(result.observation.task_description)
13
 
14
  # Take a step
15
- action = DataCleaningAction(
16
- operation="fill_missing",
17
- column="salary",
18
- params={"strategy": "median"},
19
  )
20
  result = client.step(action)
21
  print(result.observation.current_score, result.reward, result.done)
22
 
23
  # Inspect state
24
  state = client.state()
25
- print(state.episode_id, state.errors_remaining)
26
  """
27
 
28
  from typing import Optional
29
  import httpx
30
  from pydantic import BaseModel
31
 
32
- from models import DataCleaningAction, DataCleaningObservation, DataCleaningState
33
 
34
 
35
  class StepResult(BaseModel):
36
  """Returned by reset() and step()."""
37
- observation: DataCleaningObservation
38
  reward: float
39
  done: bool
40
  info: dict = {}
41
 
42
 
43
- class DataCleaningEnvClient:
44
  """
45
- Thin synchronous wrapper around the DataCleaning HTTP API.
46
 
47
  All methods raise httpx.HTTPStatusError on non-2xx responses.
48
  """
49
 
50
  def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 30.0):
51
- self.base_url = base_url.rstrip("/")
52
  self._client = httpx.Client(base_url=self.base_url, timeout=timeout)
53
 
54
  # ------------------------------------------------------------------
55
  # Core API
56
  # ------------------------------------------------------------------
57
 
58
- def reset(self, task_id: Optional[int] = None) -> StepResult:
59
  """
60
  Start a new episode.
61
 
62
  Parameters
63
  ----------
64
- task_id : int | None
65
- 1 = Easy (fill missing values)
66
- 2 = Medium (fix formats + duplicates)
67
- 3 = Hard (full pipeline)
68
- None = round-robin (1231 …)
69
  """
70
- payload = {"task_id": task_id} if task_id is not None else {}
71
  resp = self._client.post("/reset", json=payload)
72
  resp.raise_for_status()
73
  return StepResult(**resp.json())
74
 
75
- def step(self, action: DataCleaningAction) -> StepResult:
76
  """
77
- Apply one cleaning operation and return the updated observation.
78
 
79
  Parameters
80
  ----------
81
- action : DataCleaningAction
82
- operation : str – one of fill_missing / drop_duplicates /
83
- fix_format / replace_value / drop_outliers / fix_dtype
84
- column : str target column (optional for drop_duplicates)
85
- params : dict – operation-specific parameters
86
  """
87
  resp = self._client.post("/step", json=action.model_dump())
88
  resp.raise_for_status()
89
  return StepResult(**resp.json())
90
 
91
- def state(self) -> DataCleaningState:
92
  """Return current episode metadata without modifying state."""
93
  resp = self._client.get("/state")
94
  resp.raise_for_status()
95
- return DataCleaningState(**resp.json())
96
 
97
  def health(self) -> dict:
98
- """Ping the server. Returns {"status": "ok"} if healthy."""
99
  resp = self._client.get("/health")
100
  resp.raise_for_status()
101
  return resp.json()
102
 
 
 
 
 
 
 
103
  # ------------------------------------------------------------------
104
  # Context manager support
105
  # ------------------------------------------------------------------
 
1
  """
2
+ Synchronous HTTP client for the OrgOS OpenEnv environment.
3
 
4
  Usage
5
  -----
6
+ from client import OrgOSEnvClient
7
+ from models import OrgOSAction
8
 
9
+ client = OrgOSEnvClient(base_url="http://localhost:8000")
10
 
11
+ # Start a new episode (workflow_id "A"/"B"/"C" or None for round-robin)
12
+ result = client.reset(workflow_id="A")
13
+ print(result.observation.workflow_goal)
14
 
15
  # Take a step
16
+ action = OrgOSAction(
17
+ app="zendesk",
18
+ operation="acknowledge_ticket",
19
+ args={"ticket_number": "ZD-001"},
20
  )
21
  result = client.step(action)
22
  print(result.observation.current_score, result.reward, result.done)
23
 
24
  # Inspect state
25
  state = client.state()
26
+ print(state.episode_id, state.workflow_completion)
27
  """
28
 
29
  from typing import Optional
30
  import httpx
31
  from pydantic import BaseModel
32
 
33
+ from models import OrgOSAction, OrgOSObservation, OrgOSState
34
 
35
 
36
  class StepResult(BaseModel):
37
  """Returned by reset() and step()."""
38
+ observation: OrgOSObservation
39
  reward: float
40
  done: bool
41
  info: dict = {}
42
 
43
 
44
+ class OrgOSEnvClient:
45
  """
46
+ Thin synchronous wrapper around the OrgOS HTTP API.
47
 
48
  All methods raise httpx.HTTPStatusError on non-2xx responses.
49
  """
50
 
51
  def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 30.0):
52
+ self.base_url = base_url.rstrip("/")
53
  self._client = httpx.Client(base_url=self.base_url, timeout=timeout)
54
 
55
  # ------------------------------------------------------------------
56
  # Core API
57
  # ------------------------------------------------------------------
58
 
59
+ def reset(self, workflow_id: Optional[str] = None) -> StepResult:
60
  """
61
  Start a new episode.
62
 
63
  Parameters
64
  ----------
65
+ workflow_id : str | None
66
+ "A" = Customer Bug Fix (support role)
67
+ "B" = Employee Onboarding (manager role)
68
+ "C" = Churn Risk Alert (support role)
69
+ None = round-robin (ABCA …)
70
  """
71
+ payload = {"workflow_id": workflow_id} if workflow_id is not None else {}
72
  resp = self._client.post("/reset", json=payload)
73
  resp.raise_for_status()
74
  return StepResult(**resp.json())
75
 
76
+ def step(self, action: OrgOSAction) -> StepResult:
77
  """
78
+ Take one action in the environment.
79
 
80
  Parameters
81
  ----------
82
+ action : OrgOSAction
83
+ app : str – "jira" | "zendesk" | "salesforce" | "workday"
84
+ operation : str – app-specific operation name
85
+ args : dict operation arguments
 
86
  """
87
  resp = self._client.post("/step", json=action.model_dump())
88
  resp.raise_for_status()
89
  return StepResult(**resp.json())
90
 
91
+ def state(self) -> OrgOSState:
92
  """Return current episode metadata without modifying state."""
93
  resp = self._client.get("/state")
94
  resp.raise_for_status()
95
+ return OrgOSState(**resp.json())
96
 
97
  def health(self) -> dict:
98
+ """Ping the server. Returns {"status": "healthy"} if healthy."""
99
  resp = self._client.get("/health")
100
  resp.raise_for_status()
101
  return resp.json()
102
 
103
+ def app_schemas(self) -> dict:
104
+ """Return per-app operation catalogue."""
105
+ resp = self._client.get("/schema/apps")
106
+ resp.raise_for_status()
107
+ return resp.json()
108
+
109
  # ------------------------------------------------------------------
110
  # Context manager support
111
  # ------------------------------------------------------------------
inference.py CHANGED
@@ -1,17 +1,17 @@
1
  """
2
- Baseline inference script for the Data Cleaning OpenEnv environment.
3
- Uses the OpenAI client against all 3 tasks and reports scores.
4
 
5
  Required environment variables:
6
  API_BASE_URL — LLM API endpoint (OpenAI-compatible)
7
- MODEL_NAME — model identifier
8
- HF_TOKEN — API key
9
  ENV_URL — environment server URL (default: http://localhost:8000)
10
 
11
  STDOUT FORMAT (OpenEnv spec):
12
- [START] task=<task_name> env=<benchmark> model=<model_name>
13
- [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
14
- [END] task=<task_name> score=<0.00> steps=<n>
15
  """
16
 
17
  import json
@@ -19,7 +19,8 @@ import os
19
  import re
20
  import sys
21
  import time
22
- from typing import List, Optional
 
23
  import httpx
24
  from openai import OpenAI
25
 
@@ -30,72 +31,86 @@ from openai import OpenAI
30
  API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
31
  MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
32
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
33
- ENV_URL = os.environ.get("ENV_URL", "http://localhost:8000")
34
 
35
  if not HF_TOKEN:
36
  print("[WARNING] HF_TOKEN is not set — LLM calls may fail.", file=sys.stderr)
37
 
38
- client = OpenAI(api_key=HF_TOKEN, base_url=API_BASE_URL)
39
-
40
- SYSTEM_PROMPT = """You are a data cleaning agent. You control a data cleaning environment
41
- through JSON actions. Each turn you receive an observation JSON describing the current state
42
- of a dataset (preview, missing counts, duplicate count, dtype issues, current score, etc.)
43
- and a task description.
44
-
45
- Your job is to pick the single best action to improve the dataset quality.
46
-
47
- Respond ONLY with a valid JSON object — no markdown, no explanation, just the JSON.
48
-
49
- Available operations and their required parameters:
50
-
51
- 1. fill_missing
52
- {"operation": "fill_missing", "column": "<col>", "params": {"strategy": "median|mean|mode|constant", "value": <only if constant>}}
53
 
54
- 2. drop_duplicates
55
- {"operation": "drop_duplicates"}
56
-
57
- 3. fix_format
58
- {"operation": "fix_format", "column": "phone|listed_date|signup_date|country"}
59
-
60
- 4. replace_value
61
- {"operation": "replace_value", "column": "<col>", "params": {"old": "<val>", "new": "<val>"}}
62
-
63
- 5. drop_outliers
64
- {"operation": "drop_outliers", "column": "<numeric_col>"}
65
-
66
- 6. fix_dtype
67
- {"operation": "fix_dtype", "column": "<col>", "params": {"dtype": "float|int|str"}}
68
 
69
- Rules:
70
- - Address the highest-impact issues first (missing values > duplicates > outliers > format).
71
- - Do not repeat an operation that returned no effect (watch the 'message' field).
72
- - Stop when current_score >= 0.95.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  """
74
 
 
 
 
 
 
75
 
76
  # ------------------------------------------------------------------
77
  # OpenEnv stdout logging helpers
78
  # ------------------------------------------------------------------
79
 
80
- def log_start(task: str, env: str, model: str) -> None:
81
- print(f"[START] task={task} env={env} model={model}", flush=True)
82
 
83
 
84
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
85
  error_val = error if error else "null"
86
- done_val = str(done).lower()
87
  print(
88
- f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
 
89
  flush=True,
90
  )
91
 
92
 
93
  def log_end(task_name: str, score: float, steps: int) -> None:
94
- safe_score = max(0.01, min(0.99, float(score)))
95
- print(
96
- f"[END] task={task_name} score={safe_score:.4f} steps={steps}",
97
- flush=True
98
- )
99
 
100
 
101
  # ------------------------------------------------------------------
@@ -117,60 +132,80 @@ def api_get(path: str) -> dict:
117
 
118
 
119
  # ------------------------------------------------------------------
120
- # Agent loop
121
  # ------------------------------------------------------------------
122
 
123
  def obs_to_text(obs: dict) -> str:
124
  lines = [
125
- f"current_score: {obs['current_score']}",
126
- f"step_count: {obs['step_count']}",
127
- f"data_shape: {obs['data_shape']}",
128
- f"duplicate_count: {obs['duplicate_count']}",
129
- f"missing_counts: {json.dumps(obs['missing_counts'])}",
130
- f"dtype_issues: {json.dumps(obs['dtype_issues'])}",
131
- f"message: {obs['message']}",
 
 
 
 
 
 
 
 
132
  "",
133
- "=== DATA PREVIEW (first 10 rows) ===",
134
- obs["data_preview"],
135
  "",
136
- "=== TASK DESCRIPTION ===",
137
- obs["task_description"],
138
  ]
 
 
 
 
 
 
 
 
 
139
  return "\n".join(lines)
140
 
141
 
142
- def run_task(task_id: int) -> float:
143
- task_name = f"data-cleaning-task{task_id}"
 
 
 
 
144
 
145
- # Human-readable header (stderr so it doesn't interfere with stdout format)
146
  print(f"\n{'='*60}", file=sys.stderr)
147
- print(f" Running Task {task_id}", file=sys.stderr)
148
  print(f"{'='*60}", file=sys.stderr)
149
 
150
- result = api_post("/reset", {"task_id": task_id})
151
  obs = result["observation"]
152
- history = []
153
- rewards: List[float] = []
154
  steps_taken = 0
155
- success = False
156
 
157
- log_start(task=task_name, env="data-cleaning-openenv", model=MODEL_NAME)
158
 
159
  try:
160
- for step_num in range(1, 50):
161
  if obs["done"]:
162
- success = obs["current_score"] >= 0.95
163
  break
164
 
165
  obs_text = obs_to_text(obs)
166
  history.append({"role": "user", "content": obs_text})
167
 
 
 
 
 
168
  try:
169
- response = client.chat.completions.create(
170
  model = MODEL_NAME,
171
  messages = [{"role": "system", "content": SYSTEM_PROMPT}] + history,
172
  temperature = 0.0,
173
- max_tokens = 256,
174
  )
175
  action_str = response.choices[0].message.content.strip()
176
  except Exception as exc:
@@ -193,13 +228,13 @@ def run_task(task_id: int) -> float:
193
  pass
194
 
195
  if action is None:
196
- print(f" Step {step_num}: Could not parse action JSON, skipping.", file=sys.stderr)
197
  log_step(step_num, action_str, -0.05, False, "json_parse_error")
198
  break
199
 
200
  action_label = json.dumps(action, separators=(",", ":"))
201
  print(
202
- f" Step {step_num:2d} | score={obs['current_score']:.4f} | action={action_label}",
203
  file=sys.stderr,
204
  )
205
 
@@ -207,13 +242,15 @@ def run_task(task_id: int) -> float:
207
  obs = result["observation"]
208
  step_reward = result["reward"]
209
  done = result["done"]
210
- error_msg = None if obs["message"].startswith("Fill") or step_reward >= 0 else obs["message"]
 
 
 
 
211
 
212
- print(f" -> {obs['message']}", file=sys.stderr)
213
 
214
- rewards.append(step_reward)
215
  steps_taken = step_num
216
-
217
  log_step(
218
  step = step_num,
219
  action = action_label,
@@ -223,49 +260,161 @@ def run_task(task_id: int) -> float:
223
  )
224
 
225
  if done:
226
- success = obs["current_score"] >= 0.95
227
  break
228
 
229
- time.sleep(0.3)
230
 
231
  finally:
232
- final = obs.get("current_score", 0.01) if isinstance(obs, dict) else 0.01
233
  log_end(task_name=task_name, score=final, steps=steps_taken)
234
 
235
  final_score = obs["current_score"]
 
236
  print(
237
- f"\n Task {task_id} final score: {final_score:.4f} (steps used: {obs['step_count']})",
 
238
  file=sys.stderr,
239
  )
240
  return final_score
241
 
242
 
243
  # ------------------------------------------------------------------
244
- # Main
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  # ------------------------------------------------------------------
246
 
247
  def main():
248
- print("Data Cleaning OpenEnv -- Baseline Inference", file=sys.stderr)
249
  print(f"Model : {MODEL_NAME}", file=sys.stderr)
250
  print(f"Env : {ENV_URL}", file=sys.stderr)
251
 
252
- # Smoke-test health endpoint
253
  try:
254
  health = api_get("/health")
255
  assert health.get("status") in ("ok", "healthy"), f"Unexpected status: {health}"
256
  print("Health check: OK\n", file=sys.stderr)
257
  except Exception as exc:
258
  print(f"[ERROR] Environment not reachable at {ENV_URL}: {exc}", file=sys.stderr)
259
- print("[ERROR] Make sure the server is running and ENV_URL is correct.", file=sys.stderr)
260
  sys.exit(1)
261
 
262
- scores = {}
263
- for task_id in [1, 2, 3]:
264
  try:
265
- scores[f"task{task_id}"] = run_task(task_id)
266
  except Exception as exc:
267
- print(f"[ERROR] Task {task_id} failed: {exc}", file=sys.stderr)
268
- scores[f"task{task_id}"] = 0.01
269
 
270
  print("\n" + "="*60, file=sys.stderr)
271
  print(" BASELINE RESULTS", file=sys.stderr)
@@ -276,11 +425,10 @@ def main():
276
  print(f" average: {avg:.4f}", file=sys.stderr)
277
  print("="*60, file=sys.stderr)
278
 
279
- # Write scores to file for automated validators
280
  with open("baseline_scores.json", "w") as f:
281
  json.dump({"scores": scores, "average": avg}, f, indent=2)
282
  print("\nScores written to baseline_scores.json", file=sys.stderr)
283
 
284
 
285
  if __name__ == "__main__":
286
- main()
 
1
  """
2
+ Baseline inference script for the OrgOS OpenEnv environment.
3
+ Runs all three workflows (A / B / C) and reports scores.
4
 
5
  Required environment variables:
6
  API_BASE_URL — LLM API endpoint (OpenAI-compatible)
7
+ MODEL_NAME — model identifier (default: gpt-4o-mini)
8
+ HF_TOKEN — API key for the LLM endpoint
9
  ENV_URL — environment server URL (default: http://localhost:8000)
10
 
11
  STDOUT FORMAT (OpenEnv spec):
12
+ [START] task=<workflow_name> env=orgos-openenv model=<model>
13
+ [STEP] step=<n> action=<json> reward=<0.00> done=<true|false> error=<msg|null>
14
+ [END] task=<workflow_name> score=<0.00> steps=<n>
15
  """
16
 
17
  import json
 
19
  import re
20
  import sys
21
  import time
22
+ from typing import AsyncGenerator, Dict, List, Optional
23
+
24
  import httpx
25
  from openai import OpenAI
26
 
 
31
  API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
32
  MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
33
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
34
+ ENV_URL = os.environ.get("ENV_URL", "http://localhost:8000")
35
 
36
  if not HF_TOKEN:
37
  print("[WARNING] HF_TOKEN is not set — LLM calls may fail.", file=sys.stderr)
38
 
39
+ llm_client = OpenAI(api_key=HF_TOKEN or "sk-placeholder", base_url=API_BASE_URL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # ------------------------------------------------------------------
42
+ # System prompt
43
+ # ------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ SYSTEM_PROMPT = """\
46
+ You are OrgOS Agent an enterprise workflow automation agent.
47
+ You operate across four SaaS applications: Jira, Zendesk, Salesforce, and Workday.
48
+
49
+ Each turn you receive a JSON observation with:
50
+ - workflow_goal : the task you must complete
51
+ - pending_steps : remaining steps in the workflow
52
+ - app_states : current state of each app
53
+ - schema_hints : field renames in effect this episode (e.g. {"jira.priority": "severity"})
54
+ - active_rules : current SLA / approval thresholds
55
+ - message : feedback from the last action
56
+ - current_score : your cumulative score (0.001–0.999)
57
+
58
+ Respond ONLY with a valid JSON object — no markdown, no explanation.
59
+
60
+ Action format:
61
+ {"app": "<app>", "operation": "<op>", "args": {...}}
62
+
63
+ Available apps and key operations:
64
+ jira: get_issue, create_issue, update_status, set_priority, assign_owner,
65
+ add_label, link_zendesk_ticket, close_issue, list_issues
66
+ zendesk: get_ticket, acknowledge_ticket, set_urgency, assign_agent,
67
+ escalate_to_jira, resolve_ticket, add_note, list_tickets
68
+ salesforce: get_account, list_accounts, update_deal_stage, flag_churn_risk,
69
+ assign_account_owner, log_interaction, get_opportunity
70
+ workday: get_employee, list_employees, provision_access, log_sla_event,
71
+ request_budget_approval, create_onboarding_task, complete_task
72
+
73
+ CRITICAL RULES:
74
+ 1. Read schema_hints FIRST — if "jira.priority" → "severity", use "severity" not "priority" in args.
75
+ 2. Complete ALL pending_steps in order.
76
+ 3. Do not repeat a successful action.
77
+ 4. If an operation fails, read the message carefully and adapt.
78
+ 5. Use list_* operations to discover record IDs when needed.
79
+ 6. Stop when pending_steps is empty or done=true.
80
+
81
+ Example actions:
82
+ {"app": "zendesk", "operation": "acknowledge_ticket", "args": {"ticket_number": "ZD-001"}}
83
+ {"app": "jira", "operation": "create_issue", "args": {"title": "Bug fix for ACME-001", "linked_zendesk": "ZD-001"}}
84
+ {"app": "salesforce", "operation": "get_account", "args": {"account_id": "ACME-001"}}
85
+ {"app": "workday", "operation": "log_sla_event", "args": {"ticket_id": "ZD-001", "sla_met": true}}
86
  """
87
 
88
+ WORKFLOW_NAMES = {
89
+ "A": "workflow-a-bug-fix",
90
+ "B": "workflow-b-onboarding",
91
+ "C": "workflow-c-churn-alert",
92
+ }
93
 
94
  # ------------------------------------------------------------------
95
  # OpenEnv stdout logging helpers
96
  # ------------------------------------------------------------------
97
 
98
+ def log_start(task: str, env_name: str, model: str) -> None:
99
+ print(f"[START] task={task} env={env_name} model={model}", flush=True)
100
 
101
 
102
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
103
  error_val = error if error else "null"
 
104
  print(
105
+ f"[STEP] step={step} action={action} reward={reward:.4f} "
106
+ f"done={str(done).lower()} error={error_val}",
107
  flush=True,
108
  )
109
 
110
 
111
  def log_end(task_name: str, score: float, steps: int) -> None:
112
+ safe_score = max(0.001, min(0.999, float(score)))
113
+ print(f"[END] task={task_name} score={safe_score:.4f} steps={steps}", flush=True)
 
 
 
114
 
115
 
116
  # ------------------------------------------------------------------
 
132
 
133
 
134
  # ------------------------------------------------------------------
135
+ # Observation formatter
136
  # ------------------------------------------------------------------
137
 
138
  def obs_to_text(obs: dict) -> str:
139
  lines = [
140
+ f"current_score: {obs['current_score']}",
141
+ f"step_count: {obs['step_count']}",
142
+ f"workflow_id: {obs['workflow_id']}",
143
+ "",
144
+ "=== WORKFLOW GOAL ===",
145
+ obs["workflow_goal"],
146
+ "",
147
+ "=== PENDING STEPS ===",
148
+ "\n".join(f" - {s}" for s in obs["pending_steps"]) or " (all steps complete!)",
149
+ "",
150
+ "=== SCHEMA HINTS (use these field names) ===",
151
+ json.dumps(obs["schema_hints"], indent=2) if obs["schema_hints"] else " (no drift — use canonical names)",
152
+ "",
153
+ "=== ACTIVE RULES ===",
154
+ json.dumps(obs["active_rules"], indent=2),
155
  "",
156
+ "=== LAST MESSAGE ===",
157
+ obs["message"],
158
  "",
159
+ "=== APP STATES ===",
 
160
  ]
161
+ for app_name, view in obs.get("app_states", {}).items():
162
+ lines.append(f" [{app_name.upper()}]")
163
+ lines.append(f" {view}")
164
+ lines.append("")
165
+ if obs.get("rule_violations"):
166
+ lines.append("=== RULE VIOLATIONS (fix these!) ===")
167
+ for v in obs["rule_violations"]:
168
+ lines.append(f" ⚠ {v}")
169
+ lines.append("")
170
  return "\n".join(lines)
171
 
172
 
173
+ # ------------------------------------------------------------------
174
+ # Single-workflow inference loop
175
+ # ------------------------------------------------------------------
176
+
177
+ def run_workflow(workflow_id: str) -> float:
178
+ task_name = WORKFLOW_NAMES.get(workflow_id, f"workflow-{workflow_id.lower()}")
179
 
 
180
  print(f"\n{'='*60}", file=sys.stderr)
181
+ print(f" Running Workflow {workflow_id}", file=sys.stderr)
182
  print(f"{'='*60}", file=sys.stderr)
183
 
184
+ result = api_post("/reset", {"workflow_id": workflow_id})
185
  obs = result["observation"]
186
+ history: List[dict] = []
 
187
  steps_taken = 0
 
188
 
189
+ log_start(task=task_name, env_name="orgos-openenv", model=MODEL_NAME)
190
 
191
  try:
192
+ for step_num in range(1, 60):
193
  if obs["done"]:
 
194
  break
195
 
196
  obs_text = obs_to_text(obs)
197
  history.append({"role": "user", "content": obs_text})
198
 
199
+ # Trim history to avoid context overflow
200
+ if len(history) > 20:
201
+ history = history[-20:]
202
+
203
  try:
204
+ response = llm_client.chat.completions.create(
205
  model = MODEL_NAME,
206
  messages = [{"role": "system", "content": SYSTEM_PROMPT}] + history,
207
  temperature = 0.0,
208
+ max_tokens = 300,
209
  )
210
  action_str = response.choices[0].message.content.strip()
211
  except Exception as exc:
 
228
  pass
229
 
230
  if action is None:
231
+ print(f" Step {step_num}: Could not parse action JSON.", file=sys.stderr)
232
  log_step(step_num, action_str, -0.05, False, "json_parse_error")
233
  break
234
 
235
  action_label = json.dumps(action, separators=(",", ":"))
236
  print(
237
+ f" Step {step_num:2d} | score={obs['current_score']:.4f} | {action_label}",
238
  file=sys.stderr,
239
  )
240
 
 
242
  obs = result["observation"]
243
  step_reward = result["reward"]
244
  done = result["done"]
245
+ error_msg = (
246
+ obs["message"]
247
+ if obs.get("rule_violations") or step_reward < 0
248
+ else None
249
+ )
250
 
251
+ print(f" {obs['message']}", file=sys.stderr)
252
 
 
253
  steps_taken = step_num
 
254
  log_step(
255
  step = step_num,
256
  action = action_label,
 
260
  )
261
 
262
  if done:
 
263
  break
264
 
265
+ time.sleep(0.2)
266
 
267
  finally:
268
+ final = obs.get("current_score", 0.001) if isinstance(obs, dict) else 0.001
269
  log_end(task_name=task_name, score=final, steps=steps_taken)
270
 
271
  final_score = obs["current_score"]
272
+ wf_done = not obs.get("pending_steps")
273
  print(
274
+ f"\n Workflow {workflow_id} final score: {final_score:.4f} "
275
+ f"steps: {obs['step_count']} completed: {wf_done}",
276
  file=sys.stderr,
277
  )
278
  return final_score
279
 
280
 
281
  # ------------------------------------------------------------------
282
+ # Async generator for SSE streaming from the UI
283
+ # ------------------------------------------------------------------
284
+
285
+ async def run_workflow_generator(
286
+ workflow_id: str = "A",
287
+ env_ref=None,
288
+ ) -> AsyncGenerator[dict, None]:
289
+ """
290
+ Async generator that runs one inference episode and yields
291
+ SSE-friendly event dicts for the dashboard UI.
292
+
293
+ Each yielded dict has a "type" key:
294
+ "reset" — episode started
295
+ "step" — one action taken
296
+ "done" — episode ended
297
+ "error" — something went wrong
298
+ """
299
+ import asyncio
300
+
301
+ if env_ref is None:
302
+ # Fall back to HTTP if no direct env reference
303
+ result = api_post("/reset", {"workflow_id": workflow_id})
304
+ else:
305
+ from models import OrgOSAction as _Action
306
+ obs_obj = env_ref.reset(workflow_id=workflow_id)
307
+ result = {"observation": obs_obj.model_dump(), "reward": obs_obj.reward, "done": False}
308
+
309
+ obs = result["observation"]
310
+ history: List[dict] = []
311
+
312
+ yield {"type": "reset", "observation": obs, "workflow_id": workflow_id}
313
+ await asyncio.sleep(0)
314
+
315
+ for step_num in range(1, 60):
316
+ if obs["done"]:
317
+ break
318
+
319
+ obs_text = obs_to_text(obs)
320
+ history.append({"role": "user", "content": obs_text})
321
+ if len(history) > 20:
322
+ history = history[-20:]
323
+
324
+ try:
325
+ response = llm_client.chat.completions.create(
326
+ model = MODEL_NAME,
327
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}] + history,
328
+ temperature = 0.0,
329
+ max_tokens = 300,
330
+ )
331
+ action_str = response.choices[0].message.content.strip()
332
+ except Exception as exc:
333
+ yield {"type": "error", "step": step_num, "message": str(exc)}
334
+ break
335
+
336
+ history.append({"role": "assistant", "content": action_str})
337
+
338
+ action = None
339
+ try:
340
+ action = json.loads(action_str)
341
+ except json.JSONDecodeError:
342
+ m = re.search(r"\{.*\}", action_str, re.DOTALL)
343
+ if m:
344
+ try:
345
+ action = json.loads(m.group())
346
+ except Exception:
347
+ pass
348
+
349
+ if action is None:
350
+ yield {"type": "error", "step": step_num, "message": "JSON parse error"}
351
+ break
352
+
353
+ if env_ref is None:
354
+ result = api_post("/step", action)
355
+ else:
356
+ from models import OrgOSAction as _Action
357
+ try:
358
+ act = _Action(**action)
359
+ obs_obj = env_ref.step(act)
360
+ result = {
361
+ "observation": obs_obj.model_dump(),
362
+ "reward": obs_obj.reward,
363
+ "done": obs_obj.done,
364
+ }
365
+ except Exception as exc:
366
+ yield {"type": "error", "step": step_num, "message": str(exc)}
367
+ break
368
+
369
+ obs = result["observation"]
370
+ step_reward = result["reward"]
371
+ done = result["done"]
372
+
373
+ yield {
374
+ "type": "step",
375
+ "step": step_num,
376
+ "action": action,
377
+ "observation": obs,
378
+ "reward": step_reward,
379
+ "done": done,
380
+ }
381
+ await asyncio.sleep(0)
382
+
383
+ if done:
384
+ break
385
+
386
+ yield {
387
+ "type": "done",
388
+ "final_score": obs.get("current_score", 0.001),
389
+ "steps": obs.get("step_count", step_num),
390
+ "completed": not obs.get("pending_steps"),
391
+ }
392
+
393
+
394
+ # ------------------------------------------------------------------
395
+ # Main — run all three workflows sequentially
396
  # ------------------------------------------------------------------
397
 
398
  def main():
399
+ print("OrgOS OpenEnv Baseline Inference", file=sys.stderr)
400
  print(f"Model : {MODEL_NAME}", file=sys.stderr)
401
  print(f"Env : {ENV_URL}", file=sys.stderr)
402
 
 
403
  try:
404
  health = api_get("/health")
405
  assert health.get("status") in ("ok", "healthy"), f"Unexpected status: {health}"
406
  print("Health check: OK\n", file=sys.stderr)
407
  except Exception as exc:
408
  print(f"[ERROR] Environment not reachable at {ENV_URL}: {exc}", file=sys.stderr)
 
409
  sys.exit(1)
410
 
411
+ scores: Dict[str, float] = {}
412
+ for wf_id in ["A", "B", "C"]:
413
  try:
414
+ scores[f"workflow_{wf_id}"] = run_workflow(wf_id)
415
  except Exception as exc:
416
+ print(f"[ERROR] Workflow {wf_id} failed: {exc}", file=sys.stderr)
417
+ scores[f"workflow_{wf_id}"] = 0.001
418
 
419
  print("\n" + "="*60, file=sys.stderr)
420
  print(" BASELINE RESULTS", file=sys.stderr)
 
425
  print(f" average: {avg:.4f}", file=sys.stderr)
426
  print("="*60, file=sys.stderr)
427
 
 
428
  with open("baseline_scores.json", "w") as f:
429
  json.dump({"scores": scores, "average": avg}, f, indent=2)
430
  print("\nScores written to baseline_scores.json", file=sys.stderr)
431
 
432
 
433
  if __name__ == "__main__":
434
+ main()
models.py CHANGED
@@ -1,38 +1,46 @@
1
  # models.py
 
 
 
 
 
2
 
3
  class OrgOSAction(BaseModel):
4
- app: str # "jira" | "zendesk" | "salesforce" | "workday"
5
- operation: str # app-specific operation name
6
  args: Dict[str, Any] = {}
7
 
 
8
  class RewardBreakdown(BaseModel):
9
- workflow_completion: float = 0.0 # 0.30 weight
10
- rule_compliance: float = 0.0 # 0.25 weight
11
- schema_adaptation: float = 0.0 # 0.20 weight
12
- efficiency: float = 0.0 # 0.15 weight
13
- policy_drift_handling: float = 0.0 # 0.10 weight
 
14
 
15
  class OrgOSObservation(BaseModel):
16
  done: bool
17
  reward: float
18
  current_score: float
19
- workflow_id: str # "A", "B", or "C"
20
  step_count: int
21
  # Per-app state views (what the agent sees)
22
- app_states: Dict[str, str] # app_name → CSV/JSON string preview
23
  # Workflow progress
24
  workflow_goal: str
25
  completed_steps: List[str]
26
  pending_steps: List[str]
27
  # Schema drift info (partial — agent must probe to discover rest)
28
- schema_hints: Dict[str, str] # e.g. {"jira.priority": "severity"}
29
  # Business rules in effect this episode
30
- active_rules: Dict[str, Any] # {"sla_p0_minutes": 15, "approval_threshold": 5000}
31
  # Per-step feedback
32
- rule_violations: List[str] # violations that just occurred
33
  reward_breakdown: RewardBreakdown
34
  message: str
35
 
 
36
  class OrgOSState(BaseModel):
37
  episode_id: str
38
  workflow_id: str
@@ -42,4 +50,4 @@ class OrgOSState(BaseModel):
42
  rule_violation_count: int
43
  workflow_completion: float
44
  rule_compliance_rate: float
45
- policy_drift_active: bool
 
1
  # models.py
2
+ """Pydantic models for the OrgOS OpenEnv environment."""
3
+
4
+ from typing import Any, Dict, List
5
+ from pydantic import BaseModel
6
+
7
 
8
class OrgOSAction(BaseModel):
    """One agent action: invoke a single operation on one enterprise app."""

    # Target app — one of "jira" | "zendesk" | "salesforce" | "workday".
    app: str
    # App-specific operation name (see GET /schema/apps for the catalogue).
    operation: str
    # Operation arguments, passed through to the app's operation handler.
    args: Dict[str, Any] = {}
12
 
13
+
14
class RewardBreakdown(BaseModel):
    """Per-component reward terms; the per-field weights noted below sum to 1.0."""

    workflow_completion: float = 0.0    # 0.30 weight
    rule_compliance: float = 0.0        # 0.25 weight
    schema_adaptation: float = 0.0      # 0.20 weight
    efficiency: float = 0.0             # 0.15 weight
    policy_drift_handling: float = 0.0  # 0.10 weight
20
+
21
 
22
class OrgOSObservation(BaseModel):
    """What the agent sees after reset() and after every step()."""

    done: bool
    reward: float
    current_score: float
    workflow_id: str  # "A", "B", or "C"
    step_count: int
    # Per-app state views (what the agent sees): app_name → string preview
    app_states: Dict[str, str]
    # Workflow progress
    workflow_goal: str
    completed_steps: List[str]
    pending_steps: List[str]
    # Schema drift info (partial — agent must probe to discover the rest),
    # e.g. {"jira.priority": "severity"}
    schema_hints: Dict[str, str]
    # Business rules in effect this episode, e.g. {"sla_p0_minutes": 15, ...}
    active_rules: Dict[str, Any]
    # Rule violations that occurred on the step just taken
    rule_violations: List[str]
    reward_breakdown: RewardBreakdown
    message: str
42
 
43
+
44
  class OrgOSState(BaseModel):
45
  episode_id: str
46
  workflow_id: str
 
50
  rule_violation_count: int
51
  workflow_completion: float
52
  rule_compliance_rate: float
53
+ policy_drift_active: bool
server/app.py CHANGED
@@ -1,63 +1,110 @@
1
  """
2
- FastAPI application exposing the OpenEnv-compatible HTTP API.
3
- Endpoints: GET /health, GET /metadata, GET /schema,
4
- POST /reset, POST /step, GET /state, POST /state, GET /docs
 
 
 
 
 
 
 
 
 
 
5
  """
6
 
 
 
7
  from typing import Any, Dict, Optional
 
 
8
  from fastapi import Body, FastAPI, HTTPException
 
 
9
  from pydantic import BaseModel
10
- import uvicorn
11
 
12
- from models import DataCleaningAction, DataCleaningObservation, DataCleaningState
13
- from server.environment import DataCleaningEnvironment
 
 
 
 
 
14
 
15
  app = FastAPI(
16
- title="Data Cleaning OpenEnv",
17
- description="A real-world data cleaning environment for AI agent training.",
18
- version="0.1.0",
 
 
 
19
  )
20
 
21
- # Single shared environment instance (stateful server)
22
- env = DataCleaningEnvironment()
 
 
 
 
 
23
 
24
 
25
- # New reset body accepts workflow_id
 
 
 
26
  class ResetRequest(BaseModel):
27
- workflow_id: Optional[str] = None # "A", "B", "C", or None for round-robin
28
 
29
 
30
  class StepResponse(BaseModel):
31
- observation: DataCleaningObservation
32
  reward: float
33
  done: bool
34
  info: dict = {}
35
 
36
 
37
  # ------------------------------------------------------------------
38
- # Routes
39
  # ------------------------------------------------------------------
40
 
41
  @app.get("/health")
42
  def health():
43
- return {"status": "healthy"}
44
 
45
 
46
  @app.get("/metadata")
47
  def metadata():
48
  return {
49
- "name": "data-cleaning-env",
50
  "description": (
51
- "A real-world data cleaning environment where an AI agent fixes "
52
- "missing values, duplicate rows, format inconsistencies, outliers, "
53
- "and dtype errors across three progressively harder tasks."
 
54
  ),
55
- "version": "0.1.0",
56
- "tags": ["openenv", "data-cleaning", "rl", "real-world"],
57
- "tasks": [
58
- {"id": "task1", "name": "Fill Missing Values", "difficulty": "easy"},
59
- {"id": "task2", "name": "Fix Formats and Remove Duplicates", "difficulty": "medium"},
60
- {"id": "task3", "name": "Full Cleaning Pipeline", "difficulty": "hard"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  ],
62
  }
63
 
@@ -68,58 +115,54 @@ def schema():
68
  "action": {
69
  "type": "object",
70
  "properties": {
71
- "operation": {
72
- "type": "string",
73
- "enum": [
74
- "fill_missing",
75
- "drop_duplicates",
76
- "fix_format",
77
- "replace_value",
78
- "drop_outliers",
79
- "fix_dtype",
80
- ],
81
- },
82
- "column": {"type": "string", "nullable": True},
83
- "params": {"type": "object", "nullable": True},
84
  },
85
- "required": ["operation"],
86
  },
87
  "observation": {
88
  "type": "object",
89
  "properties": {
90
- "done": {"type": "boolean"},
91
- "reward": {"type": "number"},
92
- "data_preview": {"type": "string"},
93
- "data_shape": {"type": "array", "items": {"type": "integer"}},
94
- "missing_counts": {"type": "object"},
95
- "duplicate_count": {"type": "integer"},
96
- "dtype_issues": {"type": "object"},
97
- "task_description": {"type": "string"},
98
- "message": {"type": "string"},
99
- "step_count": {"type": "integer"},
100
- "current_score": {"type": "number"},
 
 
 
101
  },
102
  },
103
  "state": {
104
  "type": "object",
105
  "properties": {
106
- "episode_id": {"type": "string"},
107
- "task_id": {"type": "integer"},
108
- "step_count": {"type": "integer"},
109
- "max_steps": {"type": "integer"},
110
- "total_errors": {"type": "integer"},
111
- "errors_remaining": {"type": "integer"},
 
 
 
112
  },
113
  },
114
  }
115
 
116
 
117
  @app.post("/reset", response_model=StepResponse)
118
- def reset(req: ResetRequest = ResetRequest()):
119
  try:
120
- obs = env.reset(task_id=req.task_id)
121
- except ValueError as e:
122
- raise HTTPException(status_code=400, detail=str(e))
123
  return StepResponse(observation=obs, reward=obs.reward, done=False)
124
 
125
 
@@ -127,49 +170,108 @@ def reset(req: ResetRequest = ResetRequest()):
127
  async def step(body: Dict[str, Any] = Body(...)):
128
  """
129
  Accept both openenv-core wrapped format:
130
- {"action": {"operation": "...", ...}, "timeout_s": 15}
131
- and direct format (for backward compat with our own client/inference):
132
- {"operation": "...", "column": "...", "params": {...}}
133
  """
134
  action_data = body.get("action", body)
135
  try:
136
- action = DataCleaningAction(**action_data)
137
- obs = env.step(action)
138
- except (TypeError, KeyError, Exception) as e:
139
- raise HTTPException(status_code=400, detail=str(e))
140
  return StepResponse(observation=obs, reward=obs.reward, done=obs.done)
141
 
142
 
143
- @app.get("/state", response_model=DataCleaningState)
144
  def state_get():
145
  """GET /state — openenv-core spec."""
146
  return env.state()
147
 
148
 
149
- @app.post("/state", response_model=DataCleaningState)
150
  def state_post():
151
  """POST /state — backward compatibility."""
152
  return env.state()
153
 
154
 
155
-
156
- @app.get("/", response_class=HTMLResponse)
157
- def ui():
158
- """Serve the demo dashboard."""
159
- return FileResponse("ui/index.html")
160
 
161
  @app.get("/schema/apps")
162
  def app_schemas():
163
- """Return the canonical action space per app used by the UI."""
164
- return {...} # maps app → list of operations + their arg schemas
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
 
167
  # ------------------------------------------------------------------
168
- # Entry point (required by openenv-core and [project.scripts])
169
  # ------------------------------------------------------------------
170
 
171
  def main():
172
- uvicorn.run("server.app:app", host="0.0.0.0", port=8000)
173
 
174
 
175
  if __name__ == "__main__":
 
1
  """
2
+ FastAPI application exposing the OrgOS OpenEnv HTTP API.
3
+
4
+ Endpoints (OpenEnv-compatible):
5
+ GET /health — liveness probe
6
+ GET /metadata — env description
7
+ GET /schema — action / observation schema
8
+ POST /reset — start new episode
9
+ POST /step — take one action
10
+ GET /state — current episode metadata
11
+ POST /state — same (backward compat)
12
+ GET /schema/apps — per-app operation catalogue (used by UI)
13
+ GET / — serve the demo dashboard UI
14
+ GET /ui/run-agent — SSE stream of one inference episode (for UI)
15
  """
16
 
17
+ import json
18
+ import os
19
  from typing import Any, Dict, Optional
20
+
21
+ import uvicorn
22
  from fastapi import Body, FastAPI, HTTPException
23
+ from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
24
+ from fastapi.staticfiles import StaticFiles
25
  from pydantic import BaseModel
 
26
 
27
+ from models import OrgOSAction, OrgOSObservation, OrgOSState
28
+ from server.environment import OrgOSEnvironment
29
+
30
+
31
+ # ------------------------------------------------------------------
32
+ # App setup
33
+ # ------------------------------------------------------------------
34
 
35
  app = FastAPI(
36
+ title="OrgOS Multi-App Enterprise RL Environment",
37
+ description=(
38
+ "A Salesforce + Zendesk + Jira + Workday simulator for training agents "
39
+ "that handle real enterprise workflows under schema drift and policy changes."
40
+ ),
41
+ version="2.0.0",
42
  )
43
 
44
+ # Mount static assets (JS, CSS) if the ui/ directory exists
45
+ _UI_STATIC = os.path.join(os.path.dirname(__file__), "..", "ui", "static")
46
+ if os.path.isdir(_UI_STATIC):
47
+ app.mount("/static", StaticFiles(directory=_UI_STATIC), name="static")
48
+
49
+ # Single shared environment instance (stateful per-process)
50
+ env = OrgOSEnvironment()
51
 
52
 
53
+ # ------------------------------------------------------------------
54
+ # Request / response helpers
55
+ # ------------------------------------------------------------------
56
+
57
class ResetRequest(BaseModel):
    """Request body for POST /reset."""

    # "A", "B", "C", or None for round-robin workflow selection.
    workflow_id: Optional[str] = None
59
 
60
 
61
class StepResponse(BaseModel):
    """Response envelope returned by POST /reset and POST /step."""

    observation: OrgOSObservation
    reward: float
    done: bool
    info: dict = {}
66
 
67
 
68
  # ------------------------------------------------------------------
69
+ # Core OpenEnv routes
70
  # ------------------------------------------------------------------
71
 
72
@app.get("/health")
def health():
    """Liveness probe: report service identity and version."""
    payload = {"status": "healthy", "env": "orgos", "version": "2.0.0"}
    return payload
75
 
76
 
77
@app.get("/metadata")
def metadata():
    """Static environment descriptor consumed by OpenEnv tooling and clients."""
    description = (
        "OrgOS: multi-app enterprise RL environment. "
        "The agent completes cross-app business workflows (triage, onboarding, churn) "
        "across Jira, Zendesk, Salesforce, and Workday simulators. "
        "Schema drift and policy changes challenge the agent to generalise."
    )
    workflows = [
        {
            "id": "A",
            "name": "Customer Bug Fix",
            "difficulty": "medium",
            "apps": ["zendesk", "jira", "salesforce", "workday"],
        },
        {
            "id": "B",
            "name": "Employee Onboarding",
            "difficulty": "medium",
            "apps": ["workday", "salesforce", "zendesk"],
        },
        {
            "id": "C",
            "name": "Churn Risk Alert",
            "difficulty": "hard",
            "apps": ["salesforce", "zendesk", "jira"],
        },
    ]
    return {
        "name": "orgos-openenv",
        "description": description,
        "version": "2.0.0",
        "tags": ["openenv", "enterprise", "multi-app", "schema-drift", "rl"],
        "workflows": workflows,
    }
110
 
 
115
  "action": {
116
  "type": "object",
117
  "properties": {
118
+ "app": {"type": "string", "enum": ["jira", "zendesk", "salesforce", "workday"]},
119
+ "operation": {"type": "string", "description": "App-specific operation name"},
120
+ "args": {"type": "object", "description": "Operation arguments"},
 
 
 
 
 
 
 
 
 
 
121
  },
122
+ "required": ["app", "operation"],
123
  },
124
  "observation": {
125
  "type": "object",
126
  "properties": {
127
+ "done": {"type": "boolean"},
128
+ "reward": {"type": "number"},
129
+ "current_score": {"type": "number"},
130
+ "workflow_id": {"type": "string"},
131
+ "step_count": {"type": "integer"},
132
+ "app_states": {"type": "object"},
133
+ "workflow_goal": {"type": "string"},
134
+ "completed_steps": {"type": "array"},
135
+ "pending_steps": {"type": "array"},
136
+ "schema_hints": {"type": "object"},
137
+ "active_rules": {"type": "object"},
138
+ "rule_violations": {"type": "array"},
139
+ "reward_breakdown":{"type": "object"},
140
+ "message": {"type": "string"},
141
  },
142
  },
143
  "state": {
144
  "type": "object",
145
  "properties": {
146
+ "episode_id": {"type": "string"},
147
+ "workflow_id": {"type": "string"},
148
+ "schema_versions": {"type": "object"},
149
+ "step_count": {"type": "integer"},
150
+ "max_steps": {"type": "integer"},
151
+ "rule_violation_count": {"type": "integer"},
152
+ "workflow_completion": {"type": "number"},
153
+ "rule_compliance_rate": {"type": "number"},
154
+ "policy_drift_active": {"type": "boolean"},
155
  },
156
  },
157
  }
158
 
159
 
160
@app.post("/reset", response_model=StepResponse)
def reset(req: ResetRequest = Body(default=ResetRequest())):
    """Start a new episode and return the first observation.

    Responds 400 when the requested workflow_id is rejected by the
    environment (ValueError/KeyError from env.reset).
    """
    try:
        obs = env.reset(workflow_id=req.workflow_id)
    except (ValueError, KeyError) as exc:
        # Chain the cause so server logs keep the original traceback.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return StepResponse(observation=obs, reward=obs.reward, done=False)
167
 
168
 
 
170
async def step(body: Dict[str, Any] = Body(...)):
    """
    Take one environment step.

    Accept both openenv-core wrapped format:
        {"action": {"app": "...", "operation": "...", "args": {...}}, "timeout_s": 15}
    and direct format:
        {"app": "...", "operation": "...", "args": {...}}

    Responds 400 on action-validation or environment errors.
    """
    # Unwrap the openenv-core envelope if present; otherwise treat the whole
    # body as the action payload.
    action_data = body.get("action", body)
    try:
        action = OrgOSAction(**action_data)
        obs = env.step(action)
    # The original `except (TypeError, KeyError, Exception)` tuple was
    # redundant — Exception already subsumes the other two (bugbear B014).
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return StepResponse(observation=obs, reward=obs.reward, done=obs.done)
184
 
185
 
186
@app.get("/state", response_model=OrgOSState)
def state_get():
    """GET /state — current episode metadata (openenv-core spec)."""
    snapshot = env.state()
    return snapshot
190
 
191
 
192
@app.post("/state", response_model=OrgOSState)
def state_post():
    """POST /state — kept for backward compatibility with older clients."""
    current = env.state()
    return current
196
 
197
 
198
+ # ------------------------------------------------------------------
199
+ # UI helper routes
200
+ # ------------------------------------------------------------------
 
 
201
 
202
@app.get("/schema/apps")
def app_schemas():
    """Return the per-app operation catalogue consumed by the dashboard UI."""
    # Local imports keep app modules off the server's import-time path.
    from server.apps.jira import JiraApp
    from server.apps.zendesk import ZendeskApp
    from server.apps.salesforce import SalesforceApp
    from server.apps.workday import WorkdayApp

    catalogue = {
        "jira": JiraApp,
        "zendesk": ZendeskApp,
        "salesforce": SalesforceApp,
        "workday": WorkdayApp,
    }
    return {name: {"operations": cls.OPERATIONS} for name, cls in catalogue.items()}
215
+
216
+
217
@app.get("/ui/run-agent")
async def run_agent_sse(workflow_id: str = "A", model: str = "gpt-4o-mini"):
    """
    Server-Sent Events stream: run one inference episode and stream step
    events to the UI. Each event is framed as ``data: <json>\n\n``.

    Parameters
    ----------
    workflow_id : which workflow to run ("A", "B", or "C").
    model : accepted for API compatibility; not referenced by this handler.
    """
    import asyncio

    async def _event_stream():
        # Deferred import kept from the original — presumably so the server
        # can start without the inference stack; confirm before hoisting.
        from inference import run_workflow_generator
        try:
            async for event in run_workflow_generator(workflow_id=workflow_id, env_ref=env):
                # Use the module-level `json` — the original re-imported it
                # under a `_json` alias inside this generator, redundantly.
                yield f"data: {json.dumps(event)}\n\n"
                await asyncio.sleep(0)  # yield control to the event loop
        except Exception as exc:
            yield f"data: {json.dumps({'type': 'error', 'message': str(exc)})}\n\n"
        yield 'data: {"type": "done"}\n\n'

    return StreamingResponse(
        _event_stream(),
        media_type="text/event-stream",
        headers={
            # No caching, and X-Accel-Buffering disables proxy buffering so
            # events flush to the browser immediately.
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
        },
    )
245
+
246
+
247
@app.get("/", response_class=HTMLResponse)
def ui():
    """Serve the OrgOS demo dashboard.

    Prefers ui/index.html relative to the package root; falls back to a
    minimal inline page when the UI has not been built yet.
    """
    # Resolve ui/index.html relative to this module's parent directory.
    ui_path = os.path.join(os.path.dirname(__file__), "..", "ui", "index.html")
    if os.path.exists(ui_path):
        return FileResponse(ui_path, media_type="text/html")
    # Minimal inline fallback if ui/ hasn't been built yet
    return HTMLResponse(content="""
    <!DOCTYPE html>
    <html lang="en">
    <head><meta charset="UTF-8"><title>OrgOS Dashboard</title>
    <style>body{font-family:monospace;background:#0f172a;color:#94a3b8;padding:2rem}
    h1{color:#38bdf8}a{color:#38bdf8}</style></head>
    <body>
      <h1>OrgOS — Enterprise RL Environment</h1>
      <p>The full dashboard UI is at <code>ui/index.html</code>.</p>
      <p>API docs: <a href="/docs">/docs</a> &nbsp;|&nbsp;
         Health: <a href="/health">/health</a></p>
    </body></html>
    """)
267
 
268
 
269
  # ------------------------------------------------------------------
270
+ # Entry point
271
  # ------------------------------------------------------------------
272
 
273
def main():
    """Launch the API server (entry point for [project.scripts])."""
    # Allow hosting platforms to override the port via $PORT while keeping
    # the historical default of 8000 for local runs (backward compatible).
    port = int(os.environ.get("PORT", "8000"))
    uvicorn.run("server.app:app", host="0.0.0.0", port=port, reload=False)
275
 
276
 
277
  if __name__ == "__main__":
server/apps/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """OrgOS app modules — 4 mock enterprise applications."""
2
+
3
+ from server.apps.jira import JiraApp
4
+ from server.apps.zendesk import ZendeskApp
5
+ from server.apps.salesforce import SalesforceApp
6
+ from server.apps.workday import WorkdayApp
7
+
8
+ __all__ = ["JiraApp", "ZendeskApp", "SalesforceApp", "WorkdayApp"]
server/apps/base_app.py CHANGED
@@ -1,19 +1,58 @@
 
 
 
 
 
 
 
 
1
  class BaseApp(ABC):
2
  APP_NAME: str = ""
3
 
4
- # --- Core interface every app must implement ---
 
 
 
 
 
 
5
  @abstractmethod
6
  def initialize(self, records: List[Dict]) -> None:
7
  """Load synthetic records into in-memory state."""
8
 
9
  @abstractmethod
10
  def execute(self, operation: str, args: Dict) -> Dict:
11
- """Execute an operation. Returns {"success": bool, "data": ..., "message": str}"""
 
 
 
 
 
 
12
 
13
  @abstractmethod
14
  def get_state_view(self, max_rows: int = 5) -> str:
15
- """Return agent-visible snapshot as a compact string."""
16
 
17
  @abstractmethod
18
  def count_open_items(self) -> int:
19
- """Count pending/open work items (used by grader)."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Abstract base class for all OrgOS app modules."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Dict, List, Optional, Tuple
5
+
6
+ from server.schema_drift import SchemaDriftEngine
7
+
8
+
9
class BaseApp(ABC):
    APP_NAME: str = ""

    def __init__(self, drift: SchemaDriftEngine):
        self._drift = drift

    # ------------------------------------------------------------------
    # Core interface — every app must implement these
    # ------------------------------------------------------------------

    @abstractmethod
    def initialize(self, records: List[Dict]) -> None:
        """Populate the app's in-memory state from synthetic records."""

    @abstractmethod
    def execute(self, operation: str, args: Dict) -> Dict:
        """
        Run one operation against this app.

        The result dict always contains at least:
            {"success": bool, "message": str}
        and may additionally carry:
            {"data": ..., "schema_error": str, "schema_adapted": bool, "ticket": dict}
        """

    @abstractmethod
    def get_state_view(self, max_rows: int = 5) -> str:
        """Produce the compact multi-line snapshot shown to the agent."""

    @abstractmethod
    def count_open_items(self) -> int:
        """Report how many work items are still pending/open (grader input)."""

    # ------------------------------------------------------------------
    # Shared helpers available to all concrete apps
    # ------------------------------------------------------------------

    def _check_schema_drift(self, args: Dict) -> Tuple[Optional[str], bool]:
        """
        Ask the drift engine whether *args* use stale canonical field names.
        Returns (offending_field_or_None, schema_adapted_flag).
        """
        return self._drift.check_args_for_drift(args, self.APP_NAME)

    def _to_agent_view(self, record: Dict) -> Dict:
        """Render a canonical record as the agent-visible (drifted) record."""
        return self._drift.translate_record(record, self.APP_NAME)

    def _compact(self, record: Dict, fields: List[str]) -> Dict:
        """Project a (possibly drifted) record onto *fields*, dropping None values."""
        wanted = set(fields)
        projected: Dict = {}
        for key, value in record.items():
            if key in wanted and value is not None:
                projected[key] = value
        return projected
server/apps/jira.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Jira-like app — engineering ticket management."""
2
+
3
+ from typing import Dict, List, Optional
4
+ from server.apps.base_app import BaseApp
5
+ from server.schema_drift import SchemaDriftEngine
6
+
7
+
8
class JiraApp(BaseApp):
    """Jira-like engineering ticket tracker backing the OrgOS environment.

    Records are keyed by issue_id; canonical fields (priority, assignee,
    status, ...) may be renamed for the agent by the schema-drift engine,
    so mutating operations accept both canonical and drifted field names.
    """

    APP_NAME = "jira"

    OPERATIONS = [
        "get_issue", "create_issue", "update_status", "set_priority",
        "assign_owner", "add_label", "link_zendesk_ticket", "close_issue", "list_issues",
    ]

    def __init__(self, drift: SchemaDriftEngine):
        super().__init__(drift)
        self._records: Dict[str, Dict] = {}
        # Workflow completion state tracking
        self._linked_issues: set = set()    # issue_ids linked to a Zendesk ticket
        self._assigned_issues: set = set()  # issue_ids with a non-null assignee
        self._bugs_checked: bool = False    # list_issues was called (Workflow C)

    # ------------------------------------------------------------------
    # BaseApp interface
    # ------------------------------------------------------------------

    def initialize(self, records: List[Dict]) -> None:
        """Load records (keyed by issue_id) and reset workflow trackers."""
        self._records = {r["issue_id"]: r for r in records}
        self._linked_issues.clear()
        self._assigned_issues.clear()
        self._bugs_checked = False
        # Seed state from loaded data
        for issue_id, rec in self._records.items():
            if rec.get("assignee"):
                self._assigned_issues.add(issue_id)
            if rec.get("linked_zendesk"):
                self._linked_issues.add(issue_id)

    def execute(self, operation: str, args: Dict) -> Dict:
        """Dispatch *operation* to its _op_<name> handler with **args."""
        method = getattr(self, f"_op_{operation}", None)
        if method is None:
            return {
                "success": False,
                "message": f"Unknown operation '{operation}'. Available: {', '.join(self.OPERATIONS)}",
            }
        try:
            return method(**args)
        except TypeError as exc:
            # Wrong/missing keyword arguments surface here; report, don't raise.
            return {"success": False, "message": f"Bad args for '{operation}': {exc}"}

    def get_state_view(self, max_rows: int = 5) -> str:
        """One drifted, compacted line per open issue (up to max_rows)."""
        open_issues = [r for r in self._records.values()
                       if r.get("status") not in ("closed",)][:max_rows]
        if not open_issues:
            return "No open issues."
        lines = []
        for rec in open_issues:
            view = self._to_agent_view(rec)
            # Keep canonical AND drifted variants of each field so the view
            # survives whichever renames the drift engine applied.
            keep = ["issue_id", "title",
                    "priority", "severity", "urgency_level",
                    "assignee", "owner", "assigned_to",
                    "status", "state", "current_state",
                    "customer_id", "linked_zendesk"]
            compact = {k: v for k, v in view.items() if k in keep and v is not None}
            lines.append(str(compact))
        return "\n".join(lines)

    def count_open_items(self) -> int:
        """Number of issues whose status is anything but 'closed'."""
        return sum(1 for r in self._records.values() if r.get("status") != "closed")

    # ------------------------------------------------------------------
    # Workflow completion state checks
    # ------------------------------------------------------------------

    def has_linked_issue(self) -> bool:
        """True once any issue is linked to a Zendesk ticket (Workflow A step A2)."""
        return len(self._linked_issues) > 0

    def issue_assigned(self) -> bool:
        """True once JIRA-001 (primary bug) has an assignee (Workflow A step A4)."""
        return bool(self._records.get("JIRA-001", {}).get("assignee"))

    def bugs_checked(self) -> bool:
        """True once list_issues has been called (Workflow C step C3)."""
        return self._bugs_checked

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _op_get_issue(self, issue_id: str) -> Dict:
        """Return the drifted view of a single issue."""
        rec = self._records.get(issue_id)
        if not rec:
            return {"success": False, "message": f"Issue {issue_id} not found. Use list_issues to browse."}
        return {"success": True, "data": self._to_agent_view(rec),
                "message": f"Retrieved {issue_id}"}

    def _op_create_issue(self, title: str, **kwargs) -> Dict:
        """Create a new issue; rejects stale field names reported by drift."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            return {
                "success": False,
                "schema_error": schema_error,
                "message": (f"Schema error: field '{schema_error}' is not in the current schema. "
                            f"Check schema_hints for the correct field name."),
            }

        # Sequential ids: JIRA-001, JIRA-002, ... based on current record count.
        issue_id = f"JIRA-{len(self._records) + 1:03d}"
        # Accept both canonical and drifted names for priority / assignee
        priority = (kwargs.get("priority") or kwargs.get("severity")
                    or kwargs.get("urgency_level", "p2"))
        linked = kwargs.get("linked_zendesk") or kwargs.get("zendesk_ticket")

        rec = {
            "issue_id": issue_id,
            "title": title,
            "priority": priority,
            "assignee": kwargs.get("assignee") or kwargs.get("owner") or kwargs.get("assigned_to"),
            "status": "open",
            "reporter": kwargs.get("reporter", "agent"),
            "customer_id": kwargs.get("customer_id"),
            "linked_zendesk": linked,
            "labels": [],
            "created_at": "2026-04-21T09:00:00",
        }
        self._records[issue_id] = rec

        # Keep workflow trackers in sync with the new record.
        if linked:
            self._linked_issues.add(issue_id)
        if rec["assignee"]:
            self._assigned_issues.add(issue_id)

        return {
            "success": True,
            "data": {"issue_id": issue_id},
            "schema_adapted": schema_adapted,
            "message": f"Created {issue_id}: '{title}'"
            + (f" linked to {linked}" if linked else ""),
        }

    def _op_update_status(self, issue_id: str, **kwargs) -> Dict:
        """Set an issue's status; value may arrive under a drifted name."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use current field name, not '{schema_error}'"}

        rec = self._records.get(issue_id)
        if not rec:
            return {"success": False, "message": f"Issue {issue_id} not found"}

        new_status = (kwargs.get("status") or kwargs.get("state")
                      or kwargs.get("current_state"))
        if not new_status:
            return {"success": False, "message": "Provide status/state/current_state value"}

        rec["status"] = new_status
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{issue_id} status → '{new_status}'"}

    def _op_set_priority(self, issue_id: str, **kwargs) -> Dict:
        """Set an issue's priority; value may arrive under a drifted name."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: '{schema_error}' is a stale field name"}

        rec = self._records.get(issue_id)
        if not rec:
            return {"success": False, "message": f"Issue {issue_id} not found"}

        new_priority = (kwargs.get("priority") or kwargs.get("severity")
                        or kwargs.get("urgency_level"))
        if not new_priority:
            return {"success": False,
                    "message": "Provide priority / severity / urgency_level value"}

        rec["priority"] = new_priority
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{issue_id} priority → '{new_priority}'"}

    def _op_assign_owner(self, issue_id: str, **kwargs) -> Dict:
        """Assign an issue; on stale field names, hint the drifted name."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("assignee", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' instead of '{schema_error}'"}

        rec = self._records.get(issue_id)
        if not rec:
            return {"success": False, "message": f"Issue {issue_id} not found"}

        assignee = (kwargs.get("assignee") or kwargs.get("owner")
                    or kwargs.get("assigned_to"))
        if not assignee:
            return {"success": False,
                    "message": "Provide assignee / owner / assigned_to value"}

        rec["assignee"] = assignee
        self._assigned_issues.add(issue_id)
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{issue_id} assigned to '{assignee}'"}

    def _op_add_label(self, issue_id: str, label: str) -> Dict:
        """Append a label to an issue's label list."""
        rec = self._records.get(issue_id)
        if not rec:
            return {"success": False, "message": f"Issue {issue_id} not found"}
        rec.setdefault("labels", []).append(label)
        return {"success": True, "message": f"Added label '{label}' to {issue_id}"}

    def _op_link_zendesk_ticket(self, issue_id: str, zendesk_ticket_number: str) -> Dict:
        """Cross-link an issue to a Zendesk ticket (tracked for Workflow A)."""
        rec = self._records.get(issue_id)
        if not rec:
            return {"success": False, "message": f"Issue {issue_id} not found"}
        rec["linked_zendesk"] = zendesk_ticket_number
        self._linked_issues.add(issue_id)
        return {"success": True,
                "message": f"Linked {issue_id} ↔ Zendesk {zendesk_ticket_number}"}

    def _op_close_issue(self, issue_id: str) -> Dict:
        """Mark an issue closed."""
        rec = self._records.get(issue_id)
        if not rec:
            return {"success": False, "message": f"Issue {issue_id} not found"}
        rec["status"] = "closed"
        return {"success": True, "message": f"Closed {issue_id}"}

    def _op_list_issues(self, status: str = "open", customer_id: Optional[str] = None,
                        limit: int = 10) -> Dict:
        """List issues filtered by status/customer; marks bugs_checked for Workflow C."""
        self._bugs_checked = True
        matching = [
            r for r in self._records.values()
            if (status == "all" or r.get("status") == status)
            and (customer_id is None or r.get("customer_id") == customer_id)
        ][:limit]
        drifted = [self._to_agent_view(r) for r in matching]
        # Same canonical-plus-drifted keep-list as get_state_view.
        keep = ["issue_id", "title", "priority", "severity", "urgency_level",
                "assignee", "owner", "assigned_to",
                "status", "state", "current_state",
                "customer_id", "linked_zendesk"]
        compact = [{k: v for k, v in r.items() if k in keep and v is not None}
                   for r in drifted]
        return {"success": True, "data": compact,
                "message": f"Found {len(compact)} {status} issues"
                + (f" for {customer_id}" if customer_id else "")}
server/apps/salesforce.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Salesforce-like app — CRM account and pipeline management."""

from typing import Dict, List, Optional
from server.apps.base_app import BaseApp
from server.schema_drift import SchemaDriftEngine


class SalesforceApp(BaseApp):
    """CRM app: accounts, pipeline stages, churn flags, and ownership.

    Workflow-completion state is stored on the account records themselves
    under ``_``-prefixed keys (``_account_checked``, ``_churn_flagged``,
    ``_team_assigned``, ``_intervention_assigned``).
    """

    APP_NAME = "salesforce"

    OPERATIONS = [
        "get_account", "list_accounts", "update_deal_stage", "flag_churn_risk",
        "assign_account_owner", "log_interaction", "get_opportunity",
    ]

    # Fields surfaced in compact list/state views — canonical names plus every
    # drifted alias the SchemaDriftEngine may emit. Shared by get_state_view
    # and _op_list_accounts (was duplicated verbatim in both).
    _SUMMARY_FIELDS = (
        "account_id", "company_name",
        "deal_stage", "pipeline_stage", "stage",
        "health", "account_health", "risk_score",
        "owner", "owner_name", "account_owner", "rep_email",
        "arr", "annual_recurring_revenue",
        "is_paying", "territory",
    )

    def __init__(self, drift: SchemaDriftEngine):
        super().__init__(drift)
        self._records: Dict[str, Dict] = {}  # account_id -> account record

    # ------------------------------------------------------------------
    # BaseApp interface
    # ------------------------------------------------------------------

    def initialize(self, records: List[Dict]) -> None:
        """Load account records, keyed by their ``account_id``."""
        self._records = {r["account_id"]: r for r in records}

    def execute(self, operation: str, args: Dict) -> Dict:
        """Dispatch *operation* to the matching ``_op_*`` handler.

        Unknown operations and mismatched keyword arguments are reported in
        the result dict (``success: False``) rather than raised.
        """
        method = getattr(self, f"_op_{operation}", None)
        if method is None:
            return {
                "success": False,
                "message": f"Unknown operation '{operation}'. Available: {', '.join(self.OPERATIONS)}",
            }
        try:
            return method(**args)
        except TypeError as exc:
            return {"success": False, "message": f"Bad args for '{operation}': {exc}"}

    def _compact(self, record: Dict) -> Dict:
        """Drift-translate *record* and keep only non-None summary fields."""
        view = self._to_agent_view(record)
        return {k: v for k, v in view.items()
                if k in self._SUMMARY_FIELDS and v is not None}

    def get_state_view(self, max_rows: int = 5) -> str:
        """Human-readable snapshot: at-risk accounts first, else any accounts."""
        at_risk = [r for r in self._records.values()
                   if r.get("health") in ("red", "yellow")][:max_rows]
        sample = at_risk or list(self._records.values())[:max_rows]
        if not sample:
            return "No accounts loaded."
        return "\n".join(str(self._compact(rec)) for rec in sample)

    def count_open_items(self) -> int:
        """Accounts needing attention: unhealthy or in an active pipeline stage."""
        return sum(1 for r in self._records.values()
                   if r.get("health") in ("red", "yellow") or
                   r.get("deal_stage") in ("prospect", "qualification", "negotiation"))

    # ------------------------------------------------------------------
    # Workflow completion state checks
    # ------------------------------------------------------------------

    def account_checked(self) -> bool:
        """True once get_account was called for ACME-001 (Workflow A step A3)."""
        return bool(self._records.get("ACME-001", {}).get("_account_checked"))

    def churn_flagged(self) -> bool:
        """True once flag_churn_risk was called for ACME-003 (Workflow C step C1)."""
        return bool(self._records.get("ACME-003", {}).get("_churn_flagged"))

    def team_assigned(self) -> bool:
        """True once assign_account_owner was called (Workflow B step B3)."""
        return any(r.get("_team_assigned") for r in self._records.values())

    def intervention_assigned(self) -> bool:
        """True once assign_account_owner called on ACME-003 (Workflow C step C4)."""
        return bool(self._records.get("ACME-003", {}).get("_intervention_assigned"))

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _op_get_account(self, account_id: str) -> Dict:
        """Fetch a single account (full drifted view) and mark it as checked."""
        rec = self._records.get(account_id)
        if not rec:
            return {"success": False,
                    "message": f"Account {account_id} not found. Use list_accounts to browse."}
        rec["_account_checked"] = True
        return {"success": True, "data": self._to_agent_view(rec),
                "message": f"Retrieved account {account_id} ({rec.get('company_name', '')})"}

    def _op_list_accounts(self, health: Optional[str] = None,
                          territory: Optional[str] = None,
                          limit: int = 10) -> Dict:
        """List accounts filtered by health and/or territory as compact rows."""
        matching = [
            r for r in self._records.values()
            if (health is None or r.get("health") == health)
            and (territory is None or r.get("territory") == territory)
        ][:limit]
        compact = [self._compact(r) for r in matching]
        return {"success": True, "data": compact,
                "message": f"Found {len(compact)} accounts"
                + (f" (health={health})" if health else "")}

    def _op_update_deal_stage(self, account_id: str, amount: float = 0, **kwargs) -> Dict:
        """Move an account to a new pipeline stage.

        Note: ``amount`` is not used here — manager approval above the
        threshold is enforced upstream by the BusinessRuleEngine.
        """
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("deal_stage", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' not '{schema_error}'"}

        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}

        # Accept the canonical field name or any drifted alias.
        new_stage = (kwargs.get("deal_stage") or kwargs.get("pipeline_stage")
                     or kwargs.get("stage"))
        if not new_stage:
            return {"success": False,
                    "message": "Provide deal_stage / pipeline_stage / stage value"}

        rec["deal_stage"] = new_stage
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{account_id} deal stage → '{new_stage}'"}

    def _op_flag_churn_risk(self, account_id: str, reason: Optional[str] = None) -> Dict:
        """Flag an account as a churn risk and force its health to 'red'."""
        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}
        rec["_churn_flagged"] = True
        rec["health"] = "red"
        return {
            "success": True,
            "message": f"Flagged {account_id} ({rec.get('company_name', '')}) as churn risk"
            + (f": {reason}" if reason else ""),
        }

    def _op_assign_account_owner(self, account_id: str, **kwargs) -> Dict:
        """Assign a new owner to an account; accepts any drifted alias field."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("owner", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' not '{schema_error}'"}

        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}

        new_owner = (kwargs.get("owner") or kwargs.get("owner_name")
                     or kwargs.get("account_owner") or kwargs.get("rep_email"))
        if not new_owner:
            return {"success": False,
                    "message": "Provide owner / owner_name / account_owner / rep_email"}

        rec["owner"] = new_owner
        rec["_team_assigned"] = True
        # ACME-003 ownership doubles as the Workflow C intervention step.
        if account_id == "ACME-003":
            rec["_intervention_assigned"] = True

        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{account_id} owner → '{new_owner}'"}

    def _op_log_interaction(self, account_id: str, note: str = "") -> Dict:
        """Append an interaction note to the account's history."""
        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}
        rec["_interaction_logged"] = True
        rec.setdefault("interactions", []).append(note)
        return {"success": True,
                "message": f"Logged interaction for {account_id}"}

    def _op_get_opportunity(self, account_id: str) -> Dict:
        """Return a small opportunity summary derived from the account record."""
        rec = self._records.get(account_id)
        if not rec:
            return {"success": False, "message": f"Account {account_id} not found"}
        opp = {
            "account_id": account_id,
            "company_name": rec.get("company_name"),
            "arr": rec.get("arr"),
            "deal_stage": rec.get("deal_stage"),
            "health": rec.get("health"),
            "is_paying": rec.get("is_paying"),
        }
        return {"success": True, "data": self._to_agent_view(opp),
                "message": f"Retrieved opportunity for {account_id}"}
server/apps/workday.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Workday-like app — HR and people operations."""

from typing import Dict, List, Optional
from server.apps.base_app import BaseApp
from server.schema_drift import SchemaDriftEngine


class WorkdayApp(BaseApp):
    """HR app: employee lookup, access provisioning, SLA logging, onboarding.

    Workflow-completion state is stored on employee records under
    ``_``-prefixed keys (``_sla_logged``, ``_onboarding_created``,
    ``_access_provisioned``).
    """

    APP_NAME = "workday"

    OPERATIONS = [
        "get_employee", "list_employees", "provision_access",
        "log_sla_event", "request_budget_approval",
        "create_onboarding_task", "complete_task",
    ]

    # Fields surfaced in compact list/state views — canonical names plus the
    # drifted aliases. Shared by get_state_view and _op_list_employees
    # (was duplicated verbatim in both); the state view adds "email".
    _SUMMARY_FIELDS = (
        "employee_id", "name",
        "level", "job_level", "seniority",
        "manager_id", "reports_to", "direct_manager",
        "status", "request_status", "approval_state",
        "department", "territory",
    )

    def __init__(self, drift: SchemaDriftEngine):
        super().__init__(drift)
        self._records: Dict[str, Dict] = {}  # employee_id -> employee record

    # ------------------------------------------------------------------
    # BaseApp interface
    # ------------------------------------------------------------------

    def initialize(self, records: List[Dict]) -> None:
        """Load employee records, keyed by their ``employee_id``."""
        self._records = {r["employee_id"]: r for r in records}

    def execute(self, operation: str, args: Dict) -> Dict:
        """Dispatch *operation* to the matching ``_op_*`` handler.

        Unknown operations and mismatched keyword arguments are reported in
        the result dict (``success: False``) rather than raised.
        """
        method = getattr(self, f"_op_{operation}", None)
        if method is None:
            return {
                "success": False,
                "message": f"Unknown operation '{operation}'. Available: {', '.join(self.OPERATIONS)}",
            }
        try:
            return method(**args)
        except TypeError as exc:
            return {"success": False, "message": f"Bad args for '{operation}': {exc}"}

    def _compact(self, record: Dict, extra_fields: tuple = ()) -> Dict:
        """Drift-translate *record*, keeping only non-None summary fields."""
        keep = self._SUMMARY_FIELDS + extra_fields
        view = self._to_agent_view(record)
        return {k: v for k, v in view.items() if k in keep and v is not None}

    def get_state_view(self, max_rows: int = 5) -> str:
        """Human-readable snapshot: pending employees first, else any records."""
        pending = [r for r in self._records.values()
                   if r.get("status") == "pending"][:max_rows]
        sample = pending or list(self._records.values())[:max_rows]
        if not sample:
            return "No employee records loaded."
        # The state view additionally surfaces the employee email.
        return "\n".join(str(self._compact(rec, ("email",))) for rec in sample)

    def count_open_items(self) -> int:
        """Number of employees whose status is still 'pending'."""
        return sum(1 for r in self._records.values()
                   if r.get("status") == "pending")

    # ------------------------------------------------------------------
    # Workflow completion state checks
    # ------------------------------------------------------------------

    def sla_logged(self) -> bool:
        """True once log_sla_event was called (Workflow A step A5)."""
        return any(r.get("_sla_logged") for r in self._records.values())

    def employee_created(self) -> bool:
        """True once create_onboarding_task was called for EMP-NEW-001 (Workflow B step B1)."""
        return bool(self._records.get("EMP-NEW-001", {}).get("_onboarding_created"))

    def access_provisioned(self, app_name: str) -> bool:
        """True once provision_access was called for the given app (Workflow B step B2)."""
        return any(
            r.get("_access_provisioned", {}).get(app_name)
            for r in self._records.values()
        )

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _op_get_employee(self, employee_id: str) -> Dict:
        """Fetch a single employee record (full drifted view)."""
        rec = self._records.get(employee_id)
        if not rec:
            return {"success": False,
                    "message": f"Employee {employee_id} not found. Use list_employees to browse."}
        return {"success": True, "data": self._to_agent_view(rec),
                "message": f"Retrieved employee {employee_id} ({rec.get('name', '')})"}

    def _op_list_employees(self, department: Optional[str] = None,
                           status: Optional[str] = None,
                           limit: int = 10) -> Dict:
        """List employees filtered by department and/or status as compact rows."""
        matching = [
            r for r in self._records.values()
            if (department is None or r.get("department") == department)
            and (status is None or r.get("status") == status)
        ][:limit]
        compact = [self._compact(r) for r in matching]
        return {"success": True, "data": compact,
                "message": f"Found {len(compact)} employees"
                + (f" in {department}" if department else "")}

    def _op_provision_access(self, employee_id: str, app_name: str,
                             **kwargs) -> Dict:
        """Grant app access to an employee (Workflow B step B2)."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use current field name, not '{schema_error}'"}

        rec = self._records.get(employee_id)
        if not rec:
            return {"success": False, "message": f"Employee {employee_id} not found"}

        rec.setdefault("_access_provisioned", {})[app_name] = True
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"Provisioned {app_name} access for {employee_id} ({rec.get('name', '')})"}

    def _op_log_sla_event(self, ticket_id: str, sla_met: bool = True,
                          elapsed_minutes: Optional[float] = None) -> Dict:
        """Log an SLA compliance event (Workflow A step A5)."""
        # Find an employee record to attach the log to.
        first = next(iter(self._records.values()), None)
        if first is None:
            return {"success": False, "message": "No Workday records loaded"}

        first["_sla_logged"] = True
        status = "MET" if sla_met else "BREACHED"
        # `is not None` (not truthiness) so an elapsed time of exactly 0.0
        # is still reported in the message.
        detail = (f" ({elapsed_minutes:.1f} min elapsed)"
                  if elapsed_minutes is not None else "")
        return {
            "success": True,
            "message": f"SLA event logged for {ticket_id}: {status}{detail}",
        }

    def _op_request_budget_approval(self, employee_id: str,
                                    amount: float = 0, reason: str = "") -> Dict:
        """Request budget approval (the approval threshold / RBAC check is
        enforced upstream by the BusinessRuleEngine before this runs)."""
        rec = self._records.get(employee_id)
        if not rec:
            return {"success": False, "message": f"Employee {employee_id} not found"}
        return {
            "success": True,
            "message": f"Budget approval request submitted for {employee_id}: ${amount:,.0f}",
        }

    def _op_create_onboarding_task(self, employee_id: str, **kwargs) -> Dict:
        """Create onboarding record for a new employee (Workflow B step B1).

        Auto-creates a stub employee record (status 'pending') when the
        employee does not exist yet; drifted alias field names are accepted.
        """
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use current field name, not '{schema_error}'"}

        rec = self._records.get(employee_id)
        if not rec:
            rec = {
                "employee_id": employee_id,
                "name": kwargs.get("name", "New Employee"),
                "level": kwargs.get("level") or kwargs.get("job_level") or kwargs.get("seniority", "IC1"),
                "manager_id": kwargs.get("manager_id") or kwargs.get("reports_to") or kwargs.get("direct_manager"),
                "status": "pending",
                "department": kwargs.get("department", "support"),
                "territory": kwargs.get("territory", "west"),
                "email": kwargs.get("email", f"{employee_id.lower()}@company.com"),
                "_access_provisioned": {},
                "_sla_logged": False,
                "_onboarding_created": True,
            }
            self._records[employee_id] = rec
        else:
            rec["_onboarding_created"] = True

        rec.setdefault("_onboarding_tasks", []).append("onboarding_checklist")
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"Onboarding task created for {employee_id} ({rec.get('name', '')})"}

    def _op_complete_task(self, employee_id: str, task: str) -> Dict:
        """Remove *task* from the employee's pending onboarding tasks.

        Reports success even when the task was not in the list (idempotent).
        """
        rec = self._records.get(employee_id)
        if not rec:
            return {"success": False, "message": f"Employee {employee_id} not found"}
        tasks = rec.get("_onboarding_tasks", [])
        if task in tasks:
            tasks.remove(task)
        return {"success": True,
                "message": f"Completed task '{task}' for {employee_id}"}
server/apps/zendesk.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Zendesk-like app — customer support ticket management."""

from typing import Dict, List, Optional
from server.apps.base_app import BaseApp
from server.schema_drift import SchemaDriftEngine


class ZendeskApp(BaseApp):
    """Support-ticket app: lifecycle operations plus workflow bookkeeping.

    Workflow-completion state lives on ticket records under ``_``-prefixed
    keys (``_acknowledged``, ``_queried_accounts``, ``_profile_created``).
    """

    APP_NAME = "zendesk"

    OPERATIONS = [
        "get_ticket", "acknowledge_ticket", "set_urgency", "assign_agent",
        "escalate_to_jira", "resolve_ticket", "add_note", "list_tickets",
    ]

    # Fields surfaced in compact list/state views — canonical names plus the
    # drifted aliases. Shared by get_state_view and _op_list_tickets
    # (was duplicated verbatim in both).
    _SUMMARY_FIELDS = (
        "ticket_number", "title",
        "urgency", "priority", "impact_level",
        "agent_email", "handler", "assigned_agent",
        "state", "ticket_state", "resolution_status",
        "customer_id",
    )

    def __init__(self, drift: SchemaDriftEngine):
        super().__init__(drift)
        self._records: Dict[str, Dict] = {}  # ticket_number -> ticket record

    # ------------------------------------------------------------------
    # BaseApp interface
    # ------------------------------------------------------------------

    def initialize(self, records: List[Dict]) -> None:
        """Load ticket records, keyed by their ``ticket_number``."""
        self._records = {r["ticket_number"]: r for r in records}

    def execute(self, operation: str, args: Dict) -> Dict:
        """Dispatch *operation* to the matching ``_op_*`` handler.

        Unknown operations and mismatched keyword arguments are reported in
        the result dict (``success: False``) rather than raised.
        """
        method = getattr(self, f"_op_{operation}", None)
        if method is None:
            return {
                "success": False,
                "message": f"Unknown operation '{operation}'. Available: {', '.join(self.OPERATIONS)}",
            }
        try:
            return method(**args)
        except TypeError as exc:
            return {"success": False, "message": f"Bad args for '{operation}': {exc}"}

    def _compact(self, record: Dict) -> Dict:
        """Drift-translate *record* and keep only non-None summary fields."""
        view = self._to_agent_view(record)
        return {k: v for k, v in view.items()
                if k in self._SUMMARY_FIELDS and v is not None}

    def _mark_queried(self, record: Dict, customer_id: str) -> None:
        """Record that tickets for *customer_id* were looked up (Workflow C)."""
        queried = record.setdefault("_queried_accounts", [])
        if customer_id not in queried:
            queried.append(customer_id)

    def get_state_view(self, max_rows: int = 5) -> str:
        """Human-readable snapshot of up to *max_rows* open tickets."""
        open_tickets = [r for r in self._records.values()
                        if r.get("state") not in ("resolved", "closed")][:max_rows]
        if not open_tickets:
            return "No open tickets."
        return "\n".join(str(self._compact(rec)) for rec in open_tickets)

    def count_open_items(self) -> int:
        """Number of tickets not yet resolved or closed."""
        return sum(1 for r in self._records.values()
                   if r.get("state") not in ("resolved", "closed"))

    # ------------------------------------------------------------------
    # Workflow completion state checks
    # ------------------------------------------------------------------

    def ticket_acknowledged(self) -> bool:
        """True once ZD-001 has been acknowledged (Workflow A step A1)."""
        return bool(self._records.get("ZD-001", {}).get("_acknowledged"))

    def support_queried(self, account_id: str) -> bool:
        """True once tickets for account_id were listed (Workflow C step C2)."""
        # Every record (including ZD-001) is covered by this single scan.
        return any(account_id in r.get("_queried_accounts", [])
                   for r in self._records.values())

    def profile_created(self) -> bool:
        """True once a new agent profile was created (Workflow B step B4)."""
        return any(r.get("_profile_created") for r in self._records.values())

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _op_get_ticket(self, ticket_number: str, customer_id: Optional[str] = None) -> Dict:
        """Fetch one ticket, or all tickets of *customer_id* when provided."""
        if customer_id:
            matching = [r for r in self._records.values()
                        if r.get("customer_id") == customer_id]
            for r in matching:
                self._mark_queried(r, customer_id)
            if not matching:
                return {"success": True, "data": [],
                        "message": f"No tickets found for customer {customer_id}"}
            return {
                "success": True,
                "data": [self._to_agent_view(r) for r in matching[:5]],
                "message": f"Found {len(matching)} tickets for {customer_id}",
            }

        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False,
                    "message": f"Ticket {ticket_number} not found. Use list_tickets to browse."}
        # Ensure the bookkeeping key exists even when no customer is attached.
        rec.setdefault("_queried_accounts", [])
        cid = rec.get("customer_id")
        if cid:
            self._mark_queried(rec, cid)

        return {"success": True, "data": self._to_agent_view(rec),
                "ticket": rec,
                "message": f"Retrieved {ticket_number}"}

    def _op_acknowledge_ticket(self, ticket_number: str) -> Dict:
        """Acknowledge a ticket; promotes state 'new' → 'open'."""
        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}
        rec["_acknowledged"] = True
        if rec.get("state") == "new":
            rec["state"] = "open"
        return {"success": True, "ticket": rec,
                "message": f"Acknowledged {ticket_number} — status → open"}

    def _op_set_urgency(self, ticket_number: str, **kwargs) -> Dict:
        """Set a ticket's urgency; accepts any drifted alias field."""
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("urgency", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' not '{schema_error}'"}

        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}

        new_urgency = (kwargs.get("urgency") or kwargs.get("priority")
                       or kwargs.get("impact_level"))
        if not new_urgency:
            return {"success": False,
                    "message": "Provide urgency / priority / impact_level value"}

        rec["urgency"] = new_urgency
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{ticket_number} urgency → '{new_urgency}'"}

    def _op_assign_agent(self, ticket_number: str, **kwargs) -> Dict:
        """Assign an agent to a ticket.

        When the ticket does not exist but an agent email is supplied, a
        synthetic profile record is created instead (Workflow B step B4).
        """
        schema_error, schema_adapted = self._check_schema_drift(kwargs)
        if schema_error:
            hint = self._drift.translate_field("agent_email", self.APP_NAME)
            return {"success": False, "schema_error": schema_error,
                    "message": f"Schema error: use '{hint}' not '{schema_error}'"}

        # Accept the canonical field name or any drifted alias (extracted
        # once — was duplicated in both branches).
        email = (kwargs.get("agent_email") or kwargs.get("handler")
                 or kwargs.get("assigned_agent"))

        rec = self._records.get(ticket_number)
        if not rec:
            if not email:
                return {"success": False, "message": f"Ticket {ticket_number} not found"}
            # Create a synthetic profile "ticket" for the new agent.
            self._records[ticket_number] = {
                "ticket_number": ticket_number,
                "title": "Agent profile",
                "urgency": "p3",
                "agent_email": email,
                "state": "closed",
                "customer_id": None,
                "_acknowledged": False,
                "_queried_accounts": [],
                "_profile_created": True,
            }
            return {"success": True, "schema_adapted": schema_adapted,
                    "message": f"Created Zendesk profile for agent '{email}'"}

        if not email:
            return {"success": False,
                    "message": "Provide agent_email / handler / assigned_agent value"}

        rec["agent_email"] = email
        rec["_profile_created"] = True
        return {"success": True, "schema_adapted": schema_adapted,
                "message": f"{ticket_number} assigned to '{email}'"}

    def _op_escalate_to_jira(self, ticket_number: str,
                             jira_issue_id: Optional[str] = None) -> Dict:
        """Escalate a ticket to Jira; state becomes 'pending'."""
        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}
        rec["state"] = "pending"
        rec["escalated_to_jira"] = jira_issue_id or "pending"
        return {"success": True,
                "message": f"{ticket_number} escalated to Jira"
                + (f" ({jira_issue_id})" if jira_issue_id else "")}

    def _op_resolve_ticket(self, ticket_number: str) -> Dict:
        """Mark the ticket's state as 'resolved'."""
        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}
        rec["state"] = "resolved"
        return {"success": True, "message": f"{ticket_number} resolved"}

    def _op_add_note(self, ticket_number: str, note: str) -> Dict:
        """Append a free-text note to the ticket."""
        rec = self._records.get(ticket_number)
        if not rec:
            return {"success": False, "message": f"Ticket {ticket_number} not found"}
        rec.setdefault("notes", []).append(note)
        return {"success": True, "message": f"Note added to {ticket_number}"}

    def _op_list_tickets(self, state: str = "open", customer_id: Optional[str] = None,
                         limit: int = 10) -> Dict:
        """List tickets filtered by state and (optionally) customer as compact rows."""
        matching = [
            r for r in self._records.values()
            if (state == "all" or r.get("state") == state)
            and (customer_id is None or r.get("customer_id") == customer_id)
        ][:limit]
        # Listing by customer counts as querying that account (Workflow C).
        if customer_id:
            for r in matching:
                self._mark_queried(r, customer_id)

        compact = [self._compact(r) for r in matching]
        return {
            "success": True,
            "data": compact,
            "message": f"Found {len(compact)} {state} tickets"
            + (f" for {customer_id}" if customer_id else ""),
        }
server/business_rules.py CHANGED
@@ -1,62 +1,129 @@
1
- DEFAULT_RULES = {
2
- "sla_p0_minutes": 30, # P0 tickets: acknowledge within 30 min
3
- "sla_p1_hours": 4, # P1 tickets: first response within 4h
4
- "approval_threshold": 10_000, # $ above which manager approval needed
5
- "max_tickets_per_agent": 10, # RBAC: agent capacity cap
6
- "gdpr_max_days": 30, # compliance: GDPR ticket resolution
 
 
 
 
 
 
 
7
  "rbac": {
8
- "support": {"salesforce": ["read"], "jira": ["read", "create_issue"]},
9
- "engineer": {"jira": ["*"], "zendesk": ["read"]},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "manager": {"*": ["*"]},
11
- }
12
  }
13
 
14
- POLICY_DRIFT_EVENTS = {
15
- "sla_tighten": {"sla_p0_minutes": 15, "sla_p1_hours": 2},
16
- "approval_tighten": {"approval_threshold": 5_000},
17
- "gdpr_expedite": {"gdpr_max_days": 7},
18
  }
19
 
 
20
  class BusinessRuleEngine:
21
  def __init__(self):
22
- self.rules = DEFAULT_RULES.copy()
 
23
  self._violation_log: List[str] = []
24
 
 
 
 
 
25
  def apply_policy_drift(self, event: str) -> None:
26
  """Called mid-episode or at episode start to change rules."""
27
  if event in POLICY_DRIFT_EVENTS:
28
  self.rules.update(POLICY_DRIFT_EVENTS[event])
29
 
 
 
 
 
30
  def check_action(self, action: OrgOSAction, context: Dict) -> Tuple[bool, str, float]:
31
- """Returns (allowed, reason, penalty)."""
32
- violations = []
33
 
34
- # RBAC check
 
 
 
35
  role = context.get("agent_role", "support")
36
  app_perms = self.rules["rbac"].get(role, {})
37
- allowed_ops = app_perms.get(action.app, app_perms.get("*", []))
38
- if "*" not in allowed_ops and action.operation not in allowed_ops:
39
- violations.append(f"RBAC: {role} cannot {action.operation} on {action.app}")
40
- return False, violations[0], -0.25
 
 
 
 
 
 
41
 
42
  # Approval threshold check
43
  if action.operation in ("request_budget_approval", "update_deal_stage"):
44
  amount = action.args.get("amount", 0)
45
  if amount > self.rules["approval_threshold"] and not context.get("manager_approved"):
46
- violations.append(f"Approval required: ${amount} > ${self.rules['approval_threshold']}")
47
- return False, violations[0], -0.10
 
 
 
 
48
 
49
- self._violation_log.extend(violations)
50
  return True, "", 0.0
51
 
 
 
 
 
52
  def check_sla(self, ticket: Dict, elapsed_minutes: float) -> Tuple[bool, float]:
53
  """Returns (sla_met, penalty)."""
54
  priority = ticket.get("priority", ticket.get("urgency", "p2"))
55
  if priority in ("p0", "critical") and elapsed_minutes > self.rules["sla_p0_minutes"]:
56
  return False, -0.15
 
 
57
  return True, 0.0
58
 
 
 
 
 
59
  def get_violations_this_step(self) -> List[str]:
 
60
  v = self._violation_log.copy()
61
  self._violation_log.clear()
62
- return v
 
 
 
 
 
 
 
 
 
 
1
"""Business rule engine — RBAC, SLA checks, approval thresholds, policy drift."""

import copy
from typing import Dict, List, Tuple

from models import OrgOSAction


DEFAULT_RULES: Dict = {
    "sla_p0_minutes": 30,          # P0 tickets: acknowledge within 30 min
    "sla_p1_hours": 4,             # P1 tickets: first response within 4 h
    "approval_threshold": 10_000,  # $ above which manager approval is needed
    "max_tickets_per_agent": 10,   # RBAC: agent capacity cap
    "gdpr_max_days": 30,           # GDPR ticket resolution SLA
    "rbac": {
        # Support engineers can complete Workflows A and C
        "support": {
            "zendesk": ["*"],  # full ticket lifecycle
            "jira": ["*"],     # full issue lifecycle
            "salesforce": [
                "get_account", "list_accounts", "get_opportunity",
                "log_interaction", "flag_churn_risk", "assign_account_owner",
            ],
            "workday": [
                "get_employee", "list_employees", "log_sla_event",
            ],
        },
        # Engineers — focused on Jira + limited Zendesk/Salesforce reads
        "engineer": {
            "jira": ["*"],
            "zendesk": ["get_ticket", "list_tickets", "add_note", "resolve_ticket"],
            "salesforce": ["get_account", "list_accounts"],
            "workday": ["get_employee"],
        },
        # Managers — full access to all apps (Workflow B)
        "manager": {"*": ["*"]},
    },
}

POLICY_DRIFT_EVENTS: Dict = {
    "sla_tighten": {"sla_p0_minutes": 15, "sla_p1_hours": 2},
    "approval_tighten": {"approval_threshold": 5_000},
    "gdpr_expedite": {"gdpr_max_days": 7},
}


class BusinessRuleEngine:
    """Validates agent actions against RBAC, approval, and SLA rules."""

    def __init__(self):
        # Deep copy so policy drift never mutates the module-level defaults
        # (the nested "rbac" dicts would otherwise be shared).
        self.rules = copy.deepcopy(DEFAULT_RULES)
        self._violation_log: List[str] = []

    # ------------------------------------------------------------------
    # Policy drift
    # ------------------------------------------------------------------

    def apply_policy_drift(self, event: str) -> None:
        """Called mid-episode or at episode start to change rules.

        Unknown events are silently ignored.
        """
        if event in POLICY_DRIFT_EVENTS:
            self.rules.update(POLICY_DRIFT_EVENTS[event])

    # ------------------------------------------------------------------
    # Action validation
    # ------------------------------------------------------------------

    def check_action(self, action: OrgOSAction, context: Dict) -> Tuple[bool, str, float]:
        """
        Returns (allowed, reason, penalty).

        penalty values:
            -0.25  RBAC violation
            -0.10  approval threshold exceeded without manager approval
        """
        role = context.get("agent_role", "support")
        app_perms = self.rules["rbac"].get(role, {})

        # The wildcard app entry (e.g. manager's {"*": ["*"]}) already falls
        # out of this lookup — no special-casing of the manager role needed.
        allowed_ops = app_perms.get(action.app, app_perms.get("*", []))
        if "*" not in allowed_ops and action.operation not in allowed_ops:
            reason = f"RBAC: '{role}' cannot run '{action.operation}' on '{action.app}'"
            self._violation_log.append(reason)
            return False, reason, -0.25

        # Approval threshold check
        if action.operation in ("request_budget_approval", "update_deal_stage"):
            amount = action.args.get("amount", 0)
            if amount > self.rules["approval_threshold"] and not context.get("manager_approved"):
                reason = (
                    f"Approval required: ${amount:,.0f} exceeds "
                    f"${self.rules['approval_threshold']:,.0f} threshold"
                )
                self._violation_log.append(reason)
                return False, reason, -0.10

        return True, "", 0.0

    # ------------------------------------------------------------------
    # SLA checks
    # ------------------------------------------------------------------

    def check_sla(self, ticket: Dict, elapsed_minutes: float) -> Tuple[bool, float]:
        """Returns (sla_met, penalty) for the ticket's priority band."""
        priority = ticket.get("priority", ticket.get("urgency", "p2"))
        if priority in ("p0", "critical") and elapsed_minutes > self.rules["sla_p0_minutes"]:
            return False, -0.15
        if priority in ("p1", "high") and elapsed_minutes > self.rules["sla_p1_hours"] * 60:
            return False, -0.10
        return True, 0.0

    # ------------------------------------------------------------------
    # Violation log
    # ------------------------------------------------------------------

    def get_violations_this_step(self) -> List[str]:
        """Return and clear the per-step violation log."""
        v = self._violation_log.copy()
        self._violation_log.clear()
        return v

    def get_active_rules_summary(self) -> Dict:
        """Return scalar rules for inclusion in observation."""
        return {
            "sla_p0_minutes": self.rules["sla_p0_minutes"],
            "sla_p1_hours": self.rules["sla_p1_hours"],
            "approval_threshold": self.rules["approval_threshold"],
            "gdpr_max_days": self.rules["gdpr_max_days"],
        }
server/data_generator.py CHANGED
@@ -6,9 +6,20 @@ All datasets are generated purely from numpy/random — no external downloads.
6
  import random
7
  import numpy as np
8
  import pandas as pd
 
9
 
10
  SEED = 42
11
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # ---------------------------------------------------------------------------
14
  # Task 1 — Employee records with missing values
@@ -21,17 +32,12 @@ def generate_task1_datasets():
21
 
22
  n = 100
23
  departments = ["Engineering", "Marketing", "Sales", "HR", "Finance"]
24
- first_names = ["Alice", "Bob", "Carol", "David", "Eve", "Frank", "Grace",
25
- "Heidi", "Ivan", "Judy", "Karl", "Laura", "Mallory", "Niaj",
26
- "Oscar", "Peggy", "Quinn", "Romeo", "Sybil", "Trent"]
27
- last_names = ["Smith", "Jones", "Brown", "Taylor", "Wilson", "Davis",
28
- "Miller", "Anderson", "Thomas", "Jackson"]
29
-
30
- names = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(n)]
31
- ages = rng.integers(22, 60, size=n).astype(float)
32
- salaries = rng.integers(40_000, 120_000, size=n).astype(float)
33
- depts = rng.choice(departments, size=n)
34
- experience = rng.integers(0, 30, size=n).astype(float)
35
 
36
  clean_df = pd.DataFrame({
37
  "name": names,
@@ -42,8 +48,6 @@ def generate_task1_datasets():
42
  })
43
 
44
  dirty_df = clean_df.copy()
45
-
46
- # Inject ~20 % NaN into age, salary, department
47
  for col, frac in [("age", 0.20), ("salary", 0.20), ("department", 0.10)]:
48
  idx = rng.choice(n, size=int(n * frac), replace=False)
49
  dirty_df.loc[idx, col] = np.nan
@@ -59,11 +63,11 @@ def _scramble_phone(phone: str, rng) -> str:
59
  digits = phone.replace("-", "")
60
  fmt = rng.integers(0, 3)
61
  if fmt == 0:
62
- return digits # 5551234567
63
  elif fmt == 1:
64
- return f"({digits[:3]}){digits[3:]}" # (555)1234567
65
  else:
66
- return phone # 555-123-4567 (canonical)
67
 
68
 
69
  def _scramble_date(date_str: str, rng) -> str:
@@ -85,16 +89,16 @@ def generate_task2_datasets():
85
  n = 200
86
  categories = ["Electronics", "Clothing", "Food", "Books", "Toys"]
87
 
88
- product_ids = [f"P{str(i).zfill(4)}" for i in range(1, n + 1)]
89
- product_names = [f"Product_{i}" for i in range(1, n + 1)]
90
- prices = np.round(rng.uniform(5.0, 500.0, size=n), 2)
91
  categories_col = rng.choice(categories, size=n)
92
- phones = [
93
  f"{rng.integers(100,999)}-{rng.integers(100,999)}-{rng.integers(1000,9999)}"
94
  for _ in range(n)
95
  ]
96
- days_offset = rng.integers(0, 1000, size=n)
97
- dates = [
98
  (pd.Timestamp("2020-01-01") + pd.Timedelta(days=int(d))).strftime("%Y-%m-%d")
99
  for d in days_offset
100
  ]
@@ -110,19 +114,16 @@ def generate_task2_datasets():
110
 
111
  dirty_df = clean_df.copy()
112
 
113
- # Scramble ~60 % of phone formats
114
  phone_idx = rng.choice(n, size=int(n * 0.6), replace=False)
115
  dirty_df.loc[phone_idx, "phone"] = [
116
  _scramble_phone(dirty_df.loc[i, "phone"], rng) for i in phone_idx
117
  ]
118
 
119
- # Scramble ~60 % of date formats
120
  date_idx = rng.choice(n, size=int(n * 0.6), replace=False)
121
  dirty_df.loc[date_idx, "listed_date"] = [
122
  _scramble_date(dirty_df.loc[i, "listed_date"], rng) for i in date_idx
123
  ]
124
 
125
- # Add 15 duplicate rows
126
  dup_idx = rng.choice(n, size=15, replace=False)
127
  dup_rows = dirty_df.iloc[dup_idx].copy()
128
  dirty_df = pd.concat([dirty_df, dup_rows], ignore_index=True)
@@ -140,18 +141,15 @@ def generate_task3_datasets():
140
  random.seed(SEED)
141
 
142
  n = 300
143
- countries = ["USA", "UK", "Canada", "Australia", "Germany"]
144
- first_names = ["Alice", "Bob", "Carol", "David", "Eve", "Frank", "Grace",
145
- "Heidi", "Ivan", "Judy"]
146
- last_names = ["Smith", "Jones", "Brown", "Taylor", "Wilson"]
147
-
148
- names = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(n)]
149
- ages = rng.integers(18, 75, size=n).astype(float)
150
- purchase_amounts = np.round(rng.uniform(10.0, 500.0, size=n), 2)
151
- countries_col = rng.choice(countries, size=n)
152
- emails = [f"user{i}@example.com" for i in range(1, n + 1)]
153
- days_offset = rng.integers(0, 730, size=n)
154
- signup_dates = [
155
  (pd.Timestamp("2022-01-01") + pd.Timedelta(days=int(d))).strftime("%Y-%m-%d")
156
  for d in days_offset
157
  ]
@@ -167,29 +165,24 @@ def generate_task3_datasets():
167
 
168
  dirty_df = clean_df.copy()
169
 
170
- # Missing values (~15 % in age, purchase_amount, country, signup_date)
171
  for col, frac in [("age", 0.15), ("purchase_amount", 0.15),
172
  ("country", 0.10), ("signup_date", 0.10)]:
173
  idx = rng.choice(n, size=int(n * frac), replace=False)
174
  dirty_df.loc[idx, col] = np.nan
175
 
176
- # Outliers in purchase_amount (~3 %)
177
  out_idx = rng.choice(n, size=int(n * 0.03), replace=False)
178
  dirty_df.loc[out_idx, "purchase_amount"] = (
179
  dirty_df.loc[out_idx, "purchase_amount"] * 10
180
  )
181
 
182
- # Mixed case in country (~40 %)
183
  case_idx = rng.choice(n, size=int(n * 0.40), replace=False)
184
  dirty_df.loc[case_idx, "country"] = dirty_df.loc[case_idx, "country"].str.lower()
185
 
186
- # Mixed date formats (~50 %) — only scramble non-null entries
187
  date_idx = rng.choice(n, size=int(n * 0.50), replace=False)
188
  valid_date_idx = [i for i in date_idx if pd.notna(dirty_df.loc[i, "signup_date"])]
189
  for i in valid_date_idx:
190
  dirty_df.loc[i, "signup_date"] = _scramble_date(dirty_df.loc[i, "signup_date"], rng)
191
 
192
- # 20 duplicate rows
193
  dup_idx = rng.choice(n, size=20, replace=False)
194
  dup_rows = dirty_df.iloc[dup_idx].copy()
195
  dirty_df = pd.concat([dirty_df, dup_rows], ignore_index=True)
@@ -197,18 +190,201 @@ def generate_task3_datasets():
197
  return dirty_df.reset_index(drop=True), clean_df.reset_index(drop=True)
198
 
199
 
200
- def generate_jira_records(n=50, seed=42) -> List[Dict]:
201
- """50 engineering tickets with priority, assignee, status, linked_ticket."""
 
202
 
203
- def generate_zendesk_records(n=40, seed=42) -> List[Dict]:
204
- """40 support tickets with urgency, agent_email, state, customer_id."""
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
- def generate_salesforce_records(n=30, seed=42) -> List[Dict]:
207
- """30 accounts with deal_stage, health, owner_name, arr."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
- def generate_workday_records(n=20, seed=42) -> List[Dict]:
210
- """20 employee/HR records with level, manager_id, resolution."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
- def generate_episode_data(workflow_id: str, seed: int = 42) -> Dict[str, List[Dict]]:
213
- """Generate correlated data for a full episode across all 4 apps.
214
- Ensures tickets in Zendesk reference customers in Salesforce, etc."""
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import random
7
  import numpy as np
8
  import pandas as pd
9
+ from typing import Dict, List
10
 
11
  SEED = 42
12
 
13
+ # ---------------------------------------------------------------------------
14
+ # Shared name pools (cross-referenced across apps)
15
+ # ---------------------------------------------------------------------------
16
+
17
+ FIRST_NAMES = ["Alice", "Bob", "Carol", "David", "Eve", "Frank", "Grace",
18
+ "Heidi", "Ivan", "Judy", "Karl", "Laura", "Mallory", "Niaj",
19
+ "Oscar", "Peggy", "Quinn", "Romeo", "Sybil", "Trent"]
20
+ LAST_NAMES = ["Smith", "Jones", "Brown", "Taylor", "Wilson", "Davis",
21
+ "Miller", "Anderson", "Thomas", "Jackson"]
22
+
23
 
24
  # ---------------------------------------------------------------------------
25
  # Task 1 — Employee records with missing values
 
32
 
33
  n = 100
34
  departments = ["Engineering", "Marketing", "Sales", "HR", "Finance"]
35
+
36
+ names = [f"{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}" for _ in range(n)]
37
+ ages = rng.integers(22, 60, size=n).astype(float)
38
+ salaries = rng.integers(40_000, 120_000, size=n).astype(float)
39
+ depts = rng.choice(departments, size=n)
40
+ experience = rng.integers(0, 30, size=n).astype(float)
 
 
 
 
 
41
 
42
  clean_df = pd.DataFrame({
43
  "name": names,
 
48
  })
49
 
50
  dirty_df = clean_df.copy()
 
 
51
  for col, frac in [("age", 0.20), ("salary", 0.20), ("department", 0.10)]:
52
  idx = rng.choice(n, size=int(n * frac), replace=False)
53
  dirty_df.loc[idx, col] = np.nan
 
63
  digits = phone.replace("-", "")
64
  fmt = rng.integers(0, 3)
65
  if fmt == 0:
66
+ return digits
67
  elif fmt == 1:
68
+ return f"({digits[:3]}){digits[3:]}"
69
  else:
70
+ return phone
71
 
72
 
73
  def _scramble_date(date_str: str, rng) -> str:
 
89
  n = 200
90
  categories = ["Electronics", "Clothing", "Food", "Books", "Toys"]
91
 
92
+ product_ids = [f"P{str(i).zfill(4)}" for i in range(1, n + 1)]
93
+ product_names = [f"Product_{i}" for i in range(1, n + 1)]
94
+ prices = np.round(rng.uniform(5.0, 500.0, size=n), 2)
95
  categories_col = rng.choice(categories, size=n)
96
+ phones = [
97
  f"{rng.integers(100,999)}-{rng.integers(100,999)}-{rng.integers(1000,9999)}"
98
  for _ in range(n)
99
  ]
100
+ days_offset = rng.integers(0, 1000, size=n)
101
+ dates = [
102
  (pd.Timestamp("2020-01-01") + pd.Timedelta(days=int(d))).strftime("%Y-%m-%d")
103
  for d in days_offset
104
  ]
 
114
 
115
  dirty_df = clean_df.copy()
116
 
 
117
  phone_idx = rng.choice(n, size=int(n * 0.6), replace=False)
118
  dirty_df.loc[phone_idx, "phone"] = [
119
  _scramble_phone(dirty_df.loc[i, "phone"], rng) for i in phone_idx
120
  ]
121
 
 
122
  date_idx = rng.choice(n, size=int(n * 0.6), replace=False)
123
  dirty_df.loc[date_idx, "listed_date"] = [
124
  _scramble_date(dirty_df.loc[i, "listed_date"], rng) for i in date_idx
125
  ]
126
 
 
127
  dup_idx = rng.choice(n, size=15, replace=False)
128
  dup_rows = dirty_df.iloc[dup_idx].copy()
129
  dirty_df = pd.concat([dirty_df, dup_rows], ignore_index=True)
 
141
  random.seed(SEED)
142
 
143
  n = 300
144
+ countries = ["USA", "UK", "Canada", "Australia", "Germany"]
145
+
146
+ names = [f"{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}" for _ in range(n)]
147
+ ages = rng.integers(18, 75, size=n).astype(float)
148
+ purchase_amounts = np.round(rng.uniform(10.0, 500.0, size=n), 2)
149
+ countries_col = rng.choice(countries, size=n)
150
+ emails = [f"user{i}@example.com" for i in range(1, n + 1)]
151
+ days_offset = rng.integers(0, 730, size=n)
152
+ signup_dates = [
 
 
 
153
  (pd.Timestamp("2022-01-01") + pd.Timedelta(days=int(d))).strftime("%Y-%m-%d")
154
  for d in days_offset
155
  ]
 
165
 
166
  dirty_df = clean_df.copy()
167
 
 
168
  for col, frac in [("age", 0.15), ("purchase_amount", 0.15),
169
  ("country", 0.10), ("signup_date", 0.10)]:
170
  idx = rng.choice(n, size=int(n * frac), replace=False)
171
  dirty_df.loc[idx, col] = np.nan
172
 
 
173
  out_idx = rng.choice(n, size=int(n * 0.03), replace=False)
174
  dirty_df.loc[out_idx, "purchase_amount"] = (
175
  dirty_df.loc[out_idx, "purchase_amount"] * 10
176
  )
177
 
 
178
  case_idx = rng.choice(n, size=int(n * 0.40), replace=False)
179
  dirty_df.loc[case_idx, "country"] = dirty_df.loc[case_idx, "country"].str.lower()
180
 
 
181
  date_idx = rng.choice(n, size=int(n * 0.50), replace=False)
182
  valid_date_idx = [i for i in date_idx if pd.notna(dirty_df.loc[i, "signup_date"])]
183
  for i in valid_date_idx:
184
  dirty_df.loc[i, "signup_date"] = _scramble_date(dirty_df.loc[i, "signup_date"], rng)
185
 
 
186
  dup_idx = rng.choice(n, size=20, replace=False)
187
  dup_rows = dirty_df.iloc[dup_idx].copy()
188
  dirty_df = pd.concat([dirty_df, dup_rows], ignore_index=True)
 
190
  return dirty_df.reset_index(drop=True), clean_df.reset_index(drop=True)
191
 
192
 
193
+ # ---------------------------------------------------------------------------
194
+ # OrgOS App Data Generators
195
+ # ---------------------------------------------------------------------------
196
 
197
def generate_jira_records(n: int = 50, seed: int = SEED) -> List[Dict]:
    """Generate synthetic Jira-like engineering tickets (canonical field names).

    Parameters
    ----------
    n : number of tickets to generate (n=0 yields an empty list).
    seed : RNG seed, so generated episodes are reproducible.

    Returns a list of ticket dicts. When at least one record exists, the
    first ticket is pinned to the Workflow A scenario: an unassigned p1
    issue for customer ACME-001.
    """
    random.seed(seed)
    priorities = ["p0", "p1", "p2", "p3"]
    statuses = ["open", "in_progress", "in_review", "closed"]
    employees = [f"EMP-{i:03d}" for i in range(1, 21)]
    accounts = [f"ACME-{i:03d}" for i in range(1, 31)]
    titles = [
        "Login fails intermittently", "API timeout on checkout",
        "Dashboard charts not rendering", "Email notifications delayed",
        "Password reset broken", "Search returns no results",
        "Import fails for large files", "Session expires too quickly",
        "Reports missing data", "Webhook delivery failures",
    ]

    records = []
    for i in range(1, n + 1):
        records.append({
            "issue_id": f"JIRA-{i:03d}",
            "title": f"{random.choice(titles)} #{i}",
            "priority": random.choices(priorities, weights=[5, 15, 50, 30])[0],
            # ~30 % of tickets start unassigned.
            "assignee": random.choice(employees) if random.random() > 0.3 else None,
            "status": random.choices(statuses, weights=[30, 40, 15, 15])[0],
            "reporter": random.choice(employees),
            "customer_id": random.choice(accounts),
            "linked_zendesk": None,
            "labels": random.sample(["bug", "urgent", "customer-reported"], k=random.randint(0, 2)),
            "created_at": "2026-04-20T09:00:00",
        })

    # Workflow A primary issue: JIRA-001 is unassigned, linked to ACME-001.
    # Guarded so n=0 does not raise IndexError.
    if records:
        records[0].update({
            "title": "Customer login fails intermittently",
            "priority": "p1",
            "status": "open",
            "customer_id": "ACME-001",
            "assignee": None,
            "linked_zendesk": None,
        })

    return records
238
+
239
+
240
def generate_zendesk_records(n: int = 40, seed: int = SEED) -> List[Dict]:
    """Generate synthetic Zendesk-like support tickets (canonical field names).

    Parameters
    ----------
    n : number of tickets to generate (n=0 yields an empty list).
    seed : RNG seed, so generated episodes are reproducible.

    Fields starting with ``_`` are internal episode-state trackers and are
    stripped before the agent sees the record. When records exist, ZD-001
    is pinned to the Workflow A scenario (unacknowledged p1 from ACME-001)
    and a few tickets are reassigned to ACME-003 for Workflow C.
    """
    random.seed(seed)
    urgencies = ["p0", "p1", "p2", "p3"]
    states = ["new", "open", "pending", "resolved", "closed"]
    accounts = [f"ACME-{i:03d}" for i in range(1, 31)]
    agents = [f"agent{i}@company.com" for i in range(1, 6)]

    records = []
    for i in range(1, n + 1):
        records.append({
            "ticket_number": f"ZD-{i:03d}",
            "title": f"Support request #{i}",
            "urgency": random.choices(urgencies, weights=[3, 12, 55, 30])[0],
            # ~40 % of tickets start without an assigned agent.
            "agent_email": random.choice(agents) if random.random() > 0.4 else None,
            "state": random.choices(states, weights=[20, 35, 20, 15, 10])[0],
            "customer_id": random.choice(accounts),
            "channel": random.choice(["email", "chat", "phone", "web"]),
            "created_at": "2026-04-20T08:00:00",
            # Internal state tracking — stripped before agent sees record
            "_acknowledged": False,
            "_queried_accounts": [],
            "_profile_created": False,
        })

    # Workflow A primary: ZD-001 is unacknowledged, from ACME-001.
    # Guarded so n=0 does not raise IndexError.
    if records:
        records[0].update({
            "title": "Login issue — cannot access my account",
            "urgency": "p1",
            "state": "new",
            "customer_id": "ACME-001",
            "_acknowledged": False,
        })

    # Workflow C: several tickets from ACME-003 (bounds-checked).
    for i in [4, 11, 17]:
        if i < len(records):
            records[i]["customer_id"] = "ACME-003"

    return records
280
+
281
+
282
def generate_salesforce_records(n: int = 30, seed: int = SEED) -> List[Dict]:
    """Generate synthetic Salesforce-like CRM accounts (canonical field names).

    Parameters
    ----------
    n : number of accounts to generate (n<3 skips the scenario overrides
        that would otherwise index out of range).
    seed : RNG seed, so generated episodes are reproducible.

    Fields starting with ``_`` are internal episode-state trackers.
    ACME-001 is pinned for Workflow A (paying, yellow health) and ACME-003
    for Workflow C (churn risk) when enough records exist.
    """
    random.seed(seed)
    deal_stages = ["prospect", "qualification", "negotiation", "closed_won", "closed_lost"]
    healths = ["green", "yellow", "red"]
    territories = ["west", "east", "central", "apac", "emea"]
    employees = [f"EMP-{i:03d}" for i in range(1, 21)]
    companies = [
        "Acme Corporation", "Globex Systems", "Initech Ltd", "Umbrella Corp",
        "Stark Industries", "Wayne Enterprises", "Hooli Inc", "Pied Piper",
        "Bluth Company", "Vandelay Industries",
    ]

    records = []
    for i in range(1, n + 1):
        records.append({
            "account_id": f"ACME-{i:03d}",
            # Cycle through the base company names, suffixed for uniqueness.
            "company_name": f"{companies[(i-1) % len(companies)]} {i}",
            "deal_stage": random.choice(deal_stages),
            "health": random.choices(healths, weights=[60, 30, 10])[0],
            "owner": random.choice(employees),
            "arr": random.randint(5_000, 200_000),
            "is_paying": random.random() > 0.3,
            "territory": random.choice(territories),
            "industry": random.choice(["tech", "finance", "healthcare", "retail"]),
            # Internal state tracking
            "_account_checked": False,
            "_churn_flagged": False,
            "_team_assigned": False,
            "_intervention_assigned": False,
        })

    # Workflow A: ACME-001 is a paying customer with yellow health.
    # Guarded so small n does not raise IndexError.
    if records:
        records[0].update({
            "company_name": "Acme Corporation",
            "deal_stage": "closed_won",
            "health": "yellow",
            "is_paying": True,
            "arr": 50_000,
            "territory": "west",
        })

    # Workflow C: ACME-003 is at churn risk (requires at least 3 records).
    if len(records) >= 3:
        records[2].update({
            "company_name": "Globex Systems",
            "health": "red",
            "deal_stage": "negotiation",
            "is_paying": True,
            "arr": 30_000,
            "_churn_flagged": False,
        })

    return records
335
+
336
+
337
def generate_workday_records(n: int = 20, seed: int = SEED) -> List[Dict]:
    """Generate synthetic Workday-like HR records (canonical field names).

    Parameters
    ----------
    n : number of regular employee records to generate; one extra pending
        new-hire record (EMP-NEW-001) is always appended for Workflow B,
        so the returned list has ``n + 1`` entries.
    seed : RNG seed, so generated episodes are reproducible.

    Fields starting with ``_`` are internal episode-state trackers.
    """
    random.seed(seed)
    levels = ["IC1", "IC2", "IC3", "IC4", "M1", "M2"]
    departments = ["engineering", "support", "sales", "hr", "data"]
    territories = ["west", "east", "central", "apac", "emea"]

    records = []
    for i in range(1, n + 1):
        records.append({
            "employee_id": f"EMP-{i:03d}",
            "name": f"{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}",
            "level": random.choice(levels),
            # Managers are drawn from earlier employees (capped at the first
            # five). Upper bound is i - 1 so nobody can be their own manager;
            # the first employee has no manager.
            "manager_id": f"EMP-{random.randint(1, min(i - 1, 5)):03d}" if i > 1 else None,
            "status": random.choices(["active", "pending"], weights=[90, 10])[0],
            "department": random.choice(departments),
            "territory": random.choice(territories),
            "email": f"emp{i}@company.com",
            # Internal state tracking
            "_access_provisioned": {},  # app_name → bool
            "_sla_logged": False,
            "_onboarding_created": False,
        })

    # Workflow B: one pending new hire to onboard.
    records.append({
        "employee_id": "EMP-NEW-001",
        "name": "Jordan Riley",
        "level": "IC2",
        "manager_id": "EMP-001",
        "status": "pending",
        "department": "support",
        "territory": "west",
        "email": "jordan.riley@company.com",
        "_access_provisioned": {},
        "_sla_logged": False,
        "_onboarding_created": False,
    })

    return records
377
+
378
+
379
def generate_episode_data(workflow_id: str, seed: int = SEED) -> Dict[str, List[Dict]]:
    """
    Generate correlated data for a full episode across all 4 apps.

    Cross-references are maintained: Zendesk customer_ids match Salesforce
    account_ids, Jira reporters are Workday employees, etc. The same seed
    is threaded through every generator so episodes are reproducible.
    (``workflow_id`` is accepted for API symmetry but is not currently read
    here — every app's data is generated for every episode.)
    """
    app_generators = {
        "jira": (generate_jira_records, 50),
        "zendesk": (generate_zendesk_records, 40),
        "salesforce": (generate_salesforce_records, 30),
        "workday": (generate_workday_records, 20),
    }
    return {
        app: make(n=count, seed=seed)
        for app, (make, count) in app_generators.items()
    }
server/environment.py CHANGED
@@ -1,41 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  class OrgOSEnvironment:
2
  MAX_STEPS = {"A": 15, "B": 20, "C": 18}
3
- WORKFLOWS = ["A", "B", "C"]
4
 
5
  def __init__(self):
6
  self._drift = SchemaDriftEngine(seed=42)
7
  self._rules = BusinessRuleEngine()
8
  self._workflow = WorkflowEngine()
9
- self._apps: Dict[str, BaseApp] = {
10
- "jira": JiraApp(self._drift),
11
- "zendesk": ZendeskApp(self._drift),
12
- "salesforce": SalesforceApp(self._drift),
13
- "workday": WorkdayApp(self._drift),
14
  }
15
- self._episode_num = 0
16
- self._episode_id = ""
17
- self._workflow_id = "A"
18
- self._step_count = 0
19
- self._last_score = 0.001
20
  self._policy_drift_applied = False
21
 
22
  # Reward component trackers
23
- self._wf_score = 0.0 # workflow completion
24
- self._rule_score = 1.0 # compliance (starts perfect, penalized on violation)
25
- self._schema_score = 0.0 # schema adaptation successes
26
- self._efficiency = 1.0 # degrades with no-ops
27
- self._policy_score = 0.0 # policy drift handling
 
 
 
 
28
 
29
  def reset(self, workflow_id: Optional[str] = None) -> OrgOSObservation:
30
  self._episode_num += 1
31
- self._episode_id = str(uuid.uuid4())
32
- self._workflow_id = workflow_id or self.WORKFLOWS[(self._episode_num - 1) % 3]
33
- self._step_count = 0
34
- self._last_score = 0.001
35
- self._rule_score = 1.0
36
- self._wf_score = 0.0
37
  self._schema_score = 0.0
38
- self._efficiency = 1.0
39
  self._policy_score = 0.0
40
  self._policy_drift_applied = False
41
 
@@ -56,70 +76,122 @@ class OrgOSEnvironment:
56
  # Start workflow tracking
57
  self._workflow.start(self._workflow_id)
58
 
59
- return self._build_obs(0.001, False, "Episode started. Study the workflow goal and schema hints.")
 
 
 
 
60
 
61
  def step(self, action: OrgOSAction) -> OrgOSObservation:
62
  self._step_count += 1
63
- old_score = self._last_score
64
  extra_penalty = 0.0
65
 
66
  # 1. Validate app exists
67
  if action.app not in self._apps:
68
- return self._build_obs(old_score - 0.05, False, f"Unknown app '{action.app}'")
 
 
 
 
69
 
70
  # 2. Business rule check (RBAC, approvals)
71
- ctx = {"agent_role": "support", "manager_approved": False}
 
72
  allowed, reason, rule_penalty = self._rules.check_action(action, ctx)
73
  if not allowed:
74
  self._rule_score = max(0.0, self._rule_score - 0.08)
75
- extra_penalty = rule_penalty
76
  return self._build_obs(
77
- max(-0.25, old_score + extra_penalty),
78
- False, f"Rule violation: {reason}"
 
79
  )
80
 
81
  # 3. Execute on app
82
  result = self._apps[action.app].execute(action.operation, action.args)
83
- if not result["success"]:
84
- self._efficiency -= 0.02 # penalize failed/no-op actions
85
- return self._build_obs(old_score - 0.01, False, result["message"])
86
 
87
- # 4. Check schema drift adaptation
88
- # If agent used canonical field names on a v2/v3 schema → penalize
89
  if result.get("schema_error"):
90
- extra_penalty -= 0.20
91
- return self._build_obs(old_score - 0.20, False,
92
- f"Stale schema: field '{result['schema_error']}' not found in current schema")
93
- elif result.get("schema_adapted"):
94
- # Agent correctly used drifted field name → bonus
95
- self._schema_score = min(1.0, self._schema_score + 0.1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  # 5. Re-evaluate workflow completion
98
  self._wf_score = self._workflow.evaluate(self._apps)
99
 
100
- # 6. Check SLA violations
101
- sla_ok, sla_pen = self._rules.check_sla(result.get("ticket", {}),
102
- self._step_count * 2.5) # 2.5 min per step
 
 
103
  if not sla_ok:
104
- extra_penalty += sla_pen
105
  self._rule_score = max(0.0, self._rule_score - 0.05)
106
 
107
  # 7. Compute composite score
108
  new_score = self._compute_score()
109
- delta = new_score - old_score + extra_penalty
110
  self._last_score = max(0.001, min(0.999, new_score))
111
 
112
  # 8. Terminal condition
113
- done = (self._wf_score >= 0.95 or
114
- self._step_count >= self.MAX_STEPS[self._workflow_id])
 
 
115
  if done and self._wf_score >= 0.95:
116
- delta += 0.20 # terminal bonus
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- return self._build_obs(delta, done, result["message"])
 
 
119
 
120
  def _compute_score(self) -> float:
121
  raw = (
122
- 0.30 * self._wf_score +
123
  0.25 * self._rule_score +
124
  0.20 * self._schema_score +
125
  0.15 * self._efficiency +
@@ -127,15 +199,50 @@ class OrgOSEnvironment:
127
  )
128
  return max(0.001, min(0.999, raw))
129
 
130
- def state(self) -> OrgOSState:
131
- return OrgOSState(
132
- episode_id=self._episode_id,
133
- workflow_id=self._workflow_id,
134
- schema_versions=self._drift._versions,
135
- step_count=self._step_count,
136
- max_steps=self.MAX_STEPS.get(self._workflow_id, 15),
137
- rule_violation_count=len(self._rules._violation_log),
138
- workflow_completion=self._wf_score,
139
- rule_compliance_rate=self._rule_score,
140
- policy_drift_active=self._policy_drift_applied,
141
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OrgOS environment — the single stateful RL environment object."""
2
+
3
+ import uuid
4
+ from typing import Dict, Optional
5
+
6
+ from models import OrgOSAction, OrgOSObservation, OrgOSState, RewardBreakdown
7
+ from server.apps.jira import JiraApp
8
+ from server.apps.zendesk import ZendeskApp
9
+ from server.apps.salesforce import SalesforceApp
10
+ from server.apps.workday import WorkdayApp
11
+ from server.business_rules import BusinessRuleEngine
12
+ from server.data_generator import generate_episode_data
13
+ from server.schema_drift import SchemaDriftEngine
14
+ from server.workflow_engine import WorkflowEngine
15
+
16
+
17
  class OrgOSEnvironment:
18
  MAX_STEPS = {"A": 15, "B": 20, "C": 18}
19
+ WORKFLOWS = ["A", "B", "C"]
20
 
21
  def __init__(self):
22
  self._drift = SchemaDriftEngine(seed=42)
23
  self._rules = BusinessRuleEngine()
24
  self._workflow = WorkflowEngine()
25
+ self._apps: Dict[str, object] = {
26
+ "jira": JiraApp(self._drift),
27
+ "zendesk": ZendeskApp(self._drift),
28
+ "salesforce": SalesforceApp(self._drift),
29
+ "workday": WorkdayApp(self._drift),
30
  }
31
+ self._episode_num = 0
32
+ self._episode_id = ""
33
+ self._workflow_id = "A"
34
+ self._step_count = 0
35
+ self._last_score = 0.001
36
  self._policy_drift_applied = False
37
 
38
  # Reward component trackers
39
+ self._wf_score = 0.0 # workflow completion
40
+ self._rule_score = 1.0 # compliance (starts perfect, penalized on violation)
41
+ self._schema_score = 0.0 # schema adaptation successes
42
+ self._efficiency = 1.0 # degrades with failed/no-op actions
43
+ self._policy_score = 0.0 # policy drift handling bonus
44
+
45
+ # ------------------------------------------------------------------
46
+ # OpenEnv core API
47
+ # ------------------------------------------------------------------
48
 
49
  def reset(self, workflow_id: Optional[str] = None) -> OrgOSObservation:
50
  self._episode_num += 1
51
+ self._episode_id = str(uuid.uuid4())
52
+ self._workflow_id = workflow_id or self.WORKFLOWS[(self._episode_num - 1) % 3]
53
+ self._step_count = 0
54
+ self._last_score = 0.001
55
+ self._rule_score = 1.0
56
+ self._wf_score = 0.0
57
  self._schema_score = 0.0
58
+ self._efficiency = 1.0
59
  self._policy_score = 0.0
60
  self._policy_drift_applied = False
61
 
 
76
  # Start workflow tracking
77
  self._workflow.start(self._workflow_id)
78
 
79
+ return self._build_obs(
80
+ reward=0.001,
81
+ done=False,
82
+ message="Episode started. Study the workflow goal and schema hints before acting.",
83
+ )
84
 
85
  def step(self, action: OrgOSAction) -> OrgOSObservation:
86
  self._step_count += 1
87
+ old_score = self._last_score
88
  extra_penalty = 0.0
89
 
90
  # 1. Validate app exists
91
  if action.app not in self._apps:
92
+ return self._build_obs(
93
+ reward=old_score - 0.05,
94
+ done=False,
95
+ message=f"Unknown app '{action.app}'. Valid apps: {list(self._apps)}",
96
+ )
97
 
98
  # 2. Business rule check (RBAC, approvals)
99
+ agent_role = self._workflow.get_role()
100
+ ctx = {"agent_role": agent_role, "manager_approved": False}
101
  allowed, reason, rule_penalty = self._rules.check_action(action, ctx)
102
  if not allowed:
103
  self._rule_score = max(0.0, self._rule_score - 0.08)
104
+ extra_penalty = rule_penalty
105
  return self._build_obs(
106
+ reward=max(-0.25, old_score + extra_penalty),
107
+ done=False,
108
+ message=f"Rule violation: {reason}",
109
  )
110
 
111
  # 3. Execute on app
112
  result = self._apps[action.app].execute(action.operation, action.args)
 
 
 
113
 
114
+ # 4. Check schema drift FIRST — apps return success:False when schema_error is set
 
115
  if result.get("schema_error"):
116
+ self._efficiency -= 0.02
117
+ return self._build_obs(
118
+ reward=old_score - 0.20,
119
+ done=False,
120
+ message=(
121
+ f"Stale schema: field '{result['schema_error']}' is no longer valid. "
122
+ "Check schema_hints for the current field name. "
123
+ f"Hint: {result.get('message', '')}"
124
+ ),
125
+ )
126
+
127
+ if not result.get("success"):
128
+ self._efficiency -= 0.02 # penalize failed/no-op actions
129
+ return self._build_obs(
130
+ reward=old_score - 0.01,
131
+ done=False,
132
+ message=result.get("message", "Operation failed"),
133
+ )
134
+
135
+ # Schema adaptation bonus (agent used correct drifted field name)
136
+ if result.get("schema_adapted"):
137
+ self._schema_score = min(1.0, self._schema_score + 0.10)
138
+ self._policy_score = min(1.0, self._policy_score + 0.05)
139
 
140
  # 5. Re-evaluate workflow completion
141
  self._wf_score = self._workflow.evaluate(self._apps)
142
 
143
+ # 6. SLA check (only if a ticket was touched)
144
+ sla_ok, sla_pen = self._rules.check_sla(
145
+ result.get("ticket", {}),
146
+ self._step_count * 2.5, # approximate 2.5 min per step
147
+ )
148
  if not sla_ok:
149
+ extra_penalty += sla_pen
150
  self._rule_score = max(0.0, self._rule_score - 0.05)
151
 
152
  # 7. Compute composite score
153
  new_score = self._compute_score()
154
+ delta = new_score - old_score + extra_penalty
155
  self._last_score = max(0.001, min(0.999, new_score))
156
 
157
  # 8. Terminal condition
158
+ done = (
159
+ self._wf_score >= 0.95
160
+ or self._step_count >= self.MAX_STEPS[self._workflow_id]
161
+ )
162
  if done and self._wf_score >= 0.95:
163
+ delta += 0.20 # terminal completion bonus
164
+
165
+ return self._build_obs(
166
+ reward=delta,
167
+ done=done,
168
+ message=result.get("message", "OK"),
169
+ )
170
+
171
+ # ------------------------------------------------------------------
172
+ # State endpoint
173
+ # ------------------------------------------------------------------
174
+
175
+ def state(self) -> OrgOSState:
176
+ return OrgOSState(
177
+ episode_id = self._episode_id,
178
+ workflow_id = self._workflow_id,
179
+ schema_versions = self._drift._versions,
180
+ step_count = self._step_count,
181
+ max_steps = self.MAX_STEPS.get(self._workflow_id, 15),
182
+ rule_violation_count = len(self._rules._violation_log),
183
+ workflow_completion = self._wf_score,
184
+ rule_compliance_rate = self._rule_score,
185
+ policy_drift_active = self._policy_drift_applied,
186
+ )
187
 
188
+ # ------------------------------------------------------------------
189
+ # Internal helpers
190
+ # ------------------------------------------------------------------
191
 
192
  def _compute_score(self) -> float:
193
  raw = (
194
+ 0.30 * self._wf_score +
195
  0.25 * self._rule_score +
196
  0.20 * self._schema_score +
197
  0.15 * self._efficiency +
 
199
  )
200
  return max(0.001, min(0.999, raw))
201
 
202
+ def _build_obs(self, reward: float, done: bool, message: str) -> OrgOSObservation:
203
+ """Construct a fully-populated observation from current environment state."""
204
+ # Per-app state previews
205
+ app_states = {
206
+ name: app.get_state_view(max_rows=3)
207
+ for name, app in self._apps.items()
208
+ }
209
+
210
+ # Schema hints (partial — agent must probe to discover full mapping)
211
+ schema_hints = self._drift.get_all_changes()
212
+ # Flatten to dot-notation: {"jira.priority": "severity", ...}
213
+ flat_hints: Dict[str, str] = {}
214
+ for app_name, field_map in schema_hints.items():
215
+ for canonical, drifted in field_map.items():
216
+ if canonical != drifted:
217
+ flat_hints[f"{app_name}.{canonical}"] = drifted
218
+
219
+ # Workflow progress
220
+ completed_steps = self._workflow.get_completed()
221
+ pending_steps = self._workflow.get_pending()
222
+ workflow_goal = self._workflow.get_goal()
223
+
224
+ # Reward breakdown snapshot
225
+ breakdown = RewardBreakdown(
226
+ workflow_completion = self._wf_score,
227
+ rule_compliance = self._rule_score,
228
+ schema_adaptation = self._schema_score,
229
+ efficiency = self._efficiency,
230
+ policy_drift_handling = self._policy_score,
231
+ )
232
+
233
+ return OrgOSObservation(
234
+ done = done,
235
+ reward = round(float(reward), 6),
236
+ current_score = float(self._last_score),
237
+ workflow_id = self._workflow_id,
238
+ step_count = self._step_count,
239
+ app_states = app_states,
240
+ workflow_goal = workflow_goal,
241
+ completed_steps = completed_steps,
242
+ pending_steps = pending_steps,
243
+ schema_hints = flat_hints,
244
+ active_rules = self._rules.get_active_rules_summary(),
245
+ rule_violations = self._rules.get_violations_this_step(),
246
+ reward_breakdown = breakdown,
247
+ message = message,
248
+ )
server/schema_drift.py CHANGED
@@ -1,33 +1,39 @@
1
- # Canonical actual field names per app per schema version
 
 
 
 
 
2
  SCHEMA_MAP = {
3
  "jira": {
4
- "v1": {"priority": "priority", "assignee": "assignee", "status": "status"},
5
- "v2": {"priority": "severity", "assignee": "owner", "status": "state"},
6
- "v3": {"priority": "urgency_level", "assignee": "assigned_to", "status": "current_state",
7
- "sla_deadline": "due_by"}, # v3 adds a new field
8
  },
9
  "zendesk": {
10
- "v1": {"urgency": "urgency", "agent_email": "agent_email", "state": "state"},
11
- "v2": {"urgency": "priority", "agent_email": "handler", "state": "ticket_state"},
12
- "v3": {"urgency": "impact_level", "agent_email": "assigned_agent","state": "resolution_status"},
13
  },
14
  "salesforce": {
15
- "v1": {"deal_stage": "deal_stage", "health": "health", "owner": "owner_name"},
16
- "v2": {"deal_stage": "pipeline_stage","health": "account_health", "owner": "account_owner"},
17
- "v3": {"deal_stage": "stage", "health": "risk_score", "owner": "rep_email",
18
  "arr": "annual_recurring_revenue"},
19
  },
20
  "workday": {
21
- "v1": {"level": "level", "manager_id": "manager_id", "status": "resolution"},
22
- "v2": {"level": "job_level", "manager_id": "reports_to", "status": "request_status"},
23
- "v3": {"level": "seniority", "manager_id": "direct_manager","status": "approval_state"},
24
  },
25
  }
26
 
 
27
  class SchemaDriftEngine:
28
  def __init__(self, seed: int = 42):
29
  self._seed = seed
30
- self._versions: Dict[str, str] = {} # app "v1"/"v2"/"v3"
31
 
32
  def sample_for_episode(self, episode_num: int) -> None:
33
  """Sample schema versions deterministically per episode."""
@@ -35,21 +41,58 @@ class SchemaDriftEngine:
35
  self._versions = {app: rng.choice(["v1", "v2", "v3"]) for app in SCHEMA_MAP}
36
 
37
  def translate_record(self, record: Dict, app: str) -> Dict:
38
- """Rename canonical field names → current schema's field names."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  version = self._versions.get(app, "v1")
40
- mapping = SCHEMA_MAP[app][version]
41
- return {mapping.get(k, k): v for k, v in record.items()}
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  def get_hints(self) -> Dict[str, str]:
44
  """Return partial schema hints visible in observation.
45
- Only reveal 1 random field per app (agent must probe for the rest)."""
46
  hints = {}
47
  rng = random.Random(self._seed)
48
  for app, version in self._versions.items():
49
- mapping = SCHEMA_MAP[app][version]
50
- # Reveal only fields that actually changed (v2/v3)
51
  changed = {f"{app}.{k}": v for k, v in mapping.items() if k != v}
52
  if changed:
53
  key = rng.choice(list(changed.keys()))
54
  hints[key] = changed[key]
55
- return hints
 
 
 
 
 
 
 
 
 
1
"""Schema drift engine manages per-episode field-name versioning across all 4 apps."""

import random
from typing import Dict, Optional

# Canonical field → actual field name, per app, per schema version.
# "v1" is the undrifted canonical schema; v2/v3 progressively rename fields,
# and some v3 schemas introduce brand-new fields (e.g. jira's "sla_deadline").
SCHEMA_MAP = {
    "jira": {
        "v1": {"priority": "priority", "assignee": "assignee", "status": "status"},
        "v2": {"priority": "severity", "assignee": "owner", "status": "state"},
        "v3": {
            "priority": "urgency_level",
            "assignee": "assigned_to",
            "status": "current_state",
            "sla_deadline": "due_by",
        },
    },
    "zendesk": {
        "v1": {"urgency": "urgency", "agent_email": "agent_email", "state": "state"},
        "v2": {"urgency": "priority", "agent_email": "handler", "state": "ticket_state"},
        "v3": {
            "urgency": "impact_level",
            "agent_email": "assigned_agent",
            "state": "resolution_status",
        },
    },
    "salesforce": {
        "v1": {"deal_stage": "deal_stage", "health": "health", "owner": "owner_name"},
        "v2": {"deal_stage": "pipeline_stage", "health": "account_health", "owner": "account_owner"},
        "v3": {
            "deal_stage": "stage",
            "health": "risk_score",
            "owner": "rep_email",
            "arr": "annual_recurring_revenue",
        },
    },
    "workday": {
        "v1": {"level": "level", "manager_id": "manager_id", "status": "resolution"},
        "v2": {"level": "job_level", "manager_id": "reports_to", "status": "request_status"},
        "v3": {"level": "seniority", "manager_id": "direct_manager", "status": "approval_state"},
    },
}
31
 
32
+
33
class SchemaDriftEngine:
    """Tracks which schema version each app uses for the current episode."""

    def __init__(self, seed: int = 42):
        # Base RNG seed; also consumed by get_hints() for deterministic hint selection.
        self._seed = seed
        # Every app starts on the canonical v1 schema until an episode is sampled.
        self._versions: Dict[str, str] = dict.fromkeys(SCHEMA_MAP, "v1")
37
 
38
  def sample_for_episode(self, episode_num: int) -> None:
39
  """Sample schema versions deterministically per episode."""
 
41
  self._versions = {app: rng.choice(["v1", "v2", "v3"]) for app in SCHEMA_MAP}
42
 
43
  def translate_record(self, record: Dict, app: str) -> Dict:
44
+ """Rename canonical field names → current schema's field names (for output to agent)."""
45
+ version = self._versions.get(app, "v1")
46
+ mapping = SCHEMA_MAP.get(app, {}).get(version, {})
47
+ return {mapping.get(k, k): v for k, v in record.items()
48
+ if not k.startswith("_")} # strip internal state-tracking fields
49
+
50
+ def translate_field(self, canonical_field: str, app: str) -> str:
51
+ """Get the current drifted name for a canonical field."""
52
+ version = self._versions.get(app, "v1")
53
+ mapping = SCHEMA_MAP.get(app, {}).get(version, {})
54
+ return mapping.get(canonical_field, canonical_field)
55
+
56
+ def check_args_for_drift(self, args: Dict, app: str):
57
+ """
58
+ Check whether action args use canonical (stale) vs drifted (correct) field names.
59
+ Returns (schema_error: Optional[str], schema_adapted: bool).
60
+ - schema_error: the canonical field name the agent incorrectly used, or None
61
+ - schema_adapted: True if agent correctly used a drifted field name
62
+ """
63
  version = self._versions.get(app, "v1")
64
+ if version == "v1":
65
+ return None, False # v1 is canonical no drift, no credit/penalty
66
+
67
+ mapping = SCHEMA_MAP.get(app, {}).get(version, {})
68
+ changed = {k: v for k, v in mapping.items() if k != v} # canonical → drifted
69
+ reverse = {v: k for k, v in changed.items()} # drifted → canonical
70
+
71
+ for key in args:
72
+ if key in changed:
73
+ return key, False # Agent used old canonical name on drifted schema → error
74
+ if key in reverse:
75
+ return None, True # Agent correctly used drifted name → adaptation bonus
76
+
77
+ return None, False
78
 
79
  def get_hints(self) -> Dict[str, str]:
80
  """Return partial schema hints visible in observation.
81
+ Reveals 1 changed field per app that has drift (agent must discover the rest)."""
82
  hints = {}
83
  rng = random.Random(self._seed)
84
  for app, version in self._versions.items():
85
+ mapping = SCHEMA_MAP.get(app, {}).get(version, {})
 
86
  changed = {f"{app}.{k}": v for k, v in mapping.items() if k != v}
87
  if changed:
88
  key = rng.choice(list(changed.keys()))
89
  hints[key] = changed[key]
90
+ return hints
91
+
92
+ def get_all_changes(self) -> Dict[str, Dict[str, str]]:
93
+ """Return all field changes for every app (used by UI schema drift viewer)."""
94
+ result = {}
95
+ for app, version in self._versions.items():
96
+ mapping = SCHEMA_MAP.get(app, {}).get(version, {})
97
+ result[app] = {k: v for k, v in mapping.items() if k != v}
98
+ return result
server/workflow_engine.py CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  @dataclass
2
  class WorkflowStep:
3
  step_id: str
@@ -5,59 +11,179 @@ class WorkflowStep:
5
  app: str
6
  operation: str
7
  # Callable that checks if this step was completed given the app states
8
- completion_check: Callable[[Dict[str, "BaseApp"]], bool]
9
-
10
- # Workflow A: Customer Bug → Engineering Fix
11
- WORKFLOW_A_STEPS = [
12
- WorkflowStep("A1", "Acknowledge ticket in Zendesk",
13
- "zendesk", "acknowledge_ticket",
14
- lambda apps: apps["zendesk"].ticket_acknowledged()),
15
 
16
- WorkflowStep("A2", "Escalate to Jira — create linked issue",
17
- "jira", "create_issue",
18
- lambda apps: apps["jira"].has_linked_issue()),
19
 
20
- WorkflowStep("A3", "Check if customer is paying (Salesforce lookup)",
21
- "salesforce", "get_account",
22
- lambda apps: apps["salesforce"].account_checked()),
23
-
24
- WorkflowStep("A4", "Assign correct engineer in Jira based on priority",
25
- "jira", "assign_owner",
26
- lambda apps: apps["jira"].issue_assigned()),
27
-
28
- WorkflowStep("A5", "Log SLA status in Workday",
29
- "workday", "log_sla_event",
30
- lambda apps: apps["workday"].sla_logged()),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  ]
32
 
33
- # Workflow B: Employee Onboarding
 
 
 
34
  WORKFLOW_B_STEPS = [
35
- WorkflowStep("B1", "Create employee record in Workday", ...),
36
- WorkflowStep("B2", "Provision Jira access based on role", ...),
37
- WorkflowStep("B3", "Add to Salesforce team by territory", ...),
38
- WorkflowStep("B4", "Create Zendesk support profile if customer-facing", ...),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  ]
40
 
41
- # Workflow C: Churn Risk Alert
 
 
 
42
  WORKFLOW_C_STEPS = [
43
- WorkflowStep("C1", "Flag at-risk account in Salesforce", ...),
44
- WorkflowStep("C2", "Query recent support volume in Zendesk", ...),
45
- WorkflowStep("C3", "Check outstanding bugs in Jira", ...),
46
- WorkflowStep("C4", "Synthesize churn score and assign intervention owner", ...),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  ]
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  class WorkflowEngine:
50
- WORKFLOWS = {"A": WORKFLOW_A_STEPS, "B": WORKFLOW_B_STEPS, "C": WORKFLOW_C_STEPS}
 
 
 
 
 
 
 
 
 
51
 
52
  def start(self, workflow_id: str) -> None:
 
 
53
  self._steps = self.WORKFLOWS[workflow_id].copy()
54
- self._completed: List[str] = []
55
 
56
  def evaluate(self, apps: Dict) -> float:
57
- """Check all steps and return completion ratio (0.0-1.0)."""
 
 
58
  completed = sum(1 for s in self._steps if s.completion_check(apps))
59
  self._completed = [s.step_id for s in self._steps if s.completion_check(apps)]
60
  return completed / len(self._steps)
61
 
62
  def get_pending(self) -> List[str]:
63
- return [s.description for s in self._steps if s.step_id not in self._completed]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Workflow engine — defines and evaluates multi-app workflow completion."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Callable, Dict, List
5
+
6
+
7
  @dataclass
8
  class WorkflowStep:
9
  step_id: str
 
11
  app: str
12
  operation: str
13
  # Callable that checks if this step was completed given the app states
14
+ completion_check: Callable[[Dict], bool]
 
 
 
 
 
 
15
 
 
 
 
16
 
17
# ---------------------------------------------------------------------------
# Workflow A: Customer Bug Fix (Zendesk → Jira → Salesforce → Workday)
# Agent role: support
# ---------------------------------------------------------------------------
WORKFLOW_A_STEPS = [
    WorkflowStep("A1", "Acknowledge the incoming Zendesk ticket (ZD-001)",
                 "zendesk", "acknowledge_ticket",
                 lambda apps: apps["zendesk"].ticket_acknowledged()),
    WorkflowStep("A2", "Escalate to Jira — create a new issue linked to ZD-001",
                 "jira", "create_issue",
                 lambda apps: apps["jira"].has_linked_issue()),
    WorkflowStep("A3", "Verify the customer's account status in Salesforce (ACME-001)",
                 "salesforce", "get_account",
                 lambda apps: apps["salesforce"].account_checked()),
    WorkflowStep("A4", "Assign the Jira issue to an engineer (JIRA-001)",
                 "jira", "assign_owner",
                 lambda apps: apps["jira"].issue_assigned()),
    WorkflowStep("A5", "Log the SLA compliance event in Workday",
                 "workday", "log_sla_event",
                 lambda apps: apps["workday"].sla_logged()),
]
48
 
49
# ---------------------------------------------------------------------------
# Workflow B: Employee Onboarding (Workday → Workday → Salesforce → Zendesk)
# Agent role: manager
# ---------------------------------------------------------------------------
WORKFLOW_B_STEPS = [
    WorkflowStep("B1", "Create the new employee's onboarding record in Workday (EMP-NEW-001)",
                 "workday", "create_onboarding_task",
                 lambda apps: apps["workday"].employee_created()),
    WorkflowStep("B2", "Provision Jira access for the new employee via Workday",
                 "workday", "provision_access",
                 lambda apps: apps["workday"].access_provisioned("jira")),
    WorkflowStep("B3", "Assign the new employee to the correct Salesforce territory team",
                 "salesforce", "assign_account_owner",
                 lambda apps: apps["salesforce"].team_assigned()),
    WorkflowStep("B4", "Create a Zendesk support agent profile for the new employee",
                 "zendesk", "assign_agent",
                 lambda apps: apps["zendesk"].profile_created()),
]
75
 
76
# ---------------------------------------------------------------------------
# Workflow C: Churn Risk Alert (Salesforce → Zendesk → Jira → Salesforce)
# Agent role: support
# ---------------------------------------------------------------------------
WORKFLOW_C_STEPS = [
    WorkflowStep("C1", "Flag at-risk account ACME-003 as churn risk in Salesforce",
                 "salesforce", "flag_churn_risk",
                 lambda apps: apps["salesforce"].churn_flagged()),
    WorkflowStep("C2", "Query recent support ticket volume for ACME-003 in Zendesk",
                 "zendesk", "get_ticket",
                 lambda apps: apps["zendesk"].support_queried("ACME-003")),
    WorkflowStep("C3", "Check outstanding Jira bugs linked to ACME-003",
                 "jira", "list_issues",
                 lambda apps: apps["jira"].bugs_checked()),
    WorkflowStep("C4", "Assign an intervention owner to ACME-003 in Salesforce",
                 "salesforce", "assign_account_owner",
                 lambda apps: apps["salesforce"].intervention_assigned()),
]
102
 
103
# ---------------------------------------------------------------------------
# Goal descriptions shown to the agent at reset
# ---------------------------------------------------------------------------
WORKFLOW_GOALS: Dict[str, str] = {
    "A": ("Workflow A — Customer Bug Fix: "
          "A P1 bug has been reported via Zendesk (ticket ZD-001) by customer ACME-001. "
          "Steps required: "
          "(1) acknowledge Zendesk ticket ZD-001, "
          "(2) create a new Jira issue linked to ZD-001, "
          "(3) verify ACME-001's account status in Salesforce, "
          "(4) assign the Jira issue (JIRA-001) to an engineer, "
          "(5) log the SLA compliance event in Workday. "
          "Use list operations if you need to discover record IDs."),
    "B": ("Workflow B — Employee Onboarding: "
          "A new support engineer has joined the West team. "
          "Employee ID: EMP-NEW-001, Name: Alex Rivera, department: support, territory: west. "
          "Steps required: "
          "(1) create an onboarding record in Workday for EMP-NEW-001, "
          "(2) provision Jira access for EMP-NEW-001 via Workday, "
          "(3) assign EMP-NEW-001 to the correct Salesforce territory (use any ACME-* account in the west region), "
          "(4) create a Zendesk agent profile for EMP-NEW-001. "
          "You have manager-level access."),
    "C": ("Workflow C — Churn Risk Alert: "
          "Account ACME-003 (GlobalTech) is showing churn signals. "
          "Steps required: "
          "(1) flag ACME-003 as a churn risk in Salesforce, "
          "(2) query recent support tickets for ACME-003 in Zendesk (use customer_id=ACME-003), "
          "(3) list open Jira bugs related to ACME-003, "
          "(4) assign an intervention owner to ACME-003 in Salesforce. "
          "Focus account: ACME-003."),
}
+ }
140
+
141
+ # Role each workflow expects the agent to act as
142
+ WORKFLOW_ROLES: Dict[str, str] = {
143
+ "A": "support",
144
+ "B": "manager",
145
+ "C": "support",
146
+ }
147
+
148
+
149
class WorkflowEngine:
    """Tracks progress of the active workflow by re-evaluating step completion checks."""

    WORKFLOWS = {
        "A": WORKFLOW_A_STEPS,
        "B": WORKFLOW_B_STEPS,
        "C": WORKFLOW_C_STEPS,
    }

    def __init__(self):
        self._steps: List[WorkflowStep] = []   # steps of the active workflow
        self._completed: List[str] = []        # step IDs last seen as completed
        self._workflow_id: str = "A"           # default until start() is called

    def start(self, workflow_id: str) -> None:
        """Initialise engine for the given workflow.

        Raises:
            KeyError: if workflow_id is not one of the keys in WORKFLOWS.
        """
        self._workflow_id = workflow_id
        self._steps = self.WORKFLOWS[workflow_id].copy()
        self._completed = []

    def evaluate(self, apps: Dict) -> float:
        """Check all steps and return completion ratio (0.0-1.0).

        Each step's completion_check is invoked exactly once per call (the
        previous implementation ran every check twice, which was redundant and
        would double-trigger any side effects in a check).
        """
        if not self._steps:
            return 0.0
        # Cache completed step IDs for get_pending() / get_completed().
        self._completed = [s.step_id for s in self._steps if s.completion_check(apps)]
        return len(self._completed) / len(self._steps)

    def get_pending(self) -> List[str]:
        """Return descriptions of not-yet-completed steps."""
        return [s.description for s in self._steps if s.step_id not in self._completed]

    def get_completed(self) -> List[str]:
        """Return step IDs that have been completed."""
        return list(self._completed)

    def get_goal(self) -> str:
        """Return the natural-language goal description for the active workflow."""
        return WORKFLOW_GOALS.get(self._workflow_id, "Complete the assigned workflow.")

    def get_role(self) -> str:
        """Return the expected agent role for RBAC checks."""
        return WORKFLOW_ROLES.get(self._workflow_id, "support")
training/grpo_orgos.ipynb ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 5,
4
+ "metadata": {
5
+ "kernelspec": {
6
+ "display_name": "Python 3",
7
+ "language": "python",
8
+ "name": "python3"
9
+ },
10
+ "language_info": {
11
+ "name": "python",
12
+ "version": "3.10.0"
13
+ },
14
+ "colab": {
15
+ "gpuType": "T4",
16
+ "provenance": []
17
+ },
18
+ "accelerator": "GPU"
19
+ },
20
+ "cells": [
21
+ {
22
+ "cell_type": "markdown",
23
+ "id": "title",
24
+ "metadata": {},
25
+ "source": [
26
+ "# OrgOS GRPO Training Notebook\n",
27
+ "\n",
28
+ "**Environment:** OrgOS — Multi-App Enterprise RL Environment \n",
29
+ "**Model:** `Qwen/Qwen2.5-3B-Instruct` (4-bit LoRA via Unsloth) \n",
30
+ "**Algorithm:** GRPO (Group Relative Policy Optimization) via HuggingFace TRL \n",
31
+ "**Hardware:** Colab T4 (free tier compatible) \n",
32
+ "\n",
33
+ "## What this notebook does\n",
34
+ "1. Installs dependencies (Unsloth + TRL)\n",
35
+ "2. Loads Qwen2.5-3B-Instruct with 4-bit LoRA\n",
36
+ "3. Collects **baseline rollouts** (untrained model) on Workflows A & C\n",
37
+ "4. Fine-tunes with **GRPOTrainer** using OrgOS dense rewards\n",
38
+ "5. Collects **post-training rollouts** and computes score improvement\n",
39
+ "6. Plots the **before/after reward curve** for the demo\n",
40
+ "\n",
41
+ "**Key training signal:** The schema drift mechanic creates a sharp signal gap —\n",
42
+ "an untrained model uses stale canonical field names (−0.20 per step),\n",
43
+ "while a GRPO-trained model learns to read `schema_hints` first (+reward).\n",
44
+ "This produces a clear, visually compelling before/after improvement."
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "markdown",
49
+ "id": "sec1",
50
+ "metadata": {},
51
+ "source": ["## 1. Install Dependencies"]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "id": "install",
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "# Install Unsloth (optimised 4-bit LLM training) + TRL (GRPO)\n",
61
+ "!pip install -q unsloth[colab-new] trl>=0.9.0 peft accelerate bitsandbytes\n",
62
+ "!pip install -q fastapi uvicorn httpx openai pydantic\n",
63
+ "!pip install -q matplotlib numpy\n",
64
+ "\n",
65
+ "# Clone / mount the OrgOS repo\n",
66
+ "import os\n",
67
+ "if not os.path.exists('/content/openEnv'):\n",
68
+ " !git clone https://huggingface.co/spaces/YOUR_HF_USERNAME/orgos-openenv /content/openEnv\n",
69
+ " # Alternatively: upload the repo zip and unzip it here\n",
70
+ "\n",
71
+ "os.chdir('/content/openEnv')\n",
72
+ "print('Working directory:', os.getcwd())"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "markdown",
77
+ "id": "sec2",
78
+ "metadata": {},
79
+ "source": ["## 2. Load Model with Unsloth 4-bit LoRA"]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": null,
84
+ "id": "load_model",
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "from unsloth import FastLanguageModel\n",
89
+ "import torch\n",
90
+ "\n",
91
+ "MAX_SEQ_LEN = 2048\n",
92
+ "MODEL_NAME = 'Qwen/Qwen2.5-3B-Instruct'\n",
93
+ "\n",
94
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
95
+ " model_name = MODEL_NAME,\n",
96
+ " max_seq_length = MAX_SEQ_LEN,\n",
97
+ " dtype = None, # auto-detect\n",
98
+ " load_in_4bit = True,\n",
99
+ ")\n",
100
+ "\n",
101
+ "# Add LoRA adapters\n",
102
+ "model = FastLanguageModel.get_peft_model(\n",
103
+ " model,\n",
104
+ " r = 16,\n",
105
+ " target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj',\n",
106
+ " 'gate_proj', 'up_proj', 'down_proj'],\n",
107
+ " lora_alpha = 16,\n",
108
+ " lora_dropout = 0,\n",
109
+ " bias = 'none',\n",
110
+ " use_gradient_checkpointing = 'unsloth',\n",
111
+ " random_state = 42,\n",
112
+ ")\n",
113
+ "print(f'Model loaded — trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}')"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "markdown",
118
+ "id": "sec3",
119
+ "metadata": {},
120
+ "source": ["## 3. Start the OrgOS Environment Server (subprocess)"]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": null,
125
+ "id": "start_server",
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "import subprocess, time, httpx\n",
130
+ "\n",
131
+ "server_proc = subprocess.Popen(\n",
132
+ " ['python', '-m', 'uvicorn', 'server.app:app', '--host', '0.0.0.0', '--port', '8000'],\n",
133
+ " stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL\n",
134
+ ")\n",
135
+ "time.sleep(3)\n",
136
+ "\n",
137
+ "health = httpx.get('http://localhost:8000/health').json()\n",
138
+ "assert health['status'] == 'healthy', f'Server not healthy: {health}'\n",
139
+ "print('OrgOS server running — health:', health)"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "markdown",
144
+ "id": "sec4",
145
+ "metadata": {},
146
+ "source": ["## 4. Rollout Harness (collect trajectories)"]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": null,
151
+ "id": "rollout_harness",
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "import json, re, sys\n",
156
+ "from typing import List, Dict, Tuple\n",
157
+ "\n",
158
+ "SYSTEM_PROMPT = open('inference.py').read().split('SYSTEM_PROMPT = \\\"\\\"\\\"')[1].split('\\\"\\\"\\\"')[0]\n",
159
+ "\n",
160
+ "def obs_to_text(obs: dict) -> str:\n",
161
+ " \"\"\"Convert observation dict to text for the model.\"\"\"\n",
162
+ " hints = obs.get('schema_hints', {})\n",
163
+ " pending = obs.get('pending_steps', [])\n",
164
+ " return (\n",
165
+ " f\"current_score: {obs['current_score']}\\n\"\n",
166
+ " f\"step_count: {obs['step_count']}\\n\"\n",
167
+ " f\"workflow_id: {obs['workflow_id']}\\n\\n\"\n",
168
+ " f\"=== WORKFLOW GOAL ===\\n{obs['workflow_goal']}\\n\\n\"\n",
169
+ " f\"=== PENDING STEPS ===\\n\" + ('\\n'.join(f'- {s}' for s in pending) or '(done!)') + \"\\n\\n\"\n",
170
+ " f\"=== SCHEMA HINTS ===\\n{json.dumps(hints, indent=2)}\\n\\n\"\n",
171
+ " f\"=== ACTIVE RULES ===\\n{json.dumps(obs.get('active_rules', {}), indent=2)}\\n\\n\"\n",
172
+ " f\"=== LAST MESSAGE ===\\n{obs['message']}\\n\"\n",
173
+ " )\n",
174
+ "\n",
175
+ "def generate_action(prompt_messages: List[Dict], max_tokens=256) -> str:\n",
176
+ " \"\"\"Run the model to produce an action JSON string.\"\"\"\n",
177
+ " from transformers import GenerationConfig\n",
178
+ " # Format as chat\n",
179
+ " text = tokenizer.apply_chat_template(\n",
180
+ " prompt_messages, tokenize=False, add_generation_prompt=True\n",
181
+ " )\n",
182
+ " inputs = tokenizer(text, return_tensors='pt').to(model.device)\n",
183
+ " with torch.no_grad():\n",
184
+ " out = model.generate(\n",
185
+ " **inputs,\n",
186
+ " max_new_tokens = max_tokens,\n",
187
+ " temperature = 0.7,\n",
188
+ " do_sample = True,\n",
189
+ " pad_token_id = tokenizer.eos_token_id,\n",
190
+ " )\n",
191
+ " decoded = tokenizer.decode(out[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)\n",
192
+ " return decoded.strip()\n",
193
+ "\n",
194
+ "def run_episode(workflow_id: str, max_steps: int = 15) -> Tuple[List[dict], float]:\n",
195
+ " \"\"\"\n",
196
+ " Run one episode. Returns (trajectory, final_score).\n",
197
+ " trajectory = list of {'messages': [...], 'reward': float}\n",
198
+ " \"\"\"\n",
199
+ " resp = httpx.post('http://localhost:8000/reset', json={'workflow_id': workflow_id})\n",
200
+ " obs = resp.json()['observation']\n",
201
+ " history = []\n",
202
+ " trajectory = []\n",
203
+ " cumulative_reward = 0.0\n",
204
+ "\n",
205
+ " for step_i in range(max_steps):\n",
206
+ " if obs['done']:\n",
207
+ " break\n",
208
+ "\n",
209
+ " obs_text = obs_to_text(obs)\n",
210
+ " history.append({'role': 'user', 'content': obs_text})\n",
211
+ "\n",
212
+ " msgs = [{'role': 'system', 'content': SYSTEM_PROMPT}] + history[-10:]\n",
213
+ " action_str = generate_action(msgs)\n",
214
+ "\n",
215
+ " history.append({'role': 'assistant', 'content': action_str})\n",
216
+ "\n",
217
+ " # Parse action\n",
218
+ " action = None\n",
219
+ " try:\n",
220
+ " action = json.loads(action_str)\n",
221
+ " except:\n",
222
+ " m = re.search(r'\\{.*\\}', action_str, re.DOTALL)\n",
223
+ " if m:\n",
224
+ " try: action = json.loads(m.group())\n",
225
+ " except: pass\n",
226
+ "\n",
227
+ " if action is None:\n",
228
+ " cumulative_reward -= 0.05\n",
229
+ " break\n",
230
+ "\n",
231
+ " result = httpx.post('http://localhost:8000/step', json=action).json()\n",
232
+ " obs = result['observation']\n",
233
+ " reward = result['reward']\n",
234
+ " cumulative_reward += reward\n",
235
+ "\n",
236
+ " # Store step for GRPO\n",
237
+ " trajectory.append({\n",
238
+ " 'messages': msgs + [{'role': 'assistant', 'content': action_str}],\n",
239
+ " 'reward': reward,\n",
240
+ " })\n",
241
+ "\n",
242
+ " if obs['done']:\n",
243
+ " break\n",
244
+ "\n",
245
+ " return trajectory, obs.get('current_score', 0.001)\n",
246
+ "\n",
247
+ "print('Rollout harness ready.')"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "markdown",
252
+ "id": "sec5",
253
+ "metadata": {},
254
+ "source": ["## 5. Collect Baseline Rollouts (Pre-Training)"]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": null,
259
+ "id": "baseline_rollouts",
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "import numpy as np\n",
264
+ "\n",
265
+ "N_BASELINE = 30 # 30 episodes pre-training (10 per workflow)\n",
266
+ "\n",
267
+ "baseline_scores = {'A': [], 'B': [], 'C': []}\n",
268
+ "all_trajectories = []\n",
269
+ "\n",
270
+ "print('Collecting baseline rollouts...')\n",
271
+ "for wf in ['A', 'B', 'C']:\n",
272
+ " for ep in range(N_BASELINE // 3):\n",
273
+ " traj, score = run_episode(wf)\n",
274
+ " baseline_scores[wf].append(score)\n",
275
+ " all_trajectories.extend(traj)\n",
276
+ " print(f' Workflow {wf} ep {ep+1}: score={score:.4f}', end='\\r')\n",
277
+ " print(f' Workflow {wf}: mean={np.mean(baseline_scores[wf]):.4f} ± {np.std(baseline_scores[wf]):.4f}')\n",
278
+ "\n",
279
+ "print(f'\\nTotal baseline episodes: {N_BASELINE}')\n",
280
+ "print(f'Total trajectory steps: {len(all_trajectories)}')\n",
281
+ "print(f'Overall baseline mean: {np.mean([s for v in baseline_scores.values() for s in v]):.4f}')"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "markdown",
286
+ "id": "sec6",
287
+ "metadata": {},
288
+ "source": ["## 6. Build GRPO Dataset from Trajectories"]
289
+ },
290
+ {
291
+ "cell_type": "code",
292
+ "execution_count": null,
293
+ "id": "build_dataset",
294
+ "metadata": {},
295
+ "outputs": [],
296
+ "source": [
297
+ "from datasets import Dataset\n",
298
+ "\n",
299
+ "def trajectories_to_dataset(trajectories: List[dict]) -> Dataset:\n",
300
+ " \"\"\"\n",
301
+ " Convert trajectory steps into a GRPO-compatible dataset.\n",
302
+ " Each row = one (prompt, completion, reward) triple.\n",
303
+ " \"\"\"\n",
304
+ " rows = []\n",
305
+ " for step in trajectories:\n",
306
+ " messages = step['messages']\n",
307
+ " reward = step['reward']\n",
308
+ " # Separate prompt (all but last assistant turn) from completion\n",
309
+ " prompt_msgs = messages[:-1]\n",
310
+ " completion = messages[-1]['content']\n",
311
+ " prompt_text = tokenizer.apply_chat_template(\n",
312
+ " prompt_msgs, tokenize=False, add_generation_prompt=True\n",
313
+ " )\n",
314
+ " rows.append({'prompt': prompt_text, 'completion': completion, 'reward': reward})\n",
315
+ " return Dataset.from_list(rows)\n",
316
+ "\n",
317
+ "train_dataset = trajectories_to_dataset(all_trajectories)\n",
318
+ "print(f'Training dataset: {len(train_dataset)} examples')\n",
319
+ "print(f'Reward range: [{min(train_dataset[\"reward\"]):.4f}, {max(train_dataset[\"reward\"]):.4f}]')\n",
320
+ "print(f'Mean reward: {np.mean(train_dataset[\"reward\"]):.4f}')\n",
321
+ "train_dataset[0]"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "markdown",
326
+ "id": "sec7",
327
+ "metadata": {},
328
+ "source": ["## 7. GRPO Training"]
329
+ },
330
+ {
331
+ "cell_type": "code",
332
+ "execution_count": null,
333
+ "id": "grpo_training",
334
+ "metadata": {},
335
+ "outputs": [],
336
+ "source": [
337
+ "from trl import GRPOConfig, GRPOTrainer\n",
338
+ "\n",
339
+ "# Reward function for GRPO: directly use the env's per-step reward\n",
340
+ "def reward_fn(completions: List[str], prompts: List[str], **kwargs) -> List[float]:\n",
341
+ " \"\"\"GRPO reward function — called on each group of completions.\"\"\"\n",
342
+ " # In GRPO the rewards come from rollouts; we pre-compute them above.\n",
343
+ " # This function returns the rewards already stored in the dataset.\n",
344
+ " return kwargs.get('reward', [0.0] * len(completions))\n",
345
+ "\n",
346
+ "grpo_config = GRPOConfig(\n",
347
+ " output_dir = './orgos_grpo_ckpt',\n",
348
+ " num_train_epochs = 3,\n",
349
+ " per_device_train_batch_size = 2,\n",
350
+ " gradient_accumulation_steps = 4,\n",
351
+ " learning_rate = 5e-5,\n",
352
+ " warmup_steps = 10,\n",
353
+ " logging_steps = 5,\n",
354
+ " save_steps = 50,\n",
355
+ " fp16 = not torch.cuda.is_bf16_supported(),\n",
356
+ " bf16 = torch.cuda.is_bf16_supported(),\n",
357
+ " max_grad_norm = 1.0,\n",
358
+ " # GRPO-specific\n",
359
+ " num_generations = 4, # group size G\n",
360
+ "    max_completion_length = 256,\n",
361
+ " temperature = 0.7,\n",
362
+ " beta = 0.04, # KL penalty\n",
363
+ " report_to = 'none',\n",
364
+ " seed = 42,\n",
365
+ ")\n",
366
+ "\n",
367
+ "trainer = GRPOTrainer(\n",
368
+ " model = model,\n",
369
+ " args = grpo_config,\n",
370
+ " reward_funcs = reward_fn,\n",
371
+ " train_dataset = train_dataset,\n",
372
+ " tokenizer = tokenizer,\n",
373
+ ")\n",
374
+ "\n",
375
+ "print('Starting GRPO training...')\n",
376
+ "train_result = trainer.train()\n",
377
+ "print('Training complete!')\n",
378
+ "print(train_result.metrics)"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "markdown",
383
+ "id": "sec8",
384
+ "metadata": {},
385
+ "source": ["## 8. Collect Post-Training Rollouts"]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": null,
390
+ "id": "posttraining_rollouts",
391
+ "metadata": {},
392
+ "outputs": [],
393
+ "source": [
394
+ "# Switch model to inference mode\n",
395
+ "FastLanguageModel.for_inference(model)\n",
396
+ "\n",
397
+ "N_EVAL = 30\n",
398
+ "post_scores = {'A': [], 'B': [], 'C': []}\n",
399
+ "\n",
400
+ "print('Collecting post-training rollouts...')\n",
401
+ "for wf in ['A', 'B', 'C']:\n",
402
+ " for ep in range(N_EVAL // 3):\n",
403
+ " _, score = run_episode(wf)\n",
404
+ " post_scores[wf].append(score)\n",
405
+ " print(f' Workflow {wf} ep {ep+1}: score={score:.4f}', end='\\r')\n",
406
+ " print(f' Workflow {wf}: mean={np.mean(post_scores[wf]):.4f} ± {np.std(post_scores[wf]):.4f}')\n",
407
+ "\n",
408
+ "print(f'\\nOverall post-training mean: {np.mean([s for v in post_scores.values() for s in v]):.4f}')"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "markdown",
413
+ "id": "sec9",
414
+ "metadata": {},
415
+ "source": ["## 9. Plot Before/After Reward Curves"]
416
+ },
417
+ {
418
+ "cell_type": "code",
419
+ "execution_count": null,
420
+ "id": "plot_curves",
421
+ "metadata": {},
422
+ "outputs": [],
423
+ "source": [
424
+ "import matplotlib.pyplot as plt\n",
425
+ "import matplotlib.gridspec as gridspec\n",
426
+ "\n",
427
+ "fig = plt.figure(figsize=(14, 8), facecolor='#0f172a')\n",
428
+ "fig.suptitle('OrgOS: Before vs After GRPO Training', fontsize=15,\n",
429
+ " color='white', fontweight='bold', y=0.98)\n",
430
+ "\n",
431
+ "gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.45, wspace=0.35)\n",
432
+ "\n",
433
+ "COLORS = {'before': '#f87171', 'after': '#34d399', 'bg': '#1e293b', 'grid': '#334155'}\n",
434
+ "WF_LABELS = {'A': 'Workflow A\\nCustomer Bug Fix',\n",
435
+ " 'B': 'Workflow B\\nEmployee Onboarding',\n",
436
+ " 'C': 'Workflow C\\nChurn Risk Alert'}\n",
437
+ "\n",
438
+ "for col, wf in enumerate(['A', 'B', 'C']):\n",
439
+ " ax = fig.add_subplot(gs[0, col])\n",
440
+ " ax.set_facecolor(COLORS['bg'])\n",
441
+ " ax.grid(color=COLORS['grid'], linewidth=0.5, alpha=0.7)\n",
442
+ "\n",
443
+ " before = baseline_scores[wf]\n",
444
+ " after = post_scores[wf]\n",
445
+ "\n",
446
+ " ax.plot(before, color=COLORS['before'], linewidth=1.5, alpha=0.8, label='Before GRPO')\n",
447
+ " ax.plot(after, color=COLORS['after'], linewidth=1.5, alpha=0.8, label='After GRPO')\n",
448
+ "\n",
449
+ " ax.axhline(np.mean(before), color=COLORS['before'], linestyle='--', linewidth=1, alpha=0.5)\n",
450
+ " ax.axhline(np.mean(after), color=COLORS['after'], linestyle='--', linewidth=1, alpha=0.5)\n",
451
+ "\n",
452
+ " delta = np.mean(after) - np.mean(before)\n",
453
+ " ax.set_title(WF_LABELS[wf] + f'\\n(Δ = {delta:+.4f})', color='white', fontsize=9)\n",
454
+ " ax.set_xlabel('Episode', color='#94a3b8', fontsize=8)\n",
455
+ " ax.set_ylabel('Final Score', color='#94a3b8', fontsize=8)\n",
456
+ " ax.tick_params(colors='#64748b', labelsize=7)\n",
457
+ " ax.set_ylim(0, 1)\n",
458
+ " ax.legend(fontsize=7, facecolor='#1e293b', labelcolor='white',\n",
459
+ " edgecolor='#475569', framealpha=0.8)\n",
460
+ " for spine in ax.spines.values():\n",
461
+ " spine.set_edgecolor('#334155')\n",
462
+ "\n",
463
+ "# Bottom row: combined histogram\n",
464
+ "ax_hist = fig.add_subplot(gs[1, :])\n",
465
+ "ax_hist.set_facecolor(COLORS['bg'])\n",
466
+ "ax_hist.grid(color=COLORS['grid'], linewidth=0.5, alpha=0.5, axis='x')\n",
467
+ "\n",
468
+ "all_before = [s for v in baseline_scores.values() for s in v]\n",
469
+ "all_after = [s for v in post_scores.values() for s in v]\n",
470
+ "\n",
471
+ "bins = np.linspace(0, 1, 25)\n",
472
+ "ax_hist.hist(all_before, bins=bins, color=COLORS['before'], alpha=0.6, label=f'Before GRPO (mean={np.mean(all_before):.4f})', edgecolor='none')\n",
473
+ "ax_hist.hist(all_after, bins=bins, color=COLORS['after'], alpha=0.6, label=f'After GRPO (mean={np.mean(all_after):.4f})', edgecolor='none')\n",
474
+ "ax_hist.axvline(np.mean(all_before), color=COLORS['before'], linestyle='--', linewidth=1.5)\n",
475
+ "ax_hist.axvline(np.mean(all_after), color=COLORS['after'], linestyle='--', linewidth=1.5)\n",
476
+ "\n",
477
+ "ax_hist.set_title('Score Distribution Across All Workflows', color='white', fontsize=10)\n",
478
+ "ax_hist.set_xlabel('Final Score', color='#94a3b8', fontsize=9)\n",
479
+ "ax_hist.set_ylabel('Count', color='#94a3b8', fontsize=9)\n",
480
+ "ax_hist.tick_params(colors='#64748b', labelsize=8)\n",
481
+ "ax_hist.legend(fontsize=9, facecolor='#1e293b', labelcolor='white',\n",
482
+ " edgecolor='#475569', framealpha=0.9)\n",
483
+ "for spine in ax_hist.spines.values():\n",
484
+ " spine.set_edgecolor('#334155')\n",
485
+ "\n",
486
+ "plt.savefig('before_after_curves.png', dpi=150, bbox_inches='tight',\n",
487
+ " facecolor='#0f172a', edgecolor='none')\n",
488
+ "plt.show()\n",
489
+ "print('Saved: before_after_curves.png')"
490
+ ]
491
+ },
492
+ {
493
+ "cell_type": "markdown",
494
+ "id": "sec10",
495
+ "metadata": {},
496
+ "source": ["## 10. Save LoRA Adapter & Upload to HuggingFace"]
497
+ },
498
+ {
499
+ "cell_type": "code",
500
+ "execution_count": null,
501
+ "id": "save_model",
502
+ "metadata": {},
503
+ "outputs": [],
504
+ "source": [
505
+ "# Save LoRA adapter locally\n",
506
+ "model.save_pretrained('orgos_lora_adapter')\n",
507
+ "tokenizer.save_pretrained('orgos_lora_adapter')\n",
508
+ "print('LoRA adapter saved to ./orgos_lora_adapter')\n",
509
+ "\n",
510
+ "# Optionally push to HuggingFace Hub\n",
511
+ "# from huggingface_hub import login\n",
512
+ "# login(token=os.environ['HF_TOKEN'])\n",
513
+ "# model.push_to_hub('YOUR_HF_USERNAME/orgos-qwen25-3b-grpo-lora')\n",
514
+ "# tokenizer.push_to_hub('YOUR_HF_USERNAME/orgos-qwen25-3b-grpo-lora')\n",
515
+ "# print('Pushed to HuggingFace Hub!')"
516
+ ]
517
+ },
518
+ {
519
+ "cell_type": "markdown",
520
+ "id": "sec11",
521
+ "metadata": {},
522
+ "source": [
523
+ "## 11. Summary\n",
524
+ "\n",
525
+ "```\n",
526
+ "OrgOS GRPO Training Summary\n",
527
+ "============================\n",
528
+ "Model: Qwen2.5-3B-Instruct + 4-bit LoRA\n",
529
+ "Algorithm: GRPO (Group Relative Policy Optimization)\n",
530
+ "Epochs: 3\n",
531
+ "Episodes: 30 baseline + 30 post-training\n",
532
+ "\n",
533
+ "Key result: The GRPO-trained model learns to:\n",
534
+ " 1. Read schema_hints before constructing action args\n",
535
+ " 2. Use drifted field names (e.g. 'severity' not 'priority')\n",
536
+ " 3. Complete workflow steps in the correct order\n",
537
+ " 4. Avoid RBAC violations by checking role constraints\n",
538
+ "\n",
539
+ "This produces a clear, measurable improvement visible in\n",
540
+ "before_after_curves.png — the core evidence for judging.\n",
541
+ "```\n",
542
+ "\n",
543
+ "**Artefacts produced:**\n",
544
+ "- `before_after_curves.png` — the money chart for the pitch\n",
545
+ "- `orgos_lora_adapter/` — the trained LoRA weights\n",
546
+ "- `baseline_scores.json` — raw score data"
547
+ ]
548
+ }
549
+ ]
550
+ }
ui/index.html ADDED
@@ -0,0 +1,651 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" x-data="orgos()" x-init="init()">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>OrgOS — Multi-App Enterprise RL Environment</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
10
+ <style>
11
+ [x-cloak] { display: none !important; }
12
+
13
+ body { font-family: 'JetBrains Mono', 'Fira Code', monospace; }
14
+
15
+ .app-tab.active { @apply border-b-2; }
16
+ .step-done { color: #22c55e; }
17
+ .step-active { color: #fbbf24; }
18
+ .step-pending{ color: #475569; }
19
+
20
+ /* Scrollbar styling */
21
+ ::-webkit-scrollbar { width: 4px; height: 4px; }
22
+ ::-webkit-scrollbar-track { background: #1e293b; }
23
+ ::-webkit-scrollbar-thumb { background: #334155; border-radius: 2px; }
24
+
25
+ /* Log entry fade-in */
26
+ @keyframes fadeIn { from { opacity: 0; transform: translateY(4px); } to { opacity: 1; } }
27
+ .log-entry { animation: fadeIn 0.2s ease; }
28
+
29
+ /* Score pulse when updating */
30
+ @keyframes scorePulse {
31
+ 0%, 100% { color: #38bdf8; }
32
+ 50% { color: #7dd3fc; }
33
+ }
34
+ .score-updated { animation: scorePulse 0.4s ease; }
35
+ </style>
36
+ </head>
37
+
38
+ <body class="bg-slate-950 text-slate-300 min-h-screen">
39
+
40
+ <!-- ================================================================
41
+ TOP BAR
42
+ ================================================================ -->
43
+ <header class="bg-slate-900 border-b border-slate-800 px-4 py-3 flex items-center gap-4">
44
+ <!-- Logo -->
45
+ <div class="flex items-center gap-2 mr-4">
46
+ <div class="w-7 h-7 rounded bg-sky-500 flex items-center justify-center text-white font-bold text-sm">O</div>
47
+ <span class="text-white font-semibold text-sm tracking-wide">OrgOS</span>
48
+ <span class="text-slate-500 text-xs">Enterprise RL Environment</span>
49
+ </div>
50
+
51
+ <!-- Workflow selector -->
52
+ <div class="flex items-center gap-2">
53
+ <label class="text-xs text-slate-500 uppercase tracking-widest">Workflow</label>
54
+ <select x-model="selectedWorkflow"
55
+ class="bg-slate-800 border border-slate-700 text-slate-200 text-xs rounded px-2 py-1 focus:outline-none focus:border-sky-500">
56
+ <option value="A">A — Customer Bug Fix</option>
57
+ <option value="B">B — Employee Onboarding</option>
58
+ <option value="C">C — Churn Risk Alert</option>
59
+ </select>
60
+ </div>
61
+
62
+ <!-- Run / Stop button -->
63
+ <button @click="isRunning ? stopAgent() : startAgent()"
64
+ :class="isRunning
65
+ ? 'bg-red-600 hover:bg-red-500 text-white'
66
+ : 'bg-sky-600 hover:bg-sky-500 text-white'"
67
+ class="px-3 py-1.5 rounded text-xs font-medium transition-colors flex items-center gap-1.5">
68
+ <svg x-show="!isRunning" xmlns="http://www.w3.org/2000/svg" class="w-3 h-3" fill="currentColor" viewBox="0 0 16 16">
69
+ <path d="M11.596 8.697l-6.363 3.692c-.54.313-1.233-.066-1.233-.697V4.308c0-.63.692-1.01 1.233-.696l6.363 3.692a.802.802 0 0 1 0 1.393z"/>
70
+ </svg>
71
+ <svg x-show="isRunning" xmlns="http://www.w3.org/2000/svg" class="w-3 h-3" fill="currentColor" viewBox="0 0 16 16">
72
+ <path d="M5.5 3.5A1.5 1.5 0 0 1 7 5v6a1.5 1.5 0 0 1-3 0V5a1.5 1.5 0 0 1 1.5-1.5zm5 0A1.5 1.5 0 0 1 12 5v6a1.5 1.5 0 0 1-3 0V5a1.5 1.5 0 0 1 1.5-1.5z"/>
73
+ </svg>
74
+ <span x-text="isRunning ? 'Stop' : 'Run Agent'"></span>
75
+ </button>
76
+
77
+ <!-- Reset button -->
78
+ <button @click="resetEpisode()"
79
+ :disabled="isRunning"
80
+ class="px-3 py-1.5 rounded text-xs font-medium bg-slate-700 hover:bg-slate-600 text-slate-300 transition-colors disabled:opacity-40 disabled:cursor-not-allowed">
81
+ Reset
82
+ </button>
83
+
84
+ <!-- Status indicators -->
85
+ <div class="ml-auto flex items-center gap-4">
86
+ <!-- Score -->
87
+ <div class="text-right">
88
+ <div class="text-xs text-slate-500 uppercase tracking-widest">Score</div>
89
+ <div class="text-sky-400 font-bold text-base tabular-nums"
90
+ :class="scoreUpdated ? 'score-updated' : ''"
91
+ x-text="currentScore.toFixed(4)"></div>
92
+ </div>
93
+ <!-- Steps -->
94
+ <div class="text-right">
95
+ <div class="text-xs text-slate-500 uppercase tracking-widest">Step</div>
96
+ <div class="text-slate-200 font-bold text-base tabular-nums"
97
+ x-text="stepCount + ' / ' + maxSteps"></div>
98
+ </div>
99
+ <!-- Policy drift badge -->
100
+ <div x-show="policyDriftActive"
101
+ class="px-2 py-0.5 rounded-full text-xs bg-amber-900 text-amber-300 border border-amber-700">
102
+ Policy Drift
103
+ </div>
104
+ <!-- Health dot -->
105
+ <div class="flex items-center gap-1.5">
106
+ <div class="w-2 h-2 rounded-full"
107
+ :class="serverHealthy ? 'bg-green-500' : 'bg-red-500'"></div>
108
+ <span class="text-xs text-slate-500" x-text="serverHealthy ? 'Live' : 'Offline'"></span>
109
+ </div>
110
+ </div>
111
+ </header>
112
+
113
+ <!-- ================================================================
114
+ MAIN LAYOUT (3-column)
115
+ ================================================================ -->
116
+ <div class="flex h-[calc(100vh-52px)]">
117
+
118
+ <!-- ============================================================
119
+ LEFT: Workflow Progress + Schema Hints + Rules
120
+ ============================================================ -->
121
+ <aside class="w-72 flex-shrink-0 bg-slate-900 border-r border-slate-800 flex flex-col overflow-hidden">
122
+
123
+ <!-- Workflow goal -->
124
+ <div class="px-4 pt-4 pb-3 border-b border-slate-800">
125
+ <div class="text-xs text-slate-500 uppercase tracking-widest mb-1">Goal</div>
126
+ <p class="text-slate-300 text-xs leading-relaxed" x-text="workflowGoal || 'Reset to start an episode.'"></p>
127
+ </div>
128
+
129
+ <!-- Step tracker -->
130
+ <div class="px-4 pt-3 pb-2 border-b border-slate-800 flex-shrink-0">
131
+ <div class="text-xs text-slate-500 uppercase tracking-widest mb-2">
132
+ Workflow Steps
133
+ <span class="ml-1 text-sky-400 font-bold"
134
+ x-text="'(' + completedSteps.length + '/' + totalSteps + ')'"></span>
135
+ </div>
136
+ <template x-for="(step, i) in allSteps" :key="i">
137
+ <div class="flex items-start gap-2 py-1">
138
+ <!-- Icon -->
139
+ <div class="mt-0.5 w-4 h-4 flex-shrink-0">
140
+ <template x-if="completedSteps.includes(step.id)">
141
+ <svg class="w-4 h-4 text-green-500" fill="currentColor" viewBox="0 0 20 20">
142
+ <path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-9.293a1 1 0 00-1.414-1.414L9 10.586 7.707 9.293a1 1 0 00-1.414 1.414l2 2a1 1 0 001.414 0l4-4z" clip-rule="evenodd"/>
143
+ </svg>
144
+ </template>
145
+ <template x-if="!completedSteps.includes(step.id)">
146
+ <div class="w-4 h-4 rounded-full border border-slate-600 flex items-center justify-center">
147
+ <span class="text-[9px] text-slate-500" x-text="step.id"></span>
148
+ </div>
149
+ </template>
150
+ </div>
151
+ <!-- Description -->
152
+ <span class="text-xs leading-tight"
153
+ :class="completedSteps.includes(step.id)
154
+ ? 'text-green-400 line-through decoration-green-600'
155
+ : 'text-slate-400'"
156
+ x-text="step.description"></span>
157
+ </div>
158
+ </template>
159
+ </div>
160
+
161
+ <!-- Schema hints -->
162
+ <div class="px-4 pt-3 pb-3 border-b border-slate-800 flex-shrink-0">
163
+ <div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Schema Hints</div>
164
+ <template x-if="Object.keys(schemaHints).length === 0">
165
+ <p class="text-xs text-slate-600">No drift — canonical names in effect.</p>
166
+ </template>
167
+ <template x-for="[field, drifted] in Object.entries(schemaHints)" :key="field">
168
+ <div class="flex items-center gap-1 py-0.5 font-mono text-[11px]">
169
+ <span class="text-red-400 line-through" x-text="field.split('.')[1] ?? field"></span>
170
+ <span class="text-slate-600">→</span>
171
+ <span class="text-green-400" x-text="drifted"></span>
172
+ <span class="text-slate-600 text-[10px]" x-text="'(' + (field.split('.')[0] ?? '') + ')'"></span>
173
+ </div>
174
+ </template>
175
+ </div>
176
+
177
+ <!-- Active rules -->
178
+ <div class="px-4 pt-3 pb-3 flex-shrink-0">
179
+ <div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Active Rules</div>
180
+ <template x-for="[key, val] in Object.entries(activeRules)" :key="key">
181
+ <div class="flex justify-between py-0.5 text-[11px]">
182
+ <span class="text-slate-500" x-text="key.replace(/_/g,' ')"></span>
183
+ <span class="text-slate-300 font-bold tabular-nums" x-text="val"></span>
184
+ </div>
185
+ </template>
186
+ </div>
187
+ </aside>
188
+
189
+ <!-- ============================================================
190
+ CENTER: App State Tabs + Agent Log
191
+ ============================================================ -->
192
+ <main class="flex-1 flex flex-col overflow-hidden min-w-0">
193
+
194
+ <!-- App state tabs -->
195
+ <div class="bg-slate-900 border-b border-slate-800 flex-shrink-0">
196
+ <!-- Tab headers -->
197
+ <div class="flex">
198
+ <template x-for="tab in appTabs" :key="tab.id">
199
+ <button @click="activeAppTab = tab.id"
200
+ :class="activeAppTab === tab.id
201
+ ? 'border-b-2 border-sky-500 text-sky-400 bg-slate-950'
202
+ : 'text-slate-500 hover:text-slate-300'"
203
+ class="px-4 py-2 text-xs font-medium transition-colors flex items-center gap-1.5">
204
+ <span x-text="tab.icon"></span>
205
+ <span x-text="tab.label"></span>
206
+ <!-- Open items badge -->
207
+ <template x-if="appOpenCounts[tab.id] > 0">
208
+ <span class="px-1.5 py-0.5 rounded-full text-[10px] font-bold"
209
+ :class="activeAppTab === tab.id ? 'bg-sky-900 text-sky-300' : 'bg-slate-700 text-slate-400'"
210
+ x-text="appOpenCounts[tab.id]"></span>
211
+ </template>
212
+ </button>
213
+ </template>
214
+ </div>
215
+ <!-- Tab content -->
216
+ <div class="p-3 max-h-48 overflow-y-auto">
217
+ <template x-for="tab in appTabs" :key="tab.id">
218
+ <pre x-show="activeAppTab === tab.id"
219
+ class="text-[11px] font-mono text-slate-300 whitespace-pre-wrap leading-relaxed"
220
+ x-text="appStates[tab.id] || 'No data yet — reset to load.'"></pre>
221
+ </template>
222
+ </div>
223
+ </div>
224
+
225
+ <!-- Agent action log -->
226
+ <div class="flex-1 overflow-hidden flex flex-col">
227
+ <div class="px-4 py-2 border-b border-slate-800 flex items-center justify-between bg-slate-900 flex-shrink-0">
228
+ <span class="text-xs text-slate-500 uppercase tracking-widest">Agent Log</span>
229
+ <button @click="actionLog = []" class="text-xs text-slate-600 hover:text-slate-400">Clear</button>
230
+ </div>
231
+ <div class="flex-1 overflow-y-auto px-4 py-3 space-y-1.5" id="log-scroll">
232
+ <template x-if="actionLog.length === 0">
233
+ <p class="text-slate-600 text-xs italic">Waiting for episode to start…</p>
234
+ </template>
235
+ <template x-for="(entry, i) in actionLog" :key="i">
236
+ <div class="log-entry flex gap-3 items-start text-xs font-mono py-1 border-b border-slate-800/50">
237
+ <!-- Step number -->
238
+ <span class="text-slate-600 w-8 text-right flex-shrink-0" x-text="'#' + entry.step"></span>
239
+ <!-- Color dot -->
240
+ <span class="w-2 h-2 rounded-full flex-shrink-0 mt-0.5"
241
+ :class="{
242
+ 'bg-green-500': entry.type === 'success',
243
+ 'bg-red-500': entry.type === 'error',
244
+ 'bg-amber-500': entry.type === 'warning',
245
+ 'bg-sky-500': entry.type === 'info',
246
+ 'bg-slate-500': entry.type === 'reset',
247
+ }"></span>
248
+ <!-- Content -->
249
+ <div class="flex-1 min-w-0">
250
+ <div class="flex items-center gap-2 flex-wrap">
251
+ <template x-if="entry.app">
252
+ <span class="px-1.5 py-0.5 rounded text-[10px] font-bold uppercase"
253
+ :class="{
254
+ 'bg-violet-900 text-violet-300': entry.app === 'jira',
255
+ 'bg-emerald-900 text-emerald-300': entry.app === 'zendesk',
256
+ 'bg-blue-900 text-blue-300': entry.app === 'salesforce',
257
+ 'bg-orange-900 text-orange-300': entry.app === 'workday',
258
+ }"
259
+ x-text="entry.app"></span>
260
+ </template>
261
+ <template x-if="entry.operation">
262
+ <span class="text-sky-400" x-text="entry.operation"></span>
263
+ </template>
264
+ <template x-if="entry.reward !== undefined">
265
+ <span :class="entry.reward >= 0 ? 'text-green-400' : 'text-red-400'"
266
+ x-text="(entry.reward >= 0 ? '+' : '') + entry.reward.toFixed(4)"></span>
267
+ </template>
268
+ </div>
269
+ <div class="text-slate-400 text-[11px] mt-0.5 leading-snug" x-text="entry.message"></div>
270
+ <template x-if="entry.argsStr">
271
+ <div class="text-slate-600 text-[10px] mt-0.5 truncate" x-text="entry.argsStr"></div>
272
+ </template>
273
+ </div>
274
+ </div>
275
+ </template>
276
+ </div>
277
+ </div>
278
+ </main>
279
+
280
+ <!-- ============================================================
281
+ RIGHT: Metrics Panel
282
+ ============================================================ -->
283
+ <aside class="w-64 flex-shrink-0 bg-slate-900 border-l border-slate-800 flex flex-col overflow-hidden">
284
+
285
+ <!-- Reward curve chart -->
286
+ <div class="p-4 border-b border-slate-800">
287
+ <div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Reward Curve</div>
288
+ <canvas id="rewardChart" class="w-full" style="max-height:120px"></canvas>
289
+ </div>
290
+
291
+ <!-- Score breakdown bars -->
292
+ <div class="p-4 border-b border-slate-800">
293
+ <div class="text-xs text-slate-500 uppercase tracking-widest mb-3">Score Breakdown</div>
294
+ <template x-for="comp in rewardComponents" :key="comp.key">
295
+ <div class="mb-2">
296
+ <div class="flex justify-between text-[11px] mb-0.5">
297
+ <span class="text-slate-500" x-text="comp.label"></span>
298
+ <span class="text-slate-300 tabular-nums" x-text="(comp.value * 100).toFixed(0) + '%'"></span>
299
+ </div>
300
+ <div class="w-full bg-slate-800 rounded-full h-1.5">
301
+ <div class="h-1.5 rounded-full transition-all duration-300"
302
+ :class="comp.color"
303
+ :style="'width: ' + (comp.value * 100) + '%'"></div>
304
+ </div>
305
+ </div>
306
+ </template>
307
+ </div>
308
+
309
+ <!-- Episode stats -->
310
+ <div class="p-4 border-b border-slate-800">
311
+ <div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Episode Stats</div>
312
+ <div class="space-y-1">
313
+ <div class="flex justify-between text-[11px]">
314
+ <span class="text-slate-500">Violations</span>
315
+ <span :class="violationCount > 0 ? 'text-red-400' : 'text-green-400'"
316
+ x-text="violationCount" class="font-bold tabular-nums"></span>
317
+ </div>
318
+ <div class="flex justify-between text-[11px]">
319
+ <span class="text-slate-500">Schema boosts</span>
320
+ <span class="text-green-400 font-bold tabular-nums" x-text="schemaAdaptCount"></span>
321
+ </div>
322
+ <div class="flex justify-between text-[11px]">
323
+ <span class="text-slate-500">Schema errors</span>
324
+ <span :class="schemaErrorCount > 0 ? 'text-red-400' : 'text-slate-600'"
325
+ x-text="schemaErrorCount" class="font-bold tabular-nums"></span>
326
+ </div>
327
+ <div class="flex justify-between text-[11px]">
328
+ <span class="text-slate-500">Workflow ID</span>
329
+ <span class="text-sky-400 font-bold" x-text="workflowId || '—'"></span>
330
+ </div>
331
+ <div class="flex justify-between text-[11px]">
332
+ <span class="text-slate-500">Schema versions</span>
333
+ <template x-for="[app, ver] in Object.entries(schemaVersions)" :key="app">
334
+ <span class="text-slate-300 text-[10px] tabular-nums"
335
+ x-text="app[0].toUpperCase() + ':' + ver"></span>
336
+ </template>
337
+ </div>
338
+ </div>
339
+ </div>
340
+
341
+ <!-- Recent violations -->
342
+ <div class="p-4 flex-1 overflow-y-auto">
343
+ <div class="text-xs text-slate-500 uppercase tracking-widest mb-2">Violations</div>
344
+ <template x-if="violations.length === 0">
345
+ <p class="text-slate-600 text-xs italic">None this episode.</p>
346
+ </template>
347
+ <template x-for="(v, i) in violations.slice(-8)" :key="i">
348
+ <div class="text-[10px] text-red-400 py-0.5 border-b border-slate-800/50 leading-snug"
349
+ x-text="v"></div>
350
+ </template>
351
+ </div>
352
+ </aside>
353
+
354
+ </div><!-- end main layout -->
355
+
356
+ <!-- ================================================================
357
+ ALPINE.JS + CHART.JS LOGIC
358
+ ================================================================ -->
359
+ <script>
360
+ function orgos() {
361
+ return {
362
+ // ---- Config ----
363
+ envUrl: window.location.origin,
364
+
365
+ // ---- Episode state ----
366
+ selectedWorkflow: 'A',
367
+ workflowId: '',
368
+ workflowGoal: '',
369
+ currentScore: 0.001,
370
+ stepCount: 0,
371
+ maxSteps: 15,
372
+ isRunning: false,
373
+ policyDriftActive: false,
374
+ serverHealthy: false,
375
+
376
+ // ---- Step tracking ----
377
+ allSteps: [],
378
+ completedSteps: [],
379
+ totalSteps: 0,
380
+
381
+ // ---- App state ----
382
+ appTabs: [
383
+ { id: 'zendesk', label: 'Zendesk', icon: '🎫' },
384
+ { id: 'jira', label: 'Jira', icon: '🐛' },
385
+ { id: 'salesforce', label: 'Salesforce', icon: '💼' },
386
+ { id: 'workday', label: 'Workday', icon: '👥' },
387
+ ],
388
+ activeAppTab: 'zendesk',
389
+ appStates: { zendesk: '', jira: '', salesforce: '', workday: '' },
390
+ appOpenCounts:{ zendesk: 0, jira: 0, salesforce: 0, workday: 0 },
391
+
392
+ // ---- Schema / Rules ----
393
+ schemaHints: {},
394
+ schemaVersions:{},
395
+ activeRules: {},
396
+
397
+ // ---- Metrics ----
398
+ rewardHistory: [],
399
+ rewardComponents: [
400
+ { key: 'workflow_completion', label: 'Workflow', value: 0, color: 'bg-sky-500' },
401
+ { key: 'rule_compliance', label: 'Compliance',value: 0, color: 'bg-green-500' },
402
+ { key: 'schema_adaptation', label: 'Schema', value: 0, color: 'bg-violet-500' },
403
+ { key: 'efficiency', label: 'Efficiency',value: 0, color: 'bg-amber-500' },
404
+ { key: 'policy_drift_handling', label: 'Policy', value: 0, color: 'bg-pink-500' },
405
+ ],
406
+ violationCount: 0,
407
+ schemaAdaptCount: 0,
408
+ schemaErrorCount: 0,
409
+ violations: [],
410
+
411
+ // ---- Log ----
412
+ actionLog: [],
413
+
414
+ // ---- SSE handle ----
415
+ _sse: null,
416
+ _chart: null,
417
+ scoreUpdated: false,
418
+
419
+ // ----------------------------------------------------------------
420
+ // Init
421
+ // ----------------------------------------------------------------
422
+ async init() {
423
+ await this.checkHealth();
424
+ this._chart = this._initChart();
425
+ // Poll health every 10s
426
+ setInterval(() => this.checkHealth(), 10_000);
427
+ },
428
+
429
+ async checkHealth() {
430
+ try {
431
+ const r = await fetch(this.envUrl + '/health');
432
+ this.serverHealthy = r.ok;
433
+ } catch { this.serverHealthy = false; }
434
+ },
435
+
436
+ // ----------------------------------------------------------------
437
+ // Reset
438
+ // ----------------------------------------------------------------
439
+ async resetEpisode() {
440
+ this.stopAgent();
441
+ try {
442
+ const r = await fetch(this.envUrl + '/reset', {
443
+ method: 'POST',
444
+ headers: { 'Content-Type': 'application/json' },
445
+ body: JSON.stringify({ workflow_id: this.selectedWorkflow }),
446
+ });
447
+ const data = await r.json();
448
+ this._applyObservation(data.observation, null, 0);
449
+ this.actionLog = [];
450
+ this.rewardHistory = [];
451
+ this.violationCount = 0;
452
+ this.schemaAdaptCount = 0;
453
+ this.schemaErrorCount = 0;
454
+ this.violations = [];
455
+ this._updateChart();
456
+ this._pushLog({ type: 'reset', step: 0, message: 'Episode reset. Ready to run agent.' });
457
+ // Fetch schema versions from /state
458
+ const st = await fetch(this.envUrl + '/state').then(r => r.json());
459
+ this.schemaVersions = st.schema_versions || {};
460
+ this.policyDriftActive = st.policy_drift_active || false;
461
+ } catch (e) {
462
+ this._pushLog({ type: 'error', step: 0, message: 'Reset failed: ' + e });
463
+ }
464
+ },
465
+
466
+ // ----------------------------------------------------------------
467
+ // Run agent via SSE
468
+ // ----------------------------------------------------------------
469
+ startAgent() {
470
+ if (this.isRunning) return;
471
+ this.isRunning = true;
472
+ const url = `${this.envUrl}/ui/run-agent?workflow_id=${this.selectedWorkflow}`;
473
+ this._sse = new EventSource(url);
474
+ this._sse.onmessage = (e) => {
475
+ try {
476
+ const evt = JSON.parse(e.data);
477
+ this._handleSSEEvent(evt);
478
+ } catch {}
479
+ };
480
+ this._sse.onerror = () => {
481
+ this.isRunning = false;
482
+ this._sse && this._sse.close();
483
+ this._pushLog({ type: 'error', step: this.stepCount, message: 'SSE connection error.' });
484
+ };
485
+ },
486
+
487
+ stopAgent() {
488
+ this.isRunning = false;
489
+ if (this._sse) { this._sse.close(); this._sse = null; }
490
+ },
491
+
492
+ _handleSSEEvent(evt) {
493
+ if (evt.type === 'reset') {
494
+ this.actionLog = [];
495
+ this.rewardHistory = [];
496
+ this.violationCount = 0;
497
+ this.violations = [];
498
+ this.schemaAdaptCount = 0;
499
+ this.schemaErrorCount = 0;
500
+ this._applyObservation(evt.observation, null, 0);
501
+ this._pushLog({ type: 'reset', step: 0, message: `Episode started — Workflow ${evt.workflow_id}` });
502
+ } else if (evt.type === 'step') {
503
+ const obs = evt.observation;
504
+ this._applyObservation(obs, evt.action, evt.reward);
505
+ // Detect schema adapt / error from message
506
+ if (obs.message && obs.message.includes('Stale schema')) this.schemaErrorCount++;
507
+ if (obs.reward > 0.05 && evt.action) this.schemaAdaptCount += (evt.action._adapted ? 1 : 0);
508
+ this.rewardHistory.push(evt.reward);
509
+ this._updateChart();
510
+ // Violations
511
+ if (obs.rule_violations && obs.rule_violations.length > 0) {
512
+ this.violations.push(...obs.rule_violations);
513
+ this.violationCount += obs.rule_violations.length;
514
+ }
515
+ this._pushLog({
516
+ type: evt.reward < 0 ? 'error' : (evt.reward > 0.05 ? 'success' : 'info'),
517
+ step: evt.step,
518
+ app: evt.action?.app,
519
+ operation: evt.action?.operation,
520
+ reward: evt.reward,
521
+ message: obs.message,
522
+ argsStr: evt.action?.args ? JSON.stringify(evt.action.args, null, 0).slice(0, 80) : '',
523
+ });
524
+ if (evt.done) { this.isRunning = false; }
525
+ } else if (evt.type === 'done') {
526
+ this.isRunning = false;
527
+ this._pushLog({
528
+ type: 'info', step: evt.steps,
529
+ message: `Episode done. Final score: ${(evt.final_score||0).toFixed(4)} | Workflow complete: ${evt.completed}`,
530
+ });
531
+ } else if (evt.type === 'error') {
532
+ this._pushLog({ type: 'error', step: evt.step || this.stepCount, message: evt.message });
533
+ }
534
+ },
535
+
536
+ // ----------------------------------------------------------------
537
+ // Apply observation to UI state
538
+ // ----------------------------------------------------------------
539
+ _applyObservation(obs, action, reward) {
540
+ this.workflowId = obs.workflow_id || '';
541
+ this.workflowGoal = obs.workflow_goal || '';
542
+ this.schemaHints = obs.schema_hints || {};
543
+ this.activeRules = obs.active_rules || {};
544
+ this.stepCount = obs.step_count || 0;
545
+ this.appStates = obs.app_states || this.appStates;
546
+ this.completedSteps= (obs.completed_steps || []).map(id => id);
547
+ this.policyDriftActive = obs.policy_drift_active || false;
548
+
549
+ // Score update with flash animation
550
+ const newScore = obs.current_score || 0.001;
551
+ if (newScore !== this.currentScore) {
552
+ this.currentScore = newScore;
553
+ this.scoreUpdated = true;
554
+ setTimeout(() => { this.scoreUpdated = false; }, 500);
555
+ }
556
+
557
+ // Workflow steps
558
+ const wfStepDefs = {
559
+ A: [
560
+ { id: 'A1', description: 'Acknowledge ZD-001 in Zendesk' },
561
+ { id: 'A2', description: 'Create linked Jira issue' },
562
+ { id: 'A3', description: 'Verify ACME-001 in Salesforce' },
563
+ { id: 'A4', description: 'Assign Jira issue to engineer' },
564
+ { id: 'A5', description: 'Log SLA event in Workday' },
565
+ ],
566
+ B: [
567
+ { id: 'B1', description: 'Create Workday onboarding record' },
568
+ { id: 'B2', description: 'Provision Jira access' },
569
+ { id: 'B3', description: 'Assign to Salesforce territory team' },
570
+ { id: 'B4', description: 'Create Zendesk agent profile' },
571
+ ],
572
+ C: [
573
+ { id: 'C1', description: 'Flag ACME-003 as churn risk' },
574
+ { id: 'C2', description: 'Query Zendesk support volume' },
575
+ { id: 'C3', description: 'Check Jira open bugs' },
576
+ { id: 'C4', description: 'Assign intervention owner' },
577
+ ],
578
+ };
579
+ const wfId = obs.workflow_id || this.selectedWorkflow;
580
+ this.allSteps = wfStepDefs[wfId] || [];
581
+ this.totalSteps= this.allSteps.length;
582
+ this.maxSteps = { A: 15, B: 20, C: 18 }[wfId] || 15;
583
+
584
+ // Reward breakdown
585
+ const rb = obs.reward_breakdown || {};
586
+ this.rewardComponents.forEach(c => {
587
+ c.value = rb[c.key] ?? 0;
588
+ });
589
+ },
590
+
591
+ // ----------------------------------------------------------------
592
+ // Log
593
+ // ----------------------------------------------------------------
594
+ _pushLog(entry) {
595
+ this.actionLog.push(entry);
596
+ // Auto-scroll to bottom
597
+ this.$nextTick(() => {
598
+ const el = document.getElementById('log-scroll');
599
+ if (el) el.scrollTop = el.scrollHeight;
600
+ });
601
+ },
602
+
603
+ // ----------------------------------------------------------------
604
+ // Chart
605
+ // ----------------------------------------------------------------
606
+ _initChart() {
607
+ const ctx = document.getElementById('rewardChart').getContext('2d');
608
+ return new Chart(ctx, {
609
+ type: 'line',
610
+ data: {
611
+ labels: [],
612
+ datasets: [{
613
+ data: [],
614
+ borderColor: '#38bdf8',
615
+ backgroundColor: 'rgba(56,189,248,0.08)',
616
+ borderWidth: 1.5,
617
+ pointRadius: 0,
618
+ tension: 0.3,
619
+ fill: true,
620
+ }],
621
+ },
622
+ options: {
623
+ animation: false,
624
+ responsive: true,
625
+ maintainAspectRatio: false,
626
+ plugins: { legend: { display: false }, tooltip: { enabled: false } },
627
+ scales: {
628
+ x: { display: false },
629
+ y: {
630
+ display: true,
631
+ grid: { color: 'rgba(255,255,255,0.04)' },
632
+ ticks: { color: '#475569', font: { size: 9 }, maxTicksLimit: 4 },
633
+ },
634
+ },
635
+ },
636
+ });
637
+ },
638
+
639
+ _updateChart() {
640
+ if (!this._chart) return;
641
+ const labels = this.rewardHistory.map((_, i) => i + 1);
642
+ this._chart.data.labels = labels;
643
+ this._chart.data.datasets[0].data = this.rewardHistory;
644
+ this._chart.update('none');
645
+ },
646
+ };
647
+ }
648
+ </script>
649
+
650
+ </body>
651
+ </html>