muskan singh committed on
Commit
dd5113f
·
1 Parent(s): 1519439

bug fixes, better generalization

Browse files
baseline_scores.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "scores": {
3
- "workflow_A": 0.7,
4
  "workflow_B": 0.563,
5
- "workflow_C": 0.625
6
  },
7
- "average": 0.6293
8
  }
 
1
  {
2
  "scores": {
3
+ "workflow_A": 0.137,
4
  "workflow_B": 0.563,
5
+ "workflow_C": 0.522
6
  },
7
+ "average": 0.4073
8
  }
server/app.py CHANGED
@@ -173,13 +173,19 @@ async def step(body: Dict[str, Any] = Body(...)):
173
  {"action": {"app": "...", "operation": "...", "args": {...}}, "timeout_s": 15}
174
  and direct format:
175
  {"app": "...", "operation": "...", "args": {...}}
 
176
  """
177
  action_data = body.get("action", body)
178
  try:
179
  action = OrgOSAction(**action_data)
180
  obs = env.step(action)
181
- except (TypeError, KeyError, Exception) as exc:
182
- raise HTTPException(status_code=400, detail=str(exc))
 
 
 
 
 
183
  return StepResponse(observation=obs, reward=obs.reward, done=obs.done)
184
 
185
 
 
173
  {"action": {"app": "...", "operation": "...", "args": {...}}, "timeout_s": 15}
174
  and direct format:
175
  {"app": "...", "operation": "...", "args": {...}}
176
+ Invalid or empty actions return a -0.05 penalty observation rather than HTTP 400.
177
  """
178
  action_data = body.get("action", body)
179
  try:
180
  action = OrgOSAction(**action_data)
181
  obs = env.step(action)
182
+ except Exception as exc:
183
+ # Return a graceful error observation so the inference loop can continue
184
+ obs = env._build_obs(
185
+ reward=-0.05,
186
+ done=False,
187
+ message=f"Invalid action format: {exc}. Use {{\"app\": \"...\", \"operation\": \"...\", \"args\": {{...}}}}",
188
+ )
189
  return StepResponse(observation=obs, reward=obs.reward, done=obs.done)
190
 
191
 
server/apps/workday.py CHANGED
@@ -92,11 +92,13 @@ class WorkdayApp(BaseApp):
92
 
93
  def _op_list_employees(self, department: Optional[str] = None,
94
  status: Optional[str] = None,
 
95
  limit: int = 10) -> Dict:
96
  matching = [
97
  r for r in self._records.values()
98
  if (department is None or r.get("department") == department)
99
  and (status is None or r.get("status") == status)
 
100
  ][:limit]
101
  drifted = [self._to_agent_view(r) for r in matching]
102
  keep = ["employee_id", "name",
@@ -106,9 +108,10 @@ class WorkdayApp(BaseApp):
106
  "department", "territory"]
107
  compact = [{k: v for k, v in r.items() if k in keep and v is not None}
108
  for r in drifted]
 
109
  return {"success": True, "data": compact,
110
  "message": f"Found {len(compact)} employees"
111
- + (f" in {department}" if department else "")}
112
 
113
  def _op_provision_access(self, employee_id: str, app_name: str,
114
  **kwargs) -> Dict:
 
92
 
93
  def _op_list_employees(self, department: Optional[str] = None,
94
  status: Optional[str] = None,
95
+ territory: Optional[str] = None,
96
  limit: int = 10) -> Dict:
97
  matching = [
98
  r for r in self._records.values()
99
  if (department is None or r.get("department") == department)
100
  and (status is None or r.get("status") == status)
101
+ and (territory is None or r.get("territory") == territory)
102
  ][:limit]
103
  drifted = [self._to_agent_view(r) for r in matching]
104
  keep = ["employee_id", "name",
 
108
  "department", "territory"]
109
  compact = [{k: v for k, v in r.items() if k in keep and v is not None}
110
  for r in drifted]
111
+ filters = [f for f in [department, territory, status] if f]
112
  return {"success": True, "data": compact,
113
  "message": f"Found {len(compact)} employees"
114
+ + (f" ({', '.join(filters)})" if filters else "")}
115
 
116
  def _op_provision_access(self, employee_id: str, app_name: str,
117
  **kwargs) -> Dict:
server/environment.py CHANGED
@@ -87,11 +87,14 @@ class OrgOSEnvironment:
87
  old_score = self._last_score
88
  extra_penalty = 0.0
89
 
 
 
 
90
  # 1. Validate app exists
91
  if action.app not in self._apps:
92
  return self._build_obs(
93
  reward=-0.05,
94
- done=False,
95
  message=f"Unknown app '{action.app}'. Valid apps: {list(self._apps)}",
96
  )
97
 
@@ -104,7 +107,7 @@ class OrgOSEnvironment:
104
  extra_penalty = rule_penalty
105
  return self._build_obs(
106
  reward=extra_penalty,
107
- done=False,
108
  message=f"Rule violation: {reason}",
109
  )
110
 
@@ -116,7 +119,7 @@ class OrgOSEnvironment:
116
  self._efficiency -= 0.02
117
  return self._build_obs(
118
  reward=-0.20,
119
- done=False,
120
  message=(
121
  f"Stale schema: field '{result['schema_error']}' is no longer valid. "
122
  "Check schema_hints for the current field name. "
@@ -128,7 +131,7 @@ class OrgOSEnvironment:
128
  self._efficiency -= 0.02 # penalize failed/no-op actions
129
  return self._build_obs(
130
  reward=-0.01,
131
- done=False,
132
  message=result.get("message", "Operation failed"),
133
  )
134
 
 
87
  old_score = self._last_score
88
  extra_penalty = 0.0
89
 
90
+ # Check max-steps first — applies regardless of action outcome
91
+ at_max = self._step_count >= self.MAX_STEPS[self._workflow_id]
92
+
93
  # 1. Validate app exists
94
  if action.app not in self._apps:
95
  return self._build_obs(
96
  reward=-0.05,
97
+ done=at_max,
98
  message=f"Unknown app '{action.app}'. Valid apps: {list(self._apps)}",
99
  )
100
 
 
107
  extra_penalty = rule_penalty
108
  return self._build_obs(
109
  reward=extra_penalty,
110
+ done=at_max,
111
  message=f"Rule violation: {reason}",
112
  )
113
 
 
119
  self._efficiency -= 0.02
120
  return self._build_obs(
121
  reward=-0.20,
122
+ done=at_max,
123
  message=(
124
  f"Stale schema: field '{result['schema_error']}' is no longer valid. "
125
  "Check schema_hints for the current field name. "
 
131
  self._efficiency -= 0.02 # penalize failed/no-op actions
132
  return self._build_obs(
133
  reward=-0.01,
134
+ done=at_max,
135
  message=result.get("message", "Operation failed"),
136
  )
137
 
server/workflow_engine.py CHANGED
@@ -20,27 +20,27 @@ class WorkflowStep:
20
  # ---------------------------------------------------------------------------
21
  WORKFLOW_A_STEPS = [
22
  WorkflowStep(
23
- "A1", "Acknowledge the incoming Zendesk ticket (ZD-001)",
24
  "zendesk", "acknowledge_ticket",
25
  lambda apps: apps["zendesk"].ticket_acknowledged(),
26
  ),
27
  WorkflowStep(
28
- "A2", "Escalate to Jira — create a new issue linked to ZD-001",
29
  "jira", "create_issue",
30
  lambda apps: apps["jira"].has_linked_issue(),
31
  ),
32
  WorkflowStep(
33
- "A3", "Verify the customer's account status in Salesforce (ACME-001)",
34
  "salesforce", "get_account",
35
  lambda apps: apps["salesforce"].account_checked(),
36
  ),
37
  WorkflowStep(
38
- "A4", "Assign the Jira issue to an engineer (JIRA-001)",
39
  "jira", "assign_owner",
40
  lambda apps: apps["jira"].issue_assigned(),
41
  ),
42
  WorkflowStep(
43
- "A5", "Log the SLA compliance event in Workday",
44
  "workday", "log_sla_event",
45
  lambda apps: apps["workday"].sla_logged(),
46
  ),
@@ -52,7 +52,7 @@ WORKFLOW_A_STEPS = [
52
  # ---------------------------------------------------------------------------
53
  WORKFLOW_B_STEPS = [
54
  WorkflowStep(
55
- "B1", "Create the new employee's onboarding record in Workday (EMP-NEW-001)",
56
  "workday", "create_onboarding_task",
57
  lambda apps: apps["workday"].employee_created(),
58
  ),
@@ -62,7 +62,7 @@ WORKFLOW_B_STEPS = [
62
  lambda apps: apps["workday"].access_provisioned("jira"),
63
  ),
64
  WorkflowStep(
65
- "B3", "Assign the new employee to the correct Salesforce territory team",
66
  "salesforce", "assign_account_owner",
67
  lambda apps: apps["salesforce"].team_assigned(),
68
  ),
@@ -79,22 +79,22 @@ WORKFLOW_B_STEPS = [
79
  # ---------------------------------------------------------------------------
80
  WORKFLOW_C_STEPS = [
81
  WorkflowStep(
82
- "C1", "Flag at-risk account ACME-003 as churn risk in Salesforce",
83
  "salesforce", "flag_churn_risk",
84
  lambda apps: apps["salesforce"].churn_flagged(),
85
  ),
86
  WorkflowStep(
87
- "C2", "Query recent support ticket volume for ACME-003 in Zendesk",
88
  "zendesk", "get_ticket",
89
  lambda apps: apps["zendesk"].support_queried("ACME-003"),
90
  ),
91
  WorkflowStep(
92
- "C3", "Check outstanding Jira bugs linked to ACME-003",
93
  "jira", "list_issues",
94
  lambda apps: apps["jira"].bugs_checked(),
95
  ),
96
  WorkflowStep(
97
- "C4", "Assign an intervention owner to ACME-003 in Salesforce",
98
  "salesforce", "assign_account_owner",
99
  lambda apps: apps["salesforce"].intervention_assigned(),
100
  ),
 
20
  # ---------------------------------------------------------------------------
21
  WORKFLOW_A_STEPS = [
22
  WorkflowStep(
23
+ "A1", "Find and acknowledge the new P1 support ticket in Zendesk",
24
  "zendesk", "acknowledge_ticket",
25
  lambda apps: apps["zendesk"].ticket_acknowledged(),
26
  ),
27
  WorkflowStep(
28
+ "A2", "Create a new Jira issue linked to that Zendesk ticket",
29
  "jira", "create_issue",
30
  lambda apps: apps["jira"].has_linked_issue(),
31
  ),
32
  WorkflowStep(
33
+ "A3", "Verify the customer's account status in Salesforce",
34
  "salesforce", "get_account",
35
  lambda apps: apps["salesforce"].account_checked(),
36
  ),
37
  WorkflowStep(
38
+ "A4", "Assign the pre-existing Jira bug for this customer to an engineer",
39
  "jira", "assign_owner",
40
  lambda apps: apps["jira"].issue_assigned(),
41
  ),
42
  WorkflowStep(
43
+ "A5", "Log the SLA compliance event in Workday using the ticket ID",
44
  "workday", "log_sla_event",
45
  lambda apps: apps["workday"].sla_logged(),
46
  ),
 
52
  # ---------------------------------------------------------------------------
53
  WORKFLOW_B_STEPS = [
54
  WorkflowStep(
55
+ "B1", "Find the pending new hire in Workday and create their onboarding record",
56
  "workday", "create_onboarding_task",
57
  lambda apps: apps["workday"].employee_created(),
58
  ),
 
62
  lambda apps: apps["workday"].access_provisioned("jira"),
63
  ),
64
  WorkflowStep(
65
+ "B3", "Assign the new employee to a Salesforce territory account (west region)",
66
  "salesforce", "assign_account_owner",
67
  lambda apps: apps["salesforce"].team_assigned(),
68
  ),
 
79
  # ---------------------------------------------------------------------------
80
  WORKFLOW_C_STEPS = [
81
  WorkflowStep(
82
+ "C1", "Identify and flag the at-risk account as churn risk in Salesforce",
83
  "salesforce", "flag_churn_risk",
84
  lambda apps: apps["salesforce"].churn_flagged(),
85
  ),
86
  WorkflowStep(
87
+ "C2", "Query recent support tickets for the at-risk account in Zendesk",
88
  "zendesk", "get_ticket",
89
  lambda apps: apps["zendesk"].support_queried("ACME-003"),
90
  ),
91
  WorkflowStep(
92
+ "C3", "List open Jira bugs linked to the at-risk account",
93
  "jira", "list_issues",
94
  lambda apps: apps["jira"].bugs_checked(),
95
  ),
96
  WorkflowStep(
97
+ "C4", "Assign an intervention owner to the at-risk account in Salesforce",
98
  "salesforce", "assign_account_owner",
99
  lambda apps: apps["salesforce"].intervention_assigned(),
100
  ),