Spaces:
Running
Running
muskan singh committed on
Commit ·
dd5113f
1
Parent(s): 1519439
bug fixes, better generalization
Browse files
- baseline_scores.json +3 -3
- server/app.py +8 -2
- server/apps/workday.py +4 -1
- server/environment.py +7 -4
- server/workflow_engine.py +11 -11
baseline_scores.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"scores": {
|
| 3 |
-
"workflow_A": 0.
|
| 4 |
"workflow_B": 0.563,
|
| 5 |
-
"workflow_C": 0.
|
| 6 |
},
|
| 7 |
-
"average": 0.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"scores": {
|
| 3 |
+
"workflow_A": 0.137,
|
| 4 |
"workflow_B": 0.563,
|
| 5 |
+
"workflow_C": 0.522
|
| 6 |
},
|
| 7 |
+
"average": 0.4073
|
| 8 |
}
|
server/app.py
CHANGED
|
@@ -173,13 +173,19 @@ async def step(body: Dict[str, Any] = Body(...)):
|
|
| 173 |
{"action": {"app": "...", "operation": "...", "args": {...}}, "timeout_s": 15}
|
| 174 |
and direct format:
|
| 175 |
{"app": "...", "operation": "...", "args": {...}}
|
|
|
|
| 176 |
"""
|
| 177 |
action_data = body.get("action", body)
|
| 178 |
try:
|
| 179 |
action = OrgOSAction(**action_data)
|
| 180 |
obs = env.step(action)
|
| 181 |
-
except
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
return StepResponse(observation=obs, reward=obs.reward, done=obs.done)
|
| 184 |
|
| 185 |
|
|
|
|
| 173 |
{"action": {"app": "...", "operation": "...", "args": {...}}, "timeout_s": 15}
|
| 174 |
and direct format:
|
| 175 |
{"app": "...", "operation": "...", "args": {...}}
|
| 176 |
+
Invalid or empty actions return a -0.05 penalty observation rather than HTTP 400.
|
| 177 |
"""
|
| 178 |
action_data = body.get("action", body)
|
| 179 |
try:
|
| 180 |
action = OrgOSAction(**action_data)
|
| 181 |
obs = env.step(action)
|
| 182 |
+
except Exception as exc:
|
| 183 |
+
# Return a graceful error observation so the inference loop can continue
|
| 184 |
+
obs = env._build_obs(
|
| 185 |
+
reward=-0.05,
|
| 186 |
+
done=False,
|
| 187 |
+
message=f"Invalid action format: {exc}. Use {{\"app\": \"...\", \"operation\": \"...\", \"args\": {{...}}}}",
|
| 188 |
+
)
|
| 189 |
return StepResponse(observation=obs, reward=obs.reward, done=obs.done)
|
| 190 |
|
| 191 |
|
server/apps/workday.py
CHANGED
|
@@ -92,11 +92,13 @@ class WorkdayApp(BaseApp):
|
|
| 92 |
|
| 93 |
def _op_list_employees(self, department: Optional[str] = None,
|
| 94 |
status: Optional[str] = None,
|
|
|
|
| 95 |
limit: int = 10) -> Dict:
|
| 96 |
matching = [
|
| 97 |
r for r in self._records.values()
|
| 98 |
if (department is None or r.get("department") == department)
|
| 99 |
and (status is None or r.get("status") == status)
|
|
|
|
| 100 |
][:limit]
|
| 101 |
drifted = [self._to_agent_view(r) for r in matching]
|
| 102 |
keep = ["employee_id", "name",
|
|
@@ -106,9 +108,10 @@ class WorkdayApp(BaseApp):
|
|
| 106 |
"department", "territory"]
|
| 107 |
compact = [{k: v for k, v in r.items() if k in keep and v is not None}
|
| 108 |
for r in drifted]
|
|
|
|
| 109 |
return {"success": True, "data": compact,
|
| 110 |
"message": f"Found {len(compact)} employees"
|
| 111 |
-
+ (f"
|
| 112 |
|
| 113 |
def _op_provision_access(self, employee_id: str, app_name: str,
|
| 114 |
**kwargs) -> Dict:
|
|
|
|
| 92 |
|
| 93 |
def _op_list_employees(self, department: Optional[str] = None,
|
| 94 |
status: Optional[str] = None,
|
| 95 |
+
territory: Optional[str] = None,
|
| 96 |
limit: int = 10) -> Dict:
|
| 97 |
matching = [
|
| 98 |
r for r in self._records.values()
|
| 99 |
if (department is None or r.get("department") == department)
|
| 100 |
and (status is None or r.get("status") == status)
|
| 101 |
+
and (territory is None or r.get("territory") == territory)
|
| 102 |
][:limit]
|
| 103 |
drifted = [self._to_agent_view(r) for r in matching]
|
| 104 |
keep = ["employee_id", "name",
|
|
|
|
| 108 |
"department", "territory"]
|
| 109 |
compact = [{k: v for k, v in r.items() if k in keep and v is not None}
|
| 110 |
for r in drifted]
|
| 111 |
+
filters = [f for f in [department, territory, status] if f]
|
| 112 |
return {"success": True, "data": compact,
|
| 113 |
"message": f"Found {len(compact)} employees"
|
| 114 |
+
+ (f" ({', '.join(filters)})" if filters else "")}
|
| 115 |
|
| 116 |
def _op_provision_access(self, employee_id: str, app_name: str,
|
| 117 |
**kwargs) -> Dict:
|
server/environment.py
CHANGED
|
@@ -87,11 +87,14 @@ class OrgOSEnvironment:
|
|
| 87 |
old_score = self._last_score
|
| 88 |
extra_penalty = 0.0
|
| 89 |
|
|
|
|
|
|
|
|
|
|
| 90 |
# 1. Validate app exists
|
| 91 |
if action.app not in self._apps:
|
| 92 |
return self._build_obs(
|
| 93 |
reward=-0.05,
|
| 94 |
-
done=
|
| 95 |
message=f"Unknown app '{action.app}'. Valid apps: {list(self._apps)}",
|
| 96 |
)
|
| 97 |
|
|
@@ -104,7 +107,7 @@ class OrgOSEnvironment:
|
|
| 104 |
extra_penalty = rule_penalty
|
| 105 |
return self._build_obs(
|
| 106 |
reward=extra_penalty,
|
| 107 |
-
done=
|
| 108 |
message=f"Rule violation: {reason}",
|
| 109 |
)
|
| 110 |
|
|
@@ -116,7 +119,7 @@ class OrgOSEnvironment:
|
|
| 116 |
self._efficiency -= 0.02
|
| 117 |
return self._build_obs(
|
| 118 |
reward=-0.20,
|
| 119 |
-
done=
|
| 120 |
message=(
|
| 121 |
f"Stale schema: field '{result['schema_error']}' is no longer valid. "
|
| 122 |
"Check schema_hints for the current field name. "
|
|
@@ -128,7 +131,7 @@ class OrgOSEnvironment:
|
|
| 128 |
self._efficiency -= 0.02 # penalize failed/no-op actions
|
| 129 |
return self._build_obs(
|
| 130 |
reward=-0.01,
|
| 131 |
-
done=
|
| 132 |
message=result.get("message", "Operation failed"),
|
| 133 |
)
|
| 134 |
|
|
|
|
| 87 |
old_score = self._last_score
|
| 88 |
extra_penalty = 0.0
|
| 89 |
|
| 90 |
+
# Check max-steps first — applies regardless of action outcome
|
| 91 |
+
at_max = self._step_count >= self.MAX_STEPS[self._workflow_id]
|
| 92 |
+
|
| 93 |
# 1. Validate app exists
|
| 94 |
if action.app not in self._apps:
|
| 95 |
return self._build_obs(
|
| 96 |
reward=-0.05,
|
| 97 |
+
done=at_max,
|
| 98 |
message=f"Unknown app '{action.app}'. Valid apps: {list(self._apps)}",
|
| 99 |
)
|
| 100 |
|
|
|
|
| 107 |
extra_penalty = rule_penalty
|
| 108 |
return self._build_obs(
|
| 109 |
reward=extra_penalty,
|
| 110 |
+
done=at_max,
|
| 111 |
message=f"Rule violation: {reason}",
|
| 112 |
)
|
| 113 |
|
|
|
|
| 119 |
self._efficiency -= 0.02
|
| 120 |
return self._build_obs(
|
| 121 |
reward=-0.20,
|
| 122 |
+
done=at_max,
|
| 123 |
message=(
|
| 124 |
f"Stale schema: field '{result['schema_error']}' is no longer valid. "
|
| 125 |
"Check schema_hints for the current field name. "
|
|
|
|
| 131 |
self._efficiency -= 0.02 # penalize failed/no-op actions
|
| 132 |
return self._build_obs(
|
| 133 |
reward=-0.01,
|
| 134 |
+
done=at_max,
|
| 135 |
message=result.get("message", "Operation failed"),
|
| 136 |
)
|
| 137 |
|
server/workflow_engine.py
CHANGED
|
@@ -20,27 +20,27 @@ class WorkflowStep:
|
|
| 20 |
# ---------------------------------------------------------------------------
|
| 21 |
WORKFLOW_A_STEPS = [
|
| 22 |
WorkflowStep(
|
| 23 |
-
"A1", "
|
| 24 |
"zendesk", "acknowledge_ticket",
|
| 25 |
lambda apps: apps["zendesk"].ticket_acknowledged(),
|
| 26 |
),
|
| 27 |
WorkflowStep(
|
| 28 |
-
"A2", "
|
| 29 |
"jira", "create_issue",
|
| 30 |
lambda apps: apps["jira"].has_linked_issue(),
|
| 31 |
),
|
| 32 |
WorkflowStep(
|
| 33 |
-
"A3", "Verify the customer's account status in Salesforce
|
| 34 |
"salesforce", "get_account",
|
| 35 |
lambda apps: apps["salesforce"].account_checked(),
|
| 36 |
),
|
| 37 |
WorkflowStep(
|
| 38 |
-
"A4", "Assign the Jira
|
| 39 |
"jira", "assign_owner",
|
| 40 |
lambda apps: apps["jira"].issue_assigned(),
|
| 41 |
),
|
| 42 |
WorkflowStep(
|
| 43 |
-
"A5", "Log the SLA compliance event in Workday",
|
| 44 |
"workday", "log_sla_event",
|
| 45 |
lambda apps: apps["workday"].sla_logged(),
|
| 46 |
),
|
|
@@ -52,7 +52,7 @@ WORKFLOW_A_STEPS = [
|
|
| 52 |
# ---------------------------------------------------------------------------
|
| 53 |
WORKFLOW_B_STEPS = [
|
| 54 |
WorkflowStep(
|
| 55 |
-
"B1", "
|
| 56 |
"workday", "create_onboarding_task",
|
| 57 |
lambda apps: apps["workday"].employee_created(),
|
| 58 |
),
|
|
@@ -62,7 +62,7 @@ WORKFLOW_B_STEPS = [
|
|
| 62 |
lambda apps: apps["workday"].access_provisioned("jira"),
|
| 63 |
),
|
| 64 |
WorkflowStep(
|
| 65 |
-
"B3", "Assign the new employee to
|
| 66 |
"salesforce", "assign_account_owner",
|
| 67 |
lambda apps: apps["salesforce"].team_assigned(),
|
| 68 |
),
|
|
@@ -79,22 +79,22 @@ WORKFLOW_B_STEPS = [
|
|
| 79 |
# ---------------------------------------------------------------------------
|
| 80 |
WORKFLOW_C_STEPS = [
|
| 81 |
WorkflowStep(
|
| 82 |
-
"C1", "
|
| 83 |
"salesforce", "flag_churn_risk",
|
| 84 |
lambda apps: apps["salesforce"].churn_flagged(),
|
| 85 |
),
|
| 86 |
WorkflowStep(
|
| 87 |
-
"C2", "Query recent support
|
| 88 |
"zendesk", "get_ticket",
|
| 89 |
lambda apps: apps["zendesk"].support_queried("ACME-003"),
|
| 90 |
),
|
| 91 |
WorkflowStep(
|
| 92 |
-
"C3", "
|
| 93 |
"jira", "list_issues",
|
| 94 |
lambda apps: apps["jira"].bugs_checked(),
|
| 95 |
),
|
| 96 |
WorkflowStep(
|
| 97 |
-
"C4", "Assign an intervention owner to
|
| 98 |
"salesforce", "assign_account_owner",
|
| 99 |
lambda apps: apps["salesforce"].intervention_assigned(),
|
| 100 |
),
|
|
|
|
| 20 |
# ---------------------------------------------------------------------------
|
| 21 |
WORKFLOW_A_STEPS = [
|
| 22 |
WorkflowStep(
|
| 23 |
+
"A1", "Find and acknowledge the new P1 support ticket in Zendesk",
|
| 24 |
"zendesk", "acknowledge_ticket",
|
| 25 |
lambda apps: apps["zendesk"].ticket_acknowledged(),
|
| 26 |
),
|
| 27 |
WorkflowStep(
|
| 28 |
+
"A2", "Create a new Jira issue linked to that Zendesk ticket",
|
| 29 |
"jira", "create_issue",
|
| 30 |
lambda apps: apps["jira"].has_linked_issue(),
|
| 31 |
),
|
| 32 |
WorkflowStep(
|
| 33 |
+
"A3", "Verify the customer's account status in Salesforce",
|
| 34 |
"salesforce", "get_account",
|
| 35 |
lambda apps: apps["salesforce"].account_checked(),
|
| 36 |
),
|
| 37 |
WorkflowStep(
|
| 38 |
+
"A4", "Assign the pre-existing Jira bug for this customer to an engineer",
|
| 39 |
"jira", "assign_owner",
|
| 40 |
lambda apps: apps["jira"].issue_assigned(),
|
| 41 |
),
|
| 42 |
WorkflowStep(
|
| 43 |
+
"A5", "Log the SLA compliance event in Workday using the ticket ID",
|
| 44 |
"workday", "log_sla_event",
|
| 45 |
lambda apps: apps["workday"].sla_logged(),
|
| 46 |
),
|
|
|
|
| 52 |
# ---------------------------------------------------------------------------
|
| 53 |
WORKFLOW_B_STEPS = [
|
| 54 |
WorkflowStep(
|
| 55 |
+
"B1", "Find the pending new hire in Workday and create their onboarding record",
|
| 56 |
"workday", "create_onboarding_task",
|
| 57 |
lambda apps: apps["workday"].employee_created(),
|
| 58 |
),
|
|
|
|
| 62 |
lambda apps: apps["workday"].access_provisioned("jira"),
|
| 63 |
),
|
| 64 |
WorkflowStep(
|
| 65 |
+
"B3", "Assign the new employee to a Salesforce territory account (west region)",
|
| 66 |
"salesforce", "assign_account_owner",
|
| 67 |
lambda apps: apps["salesforce"].team_assigned(),
|
| 68 |
),
|
|
|
|
| 79 |
# ---------------------------------------------------------------------------
|
| 80 |
WORKFLOW_C_STEPS = [
|
| 81 |
WorkflowStep(
|
| 82 |
+
"C1", "Identify and flag the at-risk account as churn risk in Salesforce",
|
| 83 |
"salesforce", "flag_churn_risk",
|
| 84 |
lambda apps: apps["salesforce"].churn_flagged(),
|
| 85 |
),
|
| 86 |
WorkflowStep(
|
| 87 |
+
"C2", "Query recent support tickets for the at-risk account in Zendesk",
|
| 88 |
"zendesk", "get_ticket",
|
| 89 |
lambda apps: apps["zendesk"].support_queried("ACME-003"),
|
| 90 |
),
|
| 91 |
WorkflowStep(
|
| 92 |
+
"C3", "List open Jira bugs linked to the at-risk account",
|
| 93 |
"jira", "list_issues",
|
| 94 |
lambda apps: apps["jira"].bugs_checked(),
|
| 95 |
),
|
| 96 |
WorkflowStep(
|
| 97 |
+
"C4", "Assign an intervention owner to the at-risk account in Salesforce",
|
| 98 |
"salesforce", "assign_account_owner",
|
| 99 |
lambda apps: apps["salesforce"].intervention_assigned(),
|
| 100 |
),
|