Taniieeee83 committed on
Commit
1519439
·
1 Parent(s): 105c5c9

changed reward scores

Browse files
Files changed (2) hide show
  1. server/environment.py +8 -4
  2. server/schema_drift.py +12 -9
server/environment.py CHANGED
@@ -37,9 +37,9 @@ class OrgOSEnvironment:
37
 
38
  # Reward component trackers
39
  self._wf_score = 0.0 # workflow completion
40
- self._rule_score = 1.0 # compliance (starts perfect, penalized on violation)
41
  self._schema_score = 0.0 # schema adaptation successes
42
- self._efficiency = 1.0 # degrades with failed/no-op actions
43
  self._policy_score = 0.0 # policy drift handling bonus
44
 
45
  # ------------------------------------------------------------------
@@ -52,10 +52,10 @@ class OrgOSEnvironment:
52
  self._workflow_id = workflow_id or self.WORKFLOWS[(self._episode_num - 1) % 3]
53
  self._step_count = 0
54
  self._last_score = 0.001
55
- self._rule_score = 1.0
56
  self._wf_score = 0.0
57
  self._schema_score = 0.0
58
- self._efficiency = 1.0
59
  self._policy_score = 0.0
60
  self._policy_drift_applied = False
61
 
@@ -132,6 +132,10 @@ class OrgOSEnvironment:
132
  message=result.get("message", "Operation failed"),
133
  )
134
 
 
 
 
 
135
  # Schema adaptation bonus (agent used correct drifted field name)
136
  if result.get("schema_adapted"):
137
  self._schema_score = min(1.0, self._schema_score + 0.10)
 
37
 
38
  # Reward component trackers
39
  self._wf_score = 0.0 # workflow completion
40
+ self._rule_score = 0.0 # compliance earned +0.10 per successful action
41
  self._schema_score = 0.0 # schema adaptation successes
42
+ self._efficiency = 0.0 # efficiency earned +0.10 per successful action
43
  self._policy_score = 0.0 # policy drift handling bonus
44
 
45
  # ------------------------------------------------------------------
 
52
  self._workflow_id = workflow_id or self.WORKFLOWS[(self._episode_num - 1) % 3]
53
  self._step_count = 0
54
  self._last_score = 0.001
55
+ self._rule_score = 0.0
56
  self._wf_score = 0.0
57
  self._schema_score = 0.0
58
+ self._efficiency = 0.0
59
  self._policy_score = 0.0
60
  self._policy_drift_applied = False
61
 
 
132
  message=result.get("message", "Operation failed"),
133
  )
134
 
135
+ # Earn compliance + efficiency for every successful action
136
+ self._rule_score = min(1.0, self._rule_score + 0.10)
137
+ self._efficiency = min(1.0, self._efficiency + 0.10)
138
+
139
  # Schema adaptation bonus (agent used correct drifted field name)
140
  if result.get("schema_adapted"):
141
  self._schema_score = min(1.0, self._schema_score + 0.10)
server/schema_drift.py CHANGED
@@ -77,17 +77,20 @@ class SchemaDriftEngine:
77
  return None, False
78
 
79
  def get_hints(self) -> Dict[str, str]:
80
- """Return partial schema hints visible in observation.
81
- Reveals 1 changed field per app that has drift (agent must discover the rest)."""
82
- hints = {}
83
- rng = random.Random(self._seed)
84
  for app, version in self._versions.items():
85
  mapping = SCHEMA_MAP.get(app, {}).get(version, {})
86
- changed = {f"{app}.{k}": v for k, v in mapping.items() if k != v}
87
- if changed:
88
- key = rng.choice(list(changed.keys()))
89
- hints[key] = changed[key]
90
- return hints
 
 
 
 
91
 
92
  def get_all_changes(self) -> Dict[str, Dict[str, str]]:
93
  """Return all field changes for every app (used by UI schema drift viewer)."""
 
77
  return None, False
78
 
79
  def get_hints(self) -> Dict[str, str]:
80
+ """Return exactly 1 schema hint total across all apps.
81
+ Agent must probe with get_* / list_* to discover the rest of the drift."""
82
+ all_hints: Dict[str, str] = {}
 
83
  for app, version in self._versions.items():
84
  mapping = SCHEMA_MAP.get(app, {}).get(version, {})
85
+ all_hints.update(
86
+ {f"{app}.{k}": v for k, v in mapping.items() if k != v}
87
+ )
88
+ if not all_hints:
89
+ return {}
90
+ # Pick one hint deterministically — sorted for reproducibility
91
+ rng = random.Random(self._seed)
92
+ key = rng.choice(sorted(all_hints.keys()))
93
+ return {key: all_hints[key]}
94
 
95
  def get_all_changes(self) -> Dict[str, Dict[str, str]]:
96
  """Return all field changes for every app (used by UI schema drift viewer)."""