Spaces:
Sleeping
Sleeping
security patches
Browse files
server/legal_auditor_env_environment.py
CHANGED
|
@@ -115,8 +115,8 @@ class LegalAuditorEnvironment(
|
|
| 115 |
return LegalAuditorObservation(
|
| 116 |
clause_text = text,
|
| 117 |
clause_index = self.clause_index,
|
| 118 |
-
agent_reliability = round(max(0.
|
| 119 |
-
ai_analysis_grade = round(max(0.
|
| 120 |
is_risk_detected = False,
|
| 121 |
)
|
| 122 |
|
|
@@ -125,16 +125,20 @@ class LegalAuditorEnvironment(
|
|
| 125 |
return LegalAuditorState(
|
| 126 |
total_reward = round(float(self.total_agent_reward), 4),
|
| 127 |
processed_steps = self.clause_index,
|
| 128 |
-
current_reliability = round(max(0.
|
| 129 |
-
analysis_confidence = round(max(0.
|
| 130 |
)
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
def step(
|
| 133 |
self, action: LegalAuditorAction
|
| 134 |
) -> Tuple[LegalAuditorObservation, float, bool, Dict[str, Any]]:
|
| 135 |
# Handle terminal state boundary
|
| 136 |
if self.clause_index >= len(self.current_doc_clauses):
|
| 137 |
-
return self._get_current_obs(), 0.
|
| 138 |
|
| 139 |
text = self.current_doc_clauses[self.clause_index]
|
| 140 |
oracle_data = oracle_judge.evaluate_clause(text)
|
|
@@ -146,14 +150,14 @@ class LegalAuditorEnvironment(
|
|
| 146 |
elif action.action == 1 and label == 0: raw_reward = REWARD_FALSE_POSITIVE
|
| 147 |
else: raw_reward = REWARD_FALSE_NEGATIVE
|
| 148 |
|
| 149 |
-
# ── CRITICAL CHANGE: Clamp reward strictly between 0.
|
| 150 |
-
reward =
|
| 151 |
|
| 152 |
self.total_agent_reward += reward
|
| 153 |
self.clause_index += 1
|
| 154 |
|
| 155 |
# Recalculate reliability based on clamped rewards
|
| 156 |
-
self.current_reliability
|
| 157 |
self.analysis_confidence = reward
|
| 158 |
|
| 159 |
self.session_buffer.append({
|
|
@@ -177,7 +181,7 @@ class LegalAuditorEnvironment(
|
|
| 177 |
self._get_current_obs(),
|
| 178 |
reward,
|
| 179 |
done,
|
| 180 |
-
{"ai_grade": round(max(0.
|
| 181 |
)
|
| 182 |
|
| 183 |
|
|
|
|
| 115 |
return LegalAuditorObservation(
|
| 116 |
clause_text = text,
|
| 117 |
clause_index = self.clause_index,
|
| 118 |
+
agent_reliability = round(max(0.05, min(0.95, self.current_reliability)), 4),
|
| 119 |
+
ai_analysis_grade = round(max(0.05, min(0.95, self.analysis_confidence)), 4),
|
| 120 |
is_risk_detected = False,
|
| 121 |
)
|
| 122 |
|
|
|
|
| 125 |
return LegalAuditorState(
|
| 126 |
total_reward = round(float(self.total_agent_reward), 4),
|
| 127 |
processed_steps = self.clause_index,
|
| 128 |
+
current_reliability = round(max(0.05, min(0.95, self.current_reliability)), 4),
|
| 129 |
+
analysis_confidence = round(max(0.05, min(0.95, self.analysis_confidence)), 4),
|
| 130 |
)
|
| 131 |
+
def _normalize(self, val: float, *, lower: float = 0.0512, upper: float = 0.9488) -> float:
    """Rescale a score from the [-1, 1] scale and clamp it inside (0, 1).

    The value is shifted and halved (``(val + 1) / 2``), clamped to
    ``[lower, upper]`` and rounded to four decimal places, so the result
    never reaches exactly 0.0 or 1.0 with the default bounds.

    Args:
        val: Score to rescale; -1.0 maps to 0.0 and 1.0 maps to 1.0
            before clamping.
        lower: Smallest value that may be returned.
        upper: Largest value that may be returned.

    Returns:
        The rescaled, clamped score rounded to four decimals. A NaN input
        yields ``lower``.
    """
    import math

    # Shift and scale: -1.0 -> 0.0, 0.0 -> 0.5, 1.0 -> 1.0.
    norm = (val + 1.0) / 2.0

    # min()/max() compare False against NaN, so min(upper, nan) would
    # silently return `upper` and award the best possible score to an
    # undefined value. Fall back to the lowest score instead.
    if math.isnan(norm):
        return round(lower, 4)

    # Keep a strict buffer away from 0.0 and 1.0.
    return round(max(lower, min(upper, norm)), 4)
|
| 136 |
def step(
|
| 137 |
self, action: LegalAuditorAction
|
| 138 |
) -> Tuple[LegalAuditorObservation, float, bool, Dict[str, Any]]:
|
| 139 |
# Handle terminal state boundary
|
| 140 |
if self.clause_index >= len(self.current_doc_clauses):
|
| 141 |
+
return self._get_current_obs(), 0.05, True, {}
|
| 142 |
|
| 143 |
text = self.current_doc_clauses[self.clause_index]
|
| 144 |
oracle_data = oracle_judge.evaluate_clause(text)
|
|
|
|
| 150 |
elif action.action == 1 and label == 0: raw_reward = REWARD_FALSE_POSITIVE
|
| 151 |
else: raw_reward = REWARD_FALSE_NEGATIVE
|
| 152 |
|
| 153 |
+
# ── CRITICAL CHANGE: Clamp reward strictly between 0.05 and 0.95 ──
|
| 154 |
+
reward = self._normalize(raw_reward)
|
| 155 |
|
| 156 |
self.total_agent_reward += reward
|
| 157 |
self.clause_index += 1
|
| 158 |
|
| 159 |
# Recalculate reliability based on clamped rewards
|
| 160 |
+
self.current_reliability = self._normalize(self.total_agent_reward / self.clause_index if self.clause_index > 0 else 0)
|
| 161 |
self.analysis_confidence = reward
|
| 162 |
|
| 163 |
self.session_buffer.append({
|
|
|
|
| 181 |
self._get_current_obs(),
|
| 182 |
reward,
|
| 183 |
done,
|
| 184 |
+
{"ai_grade": round(max(0.05, min(0.95, self.analysis_confidence)), 4)}
|
| 185 |
)
|
| 186 |
|
| 187 |
|