M0SSHEAD committed on
Commit
361711a
·
1 Parent(s): ee28c01

security patches

Browse files
server/legal_auditor_env_environment.py CHANGED
@@ -115,8 +115,8 @@ class LegalAuditorEnvironment(
115
  return LegalAuditorObservation(
116
  clause_text = text,
117
  clause_index = self.clause_index,
118
- agent_reliability = round(max(0.01, min(0.99, self.current_reliability)), 4),
119
- ai_analysis_grade = round(max(0.01, min(0.99, self.analysis_confidence)), 4),
120
  is_risk_detected = False,
121
  )
122
 
@@ -125,16 +125,20 @@ class LegalAuditorEnvironment(
125
  return LegalAuditorState(
126
  total_reward = round(float(self.total_agent_reward), 4),
127
  processed_steps = self.clause_index,
128
- current_reliability = round(max(0.01, min(0.99, self.current_reliability)), 4),
129
- analysis_confidence = round(max(0.01, min(0.99, self.analysis_confidence)), 4),
130
  )
131
-
 
 
 
 
132
  def step(
133
  self, action: LegalAuditorAction
134
  ) -> Tuple[LegalAuditorObservation, float, bool, Dict[str, Any]]:
135
  # Handle terminal state boundary
136
  if self.clause_index >= len(self.current_doc_clauses):
137
- return self._get_current_obs(), 0.01, True, {}
138
 
139
  text = self.current_doc_clauses[self.clause_index]
140
  oracle_data = oracle_judge.evaluate_clause(text)
@@ -146,14 +150,14 @@ class LegalAuditorEnvironment(
146
  elif action.action == 1 and label == 0: raw_reward = REWARD_FALSE_POSITIVE
147
  else: raw_reward = REWARD_FALSE_NEGATIVE
148
 
149
- # ── CRITICAL CHANGE: Clamp reward strictly between 0.01 and 0.99 ──
150
- reward = round(max(0.01, min(0.99, raw_reward)), 4)
151
 
152
  self.total_agent_reward += reward
153
  self.clause_index += 1
154
 
155
  # Recalculate reliability based on clamped rewards
156
- self.current_reliability = self.total_agent_reward / self.clause_index
157
  self.analysis_confidence = reward
158
 
159
  self.session_buffer.append({
@@ -177,7 +181,7 @@ class LegalAuditorEnvironment(
177
  self._get_current_obs(),
178
  reward,
179
  done,
180
- {"ai_grade": round(max(0.01, min(0.99, self.analysis_confidence)), 4)}
181
  )
182
 
183
 
 
115
  return LegalAuditorObservation(
116
  clause_text = text,
117
  clause_index = self.clause_index,
118
+ agent_reliability = round(max(0.05, min(0.95, self.current_reliability)), 4),
119
+ ai_analysis_grade = round(max(0.05, min(0.95, self.analysis_confidence)), 4),
120
  is_risk_detected = False,
121
  )
122
 
 
125
  return LegalAuditorState(
126
  total_reward = round(float(self.total_agent_reward), 4),
127
  processed_steps = self.clause_index,
128
+ current_reliability = round(max(0.05, min(0.95, self.current_reliability)), 4),
129
+ analysis_confidence = round(max(0.05, min(0.95, self.analysis_confidence)), 4),
130
  )
131
+ def _normalize(self,val: float) -> float:
132
+ # Linearly rescale: (val + 1) / 2 maps the [-1.0, 1.0] range onto [0.0, 1.0]
133
+ norm = (val + 1.0) / 2.0
134
+ # Apply strict buffer to stay away from 0.0 and 1.0
135
+ return round(max(0.0512, min(0.9488, norm)), 4)
136
  def step(
137
  self, action: LegalAuditorAction
138
  ) -> Tuple[LegalAuditorObservation, float, bool, Dict[str, Any]]:
139
  # Handle terminal state boundary
140
  if self.clause_index >= len(self.current_doc_clauses):
141
+ return self._get_current_obs(), 0.05, True, {}
142
 
143
  text = self.current_doc_clauses[self.clause_index]
144
  oracle_data = oracle_judge.evaluate_clause(text)
 
150
  elif action.action == 1 and label == 0: raw_reward = REWARD_FALSE_POSITIVE
151
  else: raw_reward = REWARD_FALSE_NEGATIVE
152
 
153
+ # ── CRITICAL CHANGE: Rescale reward via _normalize, then clamp it to [0.0512, 0.9488] ──
154
+ reward = self._normalize(raw_reward)
155
 
156
  self.total_agent_reward += reward
157
  self.clause_index += 1
158
 
159
  # Recalculate reliability based on clamped rewards
160
+ self.current_reliability = self._normalize(self.total_agent_reward / self.clause_index if self.clause_index > 0 else 0)
161
  self.analysis_confidence = reward
162
 
163
  self.session_buffer.append({
 
181
  self._get_current_obs(),
182
  reward,
183
  done,
184
+ {"ai_grade": round(max(0.05, min(0.95, self.analysis_confidence)), 4)}
185
  )
186
 
187