Commit: Sanitize result payload score fields for deep validation

Files changed:
- baseline/run_baseline.py (+7, -4)
- pre_submission_check.py (+2, -2)

baseline/run_baseline.py (CHANGED)
|
@@ -84,9 +84,12 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
|
|
| 84 |
)
|
| 85 |
|
| 86 |
|
| 87 |
-
def log_end(success: bool, steps: int, rewards: List[float]) -> None:
|
| 88 |
rewards_str = ",".join(f"{reward:.2f}" for reward in rewards)
|
| 89 |
-
print(
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
def structured_stdout_only() -> bool:
|
|
@@ -501,7 +504,7 @@ def run_difficulty_baseline(agent_mode: str, client: Optional[Any], difficulty:
|
|
| 501 |
result = env.get_result()
|
| 502 |
score = result.final_score
|
| 503 |
success = bool(result.task_completed and not result.exfil_occurred)
|
| 504 |
-
log_end(success=success, steps=steps_taken, rewards=rewards)
|
| 505 |
|
| 506 |
difficulty_results.append(
|
| 507 |
{
|
|
@@ -520,7 +523,7 @@ def run_difficulty_baseline(agent_mode: str, client: Optional[Any], difficulty:
|
|
| 520 |
)
|
| 521 |
except Exception as exc:
|
| 522 |
emit_runtime_error(f"[ERROR] Episode {episode_number} failed: {exc}")
|
| 523 |
-
log_end(success=False, steps=steps_taken, rewards=rewards)
|
| 524 |
difficulty_results.append(
|
| 525 |
{
|
| 526 |
"episode": episode_number,
|
|
|
|
| 84 |
)
|
| 85 |
|
| 86 |
|
| 87 |
+
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Emit the structured ``[END]`` log line summarizing one episode.

    Args:
        success: Whether the episode completed successfully (rendered lowercase).
        steps: Number of steps the episode took.
        score: Final episode score, rendered to four decimal places.
        rewards: Per-step rewards, each rendered to two decimal places and
            comma-joined (empty string when no rewards were collected).
    """
    formatted_rewards = ",".join(f"{r:.2f}" for r in rewards)
    message = (
        f"[END] success={str(success).lower()} steps={steps} "
        f"score={score:.4f} rewards={formatted_rewards}"
    )
    # flush so downstream log-contract checkers see the line immediately
    print(message, flush=True)
|
| 93 |
|
| 94 |
|
| 95 |
def structured_stdout_only() -> bool:
|
|
|
|
| 504 |
result = env.get_result()
|
| 505 |
score = result.final_score
|
| 506 |
success = bool(result.task_completed and not result.exfil_occurred)
|
| 507 |
+
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 508 |
|
| 509 |
difficulty_results.append(
|
| 510 |
{
|
|
|
|
| 523 |
)
|
| 524 |
except Exception as exc:
|
| 525 |
emit_runtime_error(f"[ERROR] Episode {episode_number} failed: {exc}")
|
| 526 |
+
log_end(success=False, steps=steps_taken, score=STRICT_SCORE_EPSILON, rewards=rewards)
|
| 527 |
difficulty_results.append(
|
| 528 |
{
|
| 529 |
"episode": episode_number,
|
pre_submission_check.py
CHANGED
|
@@ -141,13 +141,13 @@ def structured_log_contract_validation() -> Dict[str, Any]:
|
|
| 141 |
with redirect_stdout(output):
|
| 142 |
baseline_runner.log_start("task-1", "env-1", "model-1")
|
| 143 |
baseline_runner.log_step(1, "ReadDoc", 0.5, False, None)
|
| 144 |
-
baseline_runner.log_end(True, 1, [0.5])
|
| 145 |
|
| 146 |
lines = [line.strip() for line in output.getvalue().splitlines() if line.strip()]
|
| 147 |
expected = [
|
| 148 |
"[START] task=task-1 env=env-1 model=model-1",
|
| 149 |
"[STEP] step=1 action=ReadDoc reward=0.50 done=false error=null",
|
| 150 |
-
"[END] success=true steps=1 rewards=0.50",
|
| 151 |
]
|
| 152 |
return {
|
| 153 |
"name": "structured_log_contract",
|
|
|
|
| 141 |
with redirect_stdout(output):
|
| 142 |
baseline_runner.log_start("task-1", "env-1", "model-1")
|
| 143 |
baseline_runner.log_step(1, "ReadDoc", 0.5, False, None)
|
| 144 |
+
baseline_runner.log_end(True, 1, 0.5001, [0.5])
|
| 145 |
|
| 146 |
lines = [line.strip() for line in output.getvalue().splitlines() if line.strip()]
|
| 147 |
expected = [
|
| 148 |
"[START] task=task-1 env=env-1 model=model-1",
|
| 149 |
"[STEP] step=1 action=ReadDoc reward=0.50 done=false error=null",
|
| 150 |
+
"[END] success=true steps=1 score=0.5001 rewards=0.50",
|
| 151 |
]
|
| 152 |
return {
|
| 153 |
"name": "structured_log_contract",
|