kushalExplores committed on
Commit
9ff23e0
·
verified ·
1 Parent(s): 4d725fc

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +10 -3
inference.py CHANGED
@@ -84,8 +84,9 @@ def log_payload_generator_methods(tool_name: str, generator_methods: list[dict[s
84
 
85
  def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
86
  error_val = error if error else "null"
 
87
  print(
88
- f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_val}",
89
  flush=True,
90
  )
91
 
@@ -95,10 +96,16 @@ def bounded_task_score(score: float) -> float:
95
  return min(1.0 - SCORE_EPSILON, max(SCORE_EPSILON, score))
96
 
97
 
 
 
 
 
 
98
  def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
99
- rewards_str = ",".join(f"{reward:.2f}" for reward in rewards)
 
100
  print(
101
- f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
102
  flush=True,
103
  )
104
 
 
84
 
85
  def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
86
  error_val = error if error else "null"
87
+ clipped_reward = bounded_log_reward(reward)
88
  print(
89
+ f"[STEP] step={step} action={action} reward={clipped_reward:.2f} done={str(done).lower()} error={error_val}",
90
  flush=True,
91
  )
92
 
 
96
  return min(1.0 - SCORE_EPSILON, max(SCORE_EPSILON, score))
97
 
98
 
99
+ def bounded_log_reward(reward: float) -> float:
100
+ """Clamp logged scores and rewards to [0.01, 0.99]."""
101
+ return min(0.99, max(0.01, reward))
102
+
103
+
104
  def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
105
+ clipped_score = bounded_log_reward(score)
106
+ rewards_str = ",".join(f"{bounded_log_reward(reward):.2f}" for reward in rewards)
107
  print(
108
+ f"[END] success={str(success).lower()} steps={steps} score={clipped_score:.2f} rewards={rewards_str}",
109
  flush=True,
110
  )
111