nevernever69 committed on
Commit
843f8ab
·
verified ·
1 Parent(s): 374b1d5

Upload inference.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +6 -5
inference.py CHANGED
@@ -42,8 +42,8 @@ from redveil.grader import grade_task
42
  # Configuration
43
  # ---------------------------------------------------------------------------
44
 
45
- API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
46
- MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
47
  API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY", "")
48
 
49
  BENCHMARK = "redveil"
@@ -101,10 +101,11 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
101
  )
102
 
103
 
104
- def log_end(success: bool, steps: int, rewards: List[float]) -> None:
105
  rewards_str = ",".join(f"{_clamp_reward(r):.2f}" for r in rewards)
 
106
  print(
107
- f"[END] success={str(success).lower()} steps={steps} rewards={rewards_str}",
108
  flush=True,
109
  )
110
 
@@ -228,7 +229,7 @@ def run_task(env: RedVeilEnvironment, client: OpenAI, task_id: str) -> dict:
228
  score = min(max(score, 0.01), 0.99)
229
  success = score > 0.01
230
 
231
- log_end(success=success, steps=step_num, rewards=rewards)
232
 
233
  return {
234
  "task_id": task_id,
 
42
  # Configuration
43
  # ---------------------------------------------------------------------------
44
 
45
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
46
+ MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
47
  API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY", "")
48
 
49
  BENCHMARK = "redveil"
 
101
  )
102
 
103
 
104
+ def log_end(success: bool, steps: int, rewards: List[float], score: float) -> None:
105
  rewards_str = ",".join(f"{_clamp_reward(r):.2f}" for r in rewards)
106
+ clamped_score = _clamp_reward(score)
107
  print(
108
+ f"[END] success={str(success).lower()} steps={steps} score={clamped_score:.2f} rewards={rewards_str}",
109
  flush=True,
110
  )
111
 
 
229
  score = min(max(score, 0.01), 0.99)
230
  success = score > 0.01
231
 
232
+ log_end(success=success, steps=step_num, rewards=rewards, score=score)
233
 
234
  return {
235
  "task_id": task_id,