hiitsesh committed on
Commit
ffda6a0
·
1 Parent(s): 73b708a

fix: improve grading logic to handle NaN values and enforce score boundaries

Browse files
Files changed (2) hide show
  1. adv_rebuild.py +15 -1
  2. server/app.py +15 -1
adv_rebuild.py CHANGED
@@ -237,7 +237,21 @@ def grader():
237
  # Grade relative to typical maximum and minimum returns to generate a 0.0-1.0 range
238
  baseline_offset = env.config.max_steps * 1000.0 # Compensate for penalties
239
  scale_factor = env.config.max_steps * 1500.0
240
- score = max(0.001, min(0.999, (env.total_reward + baseline_offset) / scale_factor))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  return {"score": score}
242
 
243
  @app.post("/baseline")
 
237
  # Grade relative to typical maximum and minimum returns to generate a 0.0-1.0 range
238
  baseline_offset = env.config.max_steps * 1000.0 # Compensate for penalties
239
  scale_factor = env.config.max_steps * 1500.0
240
+ try:
241
+ raw_score = float(env.total_reward + baseline_offset) / scale_factor
242
+ import math
243
+ if math.isnan(raw_score):
244
+ score = 0.001
245
+ else:
246
+ score = float(max(0.001, min(0.999, raw_score)))
247
+ except:
248
+ score = 0.001
249
+
250
+ if score >= 1.0:
251
+ score = 0.999
252
+ elif score <= 0.0:
253
+ score = 0.001
254
+
255
  return {"score": score}
256
 
257
  @app.post("/baseline")
server/app.py CHANGED
@@ -48,7 +48,21 @@ def grader():
48
  # Grade relative to typical maximum and minimum returns to generate a 0.0-1.0 range
49
  baseline_offset = env.config.max_steps * 1000.0 # Compensate for penalties
50
  scale_factor = env.config.max_steps * 1500.0
51
- score = max(0.001, min(0.999, (env.total_reward + baseline_offset) / scale_factor))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  return {"score": score}
53
 
54
  @app.post("/baseline")
 
48
  # Grade relative to typical maximum and minimum returns to generate a 0.0-1.0 range
49
  baseline_offset = env.config.max_steps * 1000.0 # Compensate for penalties
50
  scale_factor = env.config.max_steps * 1500.0
51
+ try:
52
+ raw_score = float(env.total_reward + baseline_offset) / scale_factor
53
+ import math
54
+ if math.isnan(raw_score):
55
+ score = 0.001
56
+ else:
57
+ score = float(max(0.001, min(0.999, raw_score)))
58
+ except:
59
+ score = 0.001
60
+
61
+ if score >= 1.0:
62
+ score = 0.999
63
+ elif score <= 0.0:
64
+ score = 0.001
65
+
66
  return {"score": score}
67
 
68
  @app.post("/baseline")