Spaces:
Running
Running
fix: improve grading logic to handle NaN values and enforce score boundaries
Browse files
- adv_rebuild.py +15 -1
- server/app.py +15 -1
adv_rebuild.py
CHANGED
|
@@ -237,7 +237,21 @@ def grader():
|
|
| 237 |
# Grade relative to typical maximum and minimum returns to generate a 0.0-1.0 range
|
| 238 |
baseline_offset = env.config.max_steps * 1000.0 # Compensate for penalties
|
| 239 |
scale_factor = env.config.max_steps * 1500.0
|
| 240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
return {"score": score}
|
| 242 |
|
| 243 |
@app.post("/baseline")
|
|
|
|
| 237 |
# Grade relative to typical maximum and minimum returns to generate a 0.0-1.0 range
|
| 238 |
baseline_offset = env.config.max_steps * 1000.0 # Compensate for penalties
|
| 239 |
scale_factor = env.config.max_steps * 1500.0
|
| 240 |
+
try:
|
| 241 |
+
raw_score = float(env.total_reward + baseline_offset) / scale_factor
|
| 242 |
+
import math
|
| 243 |
+
if math.isnan(raw_score):
|
| 244 |
+
score = 0.001
|
| 245 |
+
else:
|
| 246 |
+
score = float(max(0.001, min(0.999, raw_score)))
|
| 247 |
+
except:
|
| 248 |
+
score = 0.001
|
| 249 |
+
|
| 250 |
+
if score >= 1.0:
|
| 251 |
+
score = 0.999
|
| 252 |
+
elif score <= 0.0:
|
| 253 |
+
score = 0.001
|
| 254 |
+
|
| 255 |
return {"score": score}
|
| 256 |
|
| 257 |
@app.post("/baseline")
|
server/app.py
CHANGED
|
@@ -48,7 +48,21 @@ def grader():
|
|
| 48 |
# Grade relative to typical maximum and minimum returns to generate a 0.0-1.0 range
|
| 49 |
baseline_offset = env.config.max_steps * 1000.0 # Compensate for penalties
|
| 50 |
scale_factor = env.config.max_steps * 1500.0
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
return {"score": score}
|
| 53 |
|
| 54 |
@app.post("/baseline")
|
|
|
|
| 48 |
# Grade relative to typical maximum and minimum returns to generate a 0.0-1.0 range
|
| 49 |
baseline_offset = env.config.max_steps * 1000.0 # Compensate for penalties
|
| 50 |
scale_factor = env.config.max_steps * 1500.0
|
| 51 |
+
try:
|
| 52 |
+
raw_score = float(env.total_reward + baseline_offset) / scale_factor
|
| 53 |
+
import math
|
| 54 |
+
if math.isnan(raw_score):
|
| 55 |
+
score = 0.001
|
| 56 |
+
else:
|
| 57 |
+
score = float(max(0.001, min(0.999, raw_score)))
|
| 58 |
+
except:
|
| 59 |
+
score = 0.001
|
| 60 |
+
|
| 61 |
+
if score >= 1.0:
|
| 62 |
+
score = 0.999
|
| 63 |
+
elif score <= 0.0:
|
| 64 |
+
score = 0.001
|
| 65 |
+
|
| 66 |
return {"score": score}
|
| 67 |
|
| 68 |
@app.post("/baseline")
|