Spaces:

samrat-rm
/

WhyDidItFail

Sleeping

samrat-rm committed 14 days ago

Commit

6b279f6

1 Parent(s): d3b224f

chore: update doc string

Files changed (2) hide show

models.py CHANGED Viewed

@@ -29,7 +29,7 @@ class WhyDidItFailState(State):
     required_sources: list[str] = Field(default_factory=list, description=
         "Sources the agent must inspect before submitting a valid diagnosis.")
     max_steps: int = Field(0, description=
-        "Hard step ceiling for this episode. Exceeding it terminates with score 0.")
 class WhyDidItFailObservation(Observation):

     required_sources: list[str] = Field(default_factory=list, description=
         "Sources the agent must inspect before submitting a valid diagnosis.")
     max_steps: int = Field(0, description=
+        "Hard step ceiling for this episode. Exceeding it terminates with score 0.10.")
 class WhyDidItFailObservation(Observation):

server/llm_judge.py CHANGED Viewed

@@ -63,7 +63,7 @@ def judge(
     scenario: dict,
     inspection_order: list[str],
 ) -> float | None:
-    """Score reasoning quality. Returns 0.0–1.0, or None if unavailable/failed.
     None signals the caller to skip judge weighting entirely and use the
     keyword score at full weight (1.0) rather than 0.85.

     scenario: dict,
     inspection_order: list[str],
 ) -> float | None:
+    """Score reasoning quality. Returns 0.10–0.90, or None if unavailable/failed.
     None signals the caller to skip judge weighting entirely and use the
     keyword score at full weight (1.0) rather than 0.85.