| --- a/benchmarks/validate.py | |
| +++ b/benchmarks/validate.py | |
| def make_mock(task_name): | |
| mock = MockLLMBackend() | |
| t = TASKS[task_name] | |
| def actor(msgs): | |
| text = " ".join(m.content for m in msgs) | |
| - has_h = "Learned Strategies" in text and "None yet" not in text | |
| + has_h = ("Learned Strategies" in text or "When:" in text) and "None yet" not in text | |
| code = t["good"] if has_h else t["bad"] | |