| from recap.reasoner import EXTRACT_SYSTEM, SYNTHESIZE_SYSTEM, two_stage |
|
|
|
|
def test_two_stage_pipes_extract_into_synthesize():
    """Check that two_stage wires the extract stage into the synthesize stage.

    Both stages are replaced with recording fakes so that the system and
    user prompts handed to each one can be inspected after the call.
    """
    question = "When did kidney function decline?"
    records = "Patient records block"
    evidence = "Cr 1.4 mg/dL on 2022-03-14 [src:lab.pdf]"
    answer = "Creatinine first crossed normal in March 2022 [src:lab.pdf]."

    # Maps stage name -> (system prompt, user prompt) of its latest call.
    calls: dict = {}

    def fake_extract(system, user):
        calls["extract"] = (system, user)
        return evidence

    def fake_synth(system, user):
        calls["synth"] = (system, user)
        return answer

    result = two_stage(
        question,
        records,
        extract_fn=fake_extract,
        synthesize_fn=fake_synth,
    )

    # The pipeline returns exactly what the synthesize stage produced.
    assert result == answer

    # Extract stage: user prompt carries the records and the question,
    # and the system prompt is the module's extraction prompt.
    extract_system, extract_user = calls["extract"]
    assert records in extract_user
    assert question in extract_user
    assert extract_system == EXTRACT_SYSTEM

    # Synthesize stage: user prompt carries the extracted evidence and the
    # question, and the system prompt is the module's synthesis prompt.
    synth_system, synth_user = calls["synth"]
    assert evidence in synth_user
    assert question in synth_user
    assert synth_system == SYNTHESIZE_SYSTEM
|
|
|
|
def test_citations_survive_the_pipeline():
    """A [src:...] marker in the synthesized answer must be in the final output.

    Per the original note, the two-stage design exists so that the
    extractor's (MedGemma's) citation markers come through the synthesizer
    (Qwen) intact and the gateway can parse them. Here both stages are
    canned fakes, so the check is that two_stage returns the synthesizer's
    text with the marker still present.
    """
    marker = "[src:lab_2022.pdf#p1]"

    result = two_stage(
        "when?",
        "block",
        # Parameter names match the original fakes (s = system, u = user).
        extract_fn=lambda s, u: (
            "Cr 1.4 [src:lab_2022.pdf#p1] eGFR 52 [src:lab_2022.pdf#p1]"
        ),
        synthesize_fn=lambda s, u: (
            "She crossed the CKD threshold [src:lab_2022.pdf#p1]."
        ),
    )

    assert marker in result
|
|
|
|
def test_evidence_string_is_passed_verbatim_to_synth():
    """Whitespace-padded evidence must reach the synthesizer without its padding.

    The extractor fake returns text with surrounding spaces and trailing
    blank lines; the synthesizer's user prompt is expected to contain the
    bare text but not the padded form.

    NOTE(review): the test name says "verbatim", yet the assertions require
    the padding to be gone. The name is kept as-is so test selection by
    name keeps working.
    """
    synth_prompts = []

    def fake_synth(s, u):
        # Record every user prompt; only the first call is inspected below.
        synth_prompts.append(u)
        return "answer"

    two_stage(
        "q",
        "b",
        extract_fn=lambda s, u: " evidence text \n\n",
        synthesize_fn=fake_synth,
    )

    first_prompt = synth_prompts[0]
    assert " evidence text " not in first_prompt
    assert "evidence text" in first_prompt
|
|