Spaces:
Running
Running
| from app.models.chat import SourceRef | |
| from app.pipeline.nodes.generate import _dedup_sources, _reindex_citations_and_sources | |
def test_reindex_citations_compacts_in_first_mention_order() -> None:
    """Check that cited markers are renumbered compactly by first mention.

    Four sources are supplied, but the answer only cites ``[4]``, then
    ``[2]``, then ``[4]`` again. The test expects the rewritten answer to
    contain ``[1]`` and ``[2]`` but no ``[4]``, and the returned sources to
    be D followed by B.
    """
    page_specs = (
        ("A", "https://x/a"),
        ("B", "https://x/b"),
        ("C", "https://x/c"),
        ("D", "https://x/d"),
    )
    candidates = [
        SourceRef(title=label, url=link, section="s", source_type="project")
        for label, link in page_specs
    ]

    rewritten, kept = _reindex_citations_and_sources(
        "Uses D [4], then B [2], then D again [4].",
        candidates,
    )

    # Both compacted markers must be present; the old high index must be gone.
    for expected_marker in ("[1]", "[2]"):
        assert expected_marker in rewritten
    assert "[4]" not in rewritten
    assert [ref.title for ref in kept] == ["D", "B"]
def test_reindex_citations_merges_same_document_indices() -> None:
    """Check that citations to chunks sharing one URL collapse together.

    Sources 1 and 2 carry the same resume URL (different sections) and
    source 3 is a separate project page. The test expects no doubled
    ``[1][1]`` marker in the rewritten answer, both ``[1]`` and ``[2]`` to
    be present, and exactly two returned sources: resume, then project.
    """
    resume_url = "https://darshanchheda.com/resume"
    textops_url = "https://darshanchheda.com/projects/textops"

    # Two different old indices point to the same document URL.
    refs = [
        SourceRef(title="Resume", url=resume_url, section="Experience", source_type="cv"),
        SourceRef(title="Resume", url=resume_url, section="Skills", source_type="cv"),
        SourceRef(title="TextOps", url=textops_url, section="Overview", source_type="project"),
    ]
    text = "He has QA experience [1][2] and built TextOps [3]."

    rewritten, kept = _reindex_citations_and_sources(text, refs)

    assert "[1][1]" not in rewritten
    assert "[1]" in rewritten and "[2]" in rewritten
    assert [ref.url for ref in kept] == [resume_url, textops_url]
def test_reindex_citations_drops_out_of_range_markers() -> None:
    """Check that a marker with no matching source is removed.

    Only one source exists, yet the answer cites ``[1]`` and ``[9]``. The
    test expects ``[1]`` to survive, ``[9]`` to be absent from the
    rewritten answer, and a single source to be returned.
    """
    lone_ref = SourceRef(
        title="Only",
        url="https://x/only",
        section="s",
        source_type="project",
    )

    rewritten, kept = _reindex_citations_and_sources(
        "Valid [1], invalid [9].",
        [lone_ref],
    )

    assert "[1]" in rewritten
    assert "[9]" not in rewritten
    assert len(kept) == 1
def test_reindex_citations_merges_url_variants_same_document() -> None:
    """Check that differently spelled URLs for one page are merged.

    The two sources differ only in URL decoration (trailing slash versus a
    ``www.`` host with query string and fragment) and in section. The test
    expects the adjacent ``[1][2]`` citations to become a single ``[1]``
    and exactly one source, titled "Resume", to be returned.
    """
    url_and_section = (
        ("https://darshanchheda.com/resume/", "Experience"),
        ("https://www.darshanchheda.com/resume?ref=nav#top", "Skills"),
    )
    variants = [
        SourceRef(title="Resume", url=link, section=part, source_type="cv")
        for link, part in url_and_section
    ]

    rewritten, kept = _reindex_citations_and_sources(
        "Resume evidence appears in both chunks [1][2].",
        variants,
    )

    # Exactly one marker remains once the duplicate citation is collapsed.
    assert rewritten.count("[1]") == 1
    assert "[2]" not in rewritten
    assert len(kept) == 1
    assert kept[0].title == "Resume"
def test_dedup_sources_merges_url_variants() -> None:
    """Check that ``_dedup_sources`` folds URL variants of one page.

    The first two sources name the resume page without a scheme and with a
    scheme plus trailing slash; the third is a distinct project page. The
    test expects the returned titles to be "Resume" then "Project".
    """
    rows = (
        ("Resume", "darshanchheda.com/resume", "cv"),
        ("Resume", "https://darshanchheda.com/resume/", "cv"),
        ("Project", "https://darshanchheda.com/projects/textops", "project"),
    )
    refs = [
        SourceRef(title=label, url=link, section="", source_type=kind)
        for label, link, kind in rows
    ]

    unique = _dedup_sources(refs)

    assert [ref.title for ref in unique] == ["Resume", "Project"]