from app.models.chat import SourceRef
from app.pipeline.nodes.generate import _dedup_sources, _reindex_citations_and_sources


def test_reindex_citations_compacts_in_first_mention_order() -> None:
    """Citing sources 4 and 2 should leave only [1]/[2], ordered D then B."""
    candidates = [
        SourceRef(title="A", url="https://x/a", section="s", source_type="project"),
        SourceRef(title="B", url="https://x/b", section="s", source_type="project"),
        SourceRef(title="C", url="https://x/c", section="s", source_type="project"),
        SourceRef(title="D", url="https://x/d", section="s", source_type="project"),
    ]

    rewritten, kept = _reindex_citations_and_sources(
        "Uses D [4], then B [2], then D again [4].",
        candidates,
    )

    # Both compacted markers must be present; the old high index must be gone.
    for marker in ("[1]", "[2]"):
        assert marker in rewritten
    assert "[4]" not in rewritten

    kept_titles = [ref.title for ref in kept]
    assert kept_titles == ["D", "B"]


def test_reindex_citations_merges_same_document_indices() -> None:
    """Two old indices that share one document URL should collapse together."""
    resume_experience = SourceRef(
        title="Resume",
        url="https://darshanchheda.com/resume",
        section="Experience",
        source_type="cv",
    )
    resume_skills = SourceRef(
        title="Resume",
        url="https://darshanchheda.com/resume",
        section="Skills",
        source_type="cv",
    )
    textops = SourceRef(
        title="TextOps",
        url="https://darshanchheda.com/projects/textops",
        section="Overview",
        source_type="project",
    )

    rewritten, kept = _reindex_citations_and_sources(
        "He has QA experience [1][2] and built TextOps [3].",
        [resume_experience, resume_skills, textops],
    )

    # The merged resume citation must not be emitted twice back to back.
    assert "[1][1]" not in rewritten
    for marker in ("[1]", "[2]"):
        assert marker in rewritten

    kept_urls = [ref.url for ref in kept]
    assert kept_urls == [
        "https://darshanchheda.com/resume",
        "https://darshanchheda.com/projects/textops",
    ]


def test_reindex_citations_drops_out_of_range_markers() -> None:
    """A marker with no matching source ([9] of 1) should not survive."""
    only_source = SourceRef(
        title="Only",
        url="https://x/only",
        section="s",
        source_type="project",
    )

    rewritten, kept = _reindex_citations_and_sources(
        "Valid [1], invalid [9].",
        [only_source],
    )

    assert "[1]" in rewritten
    assert "[9]" not in rewritten
    assert len(kept) == 1


def test_reindex_citations_merges_url_variants_same_document() -> None:
    """URL variants (trailing slash, www, query, fragment) should merge to one source."""
    trailing_slash = SourceRef(
        title="Resume",
        url="https://darshanchheda.com/resume/",
        section="Experience",
        source_type="cv",
    )
    www_with_query = SourceRef(
        title="Resume",
        url="https://www.darshanchheda.com/resume?ref=nav#top",
        section="Skills",
        source_type="cv",
    )

    rewritten, kept = _reindex_citations_and_sources(
        "Resume evidence appears in both chunks [1][2].",
        [trailing_slash, www_with_query],
    )

    # Exactly one [1] remains and the second index disappears entirely.
    assert rewritten.count("[1]") == 1
    assert "[2]" not in rewritten
    assert len(kept) == 1
    assert kept[0].title == "Resume"


def test_dedup_sources_merges_url_variants() -> None:
    """A scheme-less URL and its https/trailing-slash twin should dedupe to one entry."""
    candidates = [
        SourceRef(
            title="Resume",
            url="darshanchheda.com/resume",
            section="",
            source_type="cv",
        ),
        SourceRef(
            title="Resume",
            url="https://darshanchheda.com/resume/",
            section="",
            source_type="cv",
        ),
        SourceRef(
            title="Project",
            url="https://darshanchheda.com/projects/textops",
            section="",
            source_type="project",
        ),
    ]

    unique_titles = [ref.title for ref in _dedup_sources(candidates)]
    assert unique_titles == ["Resume", "Project"]