# personabot-api/tests/test_generate_citation_reindex.py
# GitHub Actions
# Deploy fe36296
# 385ac95
from app.models.chat import SourceRef
from app.pipeline.nodes.generate import _dedup_sources, _reindex_citations_and_sources
def test_reindex_citations_compacts_in_first_mention_order() -> None:
    """Check that cited sources are renumbered 1..n by first mention in the answer.

    Four sources are supplied but only the 4th and 2nd are cited (the 4th
    twice), so the result should list exactly D then B and no longer contain
    the old ``[4]`` marker.
    """
    catalogue = [
        SourceRef(title=title, url=url, section="s", source_type="project")
        for title, url in (
            ("A", "https://x/a"),
            ("B", "https://x/b"),
            ("C", "https://x/c"),
            ("D", "https://x/d"),
        )
    ]

    rewritten, kept = _reindex_citations_and_sources(
        "Uses D [4], then B [2], then D again [4].",
        catalogue,
    )

    # Both compacted markers must be present in the rewritten answer...
    for marker in ("[1]", "[2]"):
        assert marker in rewritten
    # ...and the original high index must be gone.
    assert "[4]" not in rewritten
    assert [ref.title for ref in kept] == ["D", "B"]
def test_reindex_citations_merges_same_document_indices() -> None:
    """Check that old indices sharing one document URL collapse to one source."""
    resume_url = "https://darshanchheda.com/resume"
    textops_url = "https://darshanchheda.com/projects/textops"

    # Entries 1 and 2 are two sections of the same resume URL; entry 3 is a
    # separate project page.
    catalogue = [
        SourceRef(title="Resume", url=resume_url, section="Experience", source_type="cv"),
        SourceRef(title="Resume", url=resume_url, section="Skills", source_type="cv"),
        SourceRef(title="TextOps", url=textops_url, section="Overview", source_type="project"),
    ]

    rewritten, kept = _reindex_citations_and_sources(
        "He has QA experience [1][2] and built TextOps [3].",
        catalogue,
    )

    # Merging must not leave a doubled marker for the shared document.
    assert "[1][1]" not in rewritten
    assert "[1]" in rewritten
    assert "[2]" in rewritten
    assert [ref.url for ref in kept] == [resume_url, textops_url]
def test_reindex_citations_drops_out_of_range_markers() -> None:
    """Check that a marker with no matching source is absent from the result."""
    only_source = SourceRef(
        title="Only",
        url="https://x/only",
        section="s",
        source_type="project",
    )

    # "[9]" points past the end of the one-element source list.
    rewritten, kept = _reindex_citations_and_sources(
        "Valid [1], invalid [9].",
        [only_source],
    )

    assert "[1]" in rewritten
    assert "[9]" not in rewritten
    assert len(kept) == 1
def test_reindex_citations_merges_url_variants_same_document() -> None:
    """Check that URL variants of one page are cited as a single source.

    The two URLs differ by trailing slash, ``www.`` prefix, query string and
    fragment; the test expects one ``[1]`` marker and one remaining source.
    """
    shared_fields = {"title": "Resume", "source_type": "cv"}
    catalogue = [
        SourceRef(
            url="https://darshanchheda.com/resume/",
            section="Experience",
            **shared_fields,
        ),
        SourceRef(
            url="https://www.darshanchheda.com/resume?ref=nav#top",
            section="Skills",
            **shared_fields,
        ),
    ]

    rewritten, kept = _reindex_citations_and_sources(
        "Resume evidence appears in both chunks [1][2].",
        catalogue,
    )

    # Exactly one marker should survive for the merged document.
    assert rewritten.count("[1]") == 1
    assert "[2]" not in rewritten
    assert len(kept) == 1
    (survivor,) = kept
    assert survivor.title == "Resume"
def test_dedup_sources_merges_url_variants() -> None:
    """Check that ``_dedup_sources`` merges scheme and trailing-slash URL variants."""
    # (title, url, source_type); the first two rows are variants of one page.
    rows = (
        ("Resume", "darshanchheda.com/resume", "cv"),
        ("Resume", "https://darshanchheda.com/resume/", "cv"),
        ("Project", "https://darshanchheda.com/projects/textops", "project"),
    )
    catalogue = [
        SourceRef(title=title, url=url, section="", source_type=kind)
        for title, url, kind in rows
    ]

    unique_refs = _dedup_sources(catalogue)

    assert [ref.title for ref in unique_refs] == ["Resume", "Project"]