| """Unit tests for the embedding-text builder (canlex/embed.py).""" | |
| import unittest | |
| from canlex.embed import embed_text | |
| def _chunk(**kw): | |
| base = {"doc_type": "legislation", "act_short": "X", "marginal_note": "", | |
| "part": "", "heading": "", "text": ""} | |
| base.update(kw) | |
| return base | |
class EmbedTextTests(unittest.TestCase):
    """Checks on which chunk field ``embed_text`` includes as the title."""

    def test_memorandum_title_comes_from_part(self):
        # For a memorandum the marginal note is only a generic label; the
        # actual subject is carried by the 'part' field.
        memo = _chunk(
            doc_type="memorandum",
            act_short="D-Memo",
            marginal_note="Guidelines",
            part="Value for Duty",
            text="body",
        )
        rendered = embed_text(memo)
        self.assertIn("Value for Duty", rendered)

    def test_legislation_title_comes_from_marginal_note(self):
        # Default doc_type is legislation: the marginal note must show up.
        section = _chunk(
            marginal_note="Application for protection",
            part="PART 2",
            text="body",
        )
        rendered = embed_text(section)
        self.assertIn("Application for protection", rendered)

    def test_title_is_repeated_for_emphasis(self):
        # The title word must occur exactly twice in the output.
        section = _chunk(marginal_note="UNIQUEWORD", text="b")
        rendered = embed_text(section)
        self.assertEqual(rendered.count("UNIQUEWORD"), 2)
# Run the suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()