"""Query-side expansion of legal abbreviations and informal terms.

Statutes use formal wording -- "application for protection", "removal order" --
but users (and a model drafting a search) reach for everyday shorthand: "PRRA",
"H&C", "deportation". Before retrieval, expand_query() appends the canonical
statutory terms for any abbreviation or nickname it recognises, so the BM25 and
semantic stages can match the provision's actual language. It only ever ADDS
words -- the user's own phrasing is left untouched -- and the cross-encoder
reranker still sees the original query, so precision is unaffected.

Usage:
    python -m canlex.synonyms "PRRA eligibility and an H&C application"
"""
| import re |
| import sys |
|
|
| |
| |
| |
| |
# Expansion table: one (trigger, expansion) pair per entry.
#   trigger   -- a regex fragment; it is compiled below with word-boundary
#                anchors and re.IGNORECASE, so it matches whole words/phrases
#                regardless of case.
#   expansion -- the text appended to a query in which the trigger is found.
# Entries are tried in the order listed, which is also the order in which
# their expansions are appended.
_SYNONYMS = [

    (r"prra", "pre-removal risk assessment application for protection"),
    # Hyphen or any run of whitespace between the words, mirroring the
    # whitespace tolerance of the "h & c" trigger below.
    (r"pre(?:-|\s+)removal\s+risk\s+assessment", "application for protection"),
    (r"h\s*&\s*c", "humanitarian and compassionate"),
    (r"rad", "refugee appeal division"),
    (r"rpd", "refugee protection division"),
    (r"iad", "immigration appeal division"),
    (r"irb", "immigration and refugee board"),
    (r"trp", "temporary resident permit"),
    (r"deportation", "removal order"),
    (r"misrep", "misrepresentation"),
    (r"ircc", "immigration refugees and citizenship canada"),

    (r"cbsa", "canada border services agency"),
    (r"bsos?", "border services officer"),
    (r"amps", "administrative monetary penalty system"),

    (r"fintrac", "financial transactions and reports analysis centre"),
    (r"njc", "national joint council"),
]


# Pre-compiled (pattern, expansion) pairs, in table order. Each trigger is
# wrapped in a non-capturing group so the \b anchors apply to the whole
# fragment even if a trigger contains a top-level alternation ("a|b").
_COMPILED = [
    (re.compile(rf"\b(?:{trigger})\b", re.IGNORECASE), expansion)
    for trigger, expansion in _SYNONYMS
]
|
|
|
|
def expand_query(query: str) -> str:
    """Return `query` with canonical statutory terms appended.

    Every pattern in ``_COMPILED`` is searched against the original `query`.
    The expansion of each pattern that matches is appended after the query,
    separated by single spaces, in table order. The query text itself is
    never modified -- words are only added.

    An expansion is skipped when its text already occurs (ignoring case) in
    the query or in an expansion appended earlier in the same call, so a
    query that already spells a phrase out is not padded with a repeat of it.

    Args:
        query: The search string to expand.

    Returns:
        The query followed by the newly added expansions, or `query` itself
        when nothing is added.
    """
    # Case-folded view of everything the result will contain so far; used
    # only for the "already present" check, never returned.
    present = query.casefold()
    additions = []
    for pattern, expansion in _COMPILED:
        # Triggers are always matched against the caller's text, not against
        # expansions added by earlier entries.
        if not pattern.search(query):
            continue
        folded = expansion.casefold()
        if folded in present:
            continue
        additions.append(expansion)
        present = f"{present} {folded}"
    if not additions:
        return query
    return f"{query} {' '.join(additions)}"
|
|
|
|
def main():
    """Print a query and its expanded form.

    The query is the command-line arguments joined with single spaces; when
    no arguments are given, a built-in sample query is used instead.
    """
    cli_text = " ".join(sys.argv[1:])
    if cli_text:
        query = cli_text
    else:
        query = "PRRA eligibility and an H&C application"
    print(f"query: {query}")
    # Expand only after echoing the input, so the input line is shown first.
    expanded = expand_query(query)
    print(f"expanded: {expanded}")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|