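"""CanLII case citator -- live lookup of a Canadian case's citation graph.

Uses the CanLII API (key in the CANLII_API_KEY environment variable, or in
canlii_key.txt). The API has no name/topic search, so a case is identified by
its full canlii.org URL -- or, for Supreme Court, Federal Court of Appeal and
Federal Court decisions, by a neutral citation such as "2019 SCC 65", which is
converted to that URL. Responses are cached on disk and calls are throttled,
because the API rate-limits aggressively.
"""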
| import json |
| import os |
| import re |
| import sys |
| import time |
| import urllib.error |
| import urllib.request |
|
|
| from .config import ROOT, DATA_DIR |
|
|
# Base endpoint of the CanLII REST API (v1); request paths are appended to it.
API = "https://api.canlii.org/v1"

# Local key file, and the placeholder text that marks it as not yet filled in.
KEY_FILE = ROOT / "canlii_key.txt"
_PLACEHOLDER = "PASTE-YOUR-CANLII-API-KEY-ON-THIS-LINE"

# On-disk JSON caches: court URL segment -> databaseId, and finished reports
# keyed by case URL.
_DBMAP_FILE = DATA_DIR / "citator_dbmap.json"
_CACHE_FILE = DATA_DIR / "citator_cache.json"

# Seconds slept before the first attempt of every API request.
_THROTTLE = 3.0
# Maximum number of entries kept per citator list in a report.
_MAX_LIST = 20

# Case page URL: group 1 is the court segment, group 2 the case id.
_CASE_URL = re.compile(r"canlii\.org/[a-z]{2}/[a-z]+/([a-z0-9-]+)/doc/\d+/([a-z0-9-]+)")
# Court database URL: group 1 is the court segment (the final path element).
_DB_URL = re.compile(r"canlii\.org/[a-z]{2}/[a-z]+/([a-z0-9-]+)/?$")
|
|
| |
# Neutral citation of the form "<year> <court> <number>" for the three courts
# this module can map to a URL; matched case-insensitively.
_NEUTRAL = re.compile(r"\b(\d{4})\s+(SCC|FCA|FC)\s+(\d+)\b", re.IGNORECASE)
# Lower-cased court code -> court segment used in the canlii.org URL path.
_CANLII_SEG = {"scc": "scc", "fca": "fca", "fc": "fct"}


def canlii_url_from_citation(text):
    """Turn the first neutral citation found in *text* into a canlii.org URL.

    Only Supreme Court (SCC), Federal Court of Appeal (FCA) and Federal Court
    (FC) neutral citations are recognised, in any letter case -- e.g.
    "2019 SCC 65" -> .../en/ca/scc/doc/2019/2019scc65/2019scc65.html

    Returns '' when *text* contains no such citation.
    """
    found = _NEUTRAL.search(text)
    if found is None:
        return ""
    year, court, number = found.groups()
    court = court.lower()
    doc_id = "".join((year, court, number))
    template = "https://www.canlii.org/en/ca/{seg}/doc/{year}/{doc}/{doc}.html"
    return template.format(seg=_CANLII_SEG[court], year=year, doc=doc_id)
|
|
|
|
def api_key():
    """Return the configured CanLII API key, or '' if not set.

    The CANLII_API_KEY environment variable is checked first -- the remote
    deployment injects the key as a secret rather than shipping the file.
    A local run falls back to canlii_key.txt. In either source a blank value,
    or the unedited placeholder text, counts as "not set".
    """
    env_key = os.environ.get("CANLII_API_KEY", "").strip()
    if env_key and env_key != _PLACEHOLDER:
        return env_key
    try:
        # "utf-8-sig" drops a leading byte-order mark if the editor that saved
        # the file wrote one; plain "utf-8" would keep it, and str.strip()
        # does not remove it, so the key (and the placeholder comparison
        # below) would silently be wrong.
        key = KEY_FILE.read_text(encoding="utf-8-sig").strip()
    except (FileNotFoundError, NotADirectoryError):
        # No key file at all: same outcome as an empty one.
        return ""
    return "" if not key or key == _PLACEHOLDER else key
|
|
|
|
def _load_json(path):
    """Read a JSON object from *path*, falling back to an empty dict.

    Loading is best-effort: a missing or unreadable file, undecodable bytes,
    or malformed JSON all yield {} instead of raising. A file that parses but
    does not hold a JSON object (e.g. a list) also yields {}, because callers
    use the result as a dict.

    Args:
        path: a ``pathlib.Path``-like object providing ``read_text``.

    Returns:
        The decoded dict, or {} when nothing usable could be loaded.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: file missing/unreadable. ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError. RecursionError:
        # pathologically nested JSON.
        return {}
    return data if isinstance(data, dict) else {}
|
|
|
|
class Citator:
    """Live CanLII citator with on-disk caching and rate-limit throttling.

    Construction reads the API key and loads two JSON caches from disk: the
    court-segment -> databaseId map and previously built case reports.
    ``case_report`` is the public entry point; the underscore-prefixed
    methods are internal helpers.
    """

    def __init__(self):
        """Load the API key and the on-disk caches.

        Raises:
            RuntimeError: if no API key is configured.
        """
        self.key = api_key()
        if not self.key:
            raise RuntimeError(f"No CanLII API key -- put your key in {KEY_FILE}.")
        self._dbmap = _load_json(_DBMAP_FILE)
        self._cache = _load_json(_CACHE_FILE)

    def _get(self, path):
        """Throttled GET against the CanLII API, retrying on HTTP 429.

        Makes up to three attempts, sleeping ``_THROTTLE`` seconds before the
        first and 15 seconds before each retry. The API key is appended to
        *path* as the ``api_key`` query parameter.

        Args:
            path: API path relative to ``API``, optionally with a query string.

        Returns:
            The decoded JSON response body.

        Raises:
            RuntimeError: if every attempt was answered with HTTP 429; the
                last ``HTTPError`` is attached as ``__cause__``.
            urllib.error.HTTPError: for any other HTTP error status.
        """
        # The URL does not change between attempts, so build it once.
        sep = "&" if "?" in path else "?"
        url = f"{API}/{path}{sep}api_key={self.key}"
        rate_limited = None
        for attempt in range(3):
            time.sleep(_THROTTLE if attempt == 0 else 15.0)
            try:
                with urllib.request.urlopen(url, timeout=45) as resp:
                    return json.loads(resp.read().decode("utf-8"))
            except urllib.error.HTTPError as exc:
                if exc.code != 429:
                    raise
                # Keep the exception: the name is unbound once the except
                # block ends, and it is needed as the cause below.
                rate_limited = exc
        # Reached only when all attempts were rate-limited.
        raise RuntimeError(
            "CanLII API rate limit reached; retry shortly.") from rate_limited

    def _ensure_dbmap(self):
        """Fetch and persist the court-segment -> databaseId map if not loaded.

        Does nothing when a non-empty map is already in memory. Otherwise it
        lists the case databases via the API, keys each ``databaseId`` by the
        last path segment of the database's URL, and writes the map to
        ``_DBMAP_FILE``.
        """
        if self._dbmap:
            return
        data = self._get("caseBrowse/en/")
        dbmap = {}
        # "or []" also covers a response where the key is present but null.
        for db in data.get("caseDatabases") or []:
            match = _DB_URL.search(db.get("url") or "")
            if match and db.get("databaseId"):
                dbmap[match.group(1)] = db["databaseId"]
        self._dbmap = dbmap
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        _DBMAP_FILE.write_text(json.dumps(dbmap), encoding="utf-8")

    def _citator(self, db, case_id, kind):
        """Fetch one citator list for a case.

        Args:
            db: CanLII databaseId of the court.
            case_id: case identifier within that database.
            kind: list to fetch; also the key read from the response
                ("citingCases", "citedCases" or "citedLegislations").

        Returns:
            ``{"total": <number of entries returned>, "items": <first
            _MAX_LIST entries>}``.
        """
        data = self._get(f"caseCitator/en/{db}/{case_id}/{kind}")
        # A missing or null list is treated as empty rather than crashing len().
        items = data.get(kind) or []
        return {"total": len(items), "items": items[:_MAX_LIST]}

    def case_report(self, case_url):
        """Return a citation-graph report for a case.

        Accepts a full canlii.org case URL, or a neutral citation (e.g.
        "2019 SCC 65") for a Supreme Court / Federal Court of Appeal / Federal
        Court decision. Reports are cached on disk, keyed by the case URL, so
        a repeated lookup makes no API calls.

        Returns:
            A dict with keys "meta", "citingCases", "citedCases" and
            "citedLegislations"; or ``{"error": <message>}`` when the input
            is not a recognisable case URL/citation or its court segment is
            unknown.

        Raises:
            RuntimeError, urllib.error.HTTPError: propagated from the API
                request helper.
        """
        if not _CASE_URL.search(case_url):
            # Not a URL: try to read it as a neutral citation instead.
            case_url = canlii_url_from_citation(case_url) or case_url
        if case_url in self._cache:
            return self._cache[case_url]
        match = _CASE_URL.search(case_url)
        if not match:
            return {"error": "Provide a full canlii.org case URL, or a neutral "
                             "citation such as '2019 SCC 65' (Supreme Court, Federal "
                             "Court of Appeal, or Federal Court)."}
        self._ensure_dbmap()
        segment, case_id = match.group(1), match.group(2)
        db = self._dbmap.get(segment)
        if not db:
            return {"error": f"Unrecognized CanLII court segment '{segment}'."}
        report = {
            "meta": self._get(f"caseBrowse/en/{db}/{case_id}/"),
            "citingCases": self._citator(db, case_id, "citingCases"),
            "citedCases": self._citator(db, case_id, "citedCases"),
            "citedLegislations": self._citator(db, case_id, "citedLegislations"),
        }
        self._cache[case_url] = report
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        _CACHE_FILE.write_text(json.dumps(self._cache, ensure_ascii=False),
                               encoding="utf-8")
        return report
|
|
|
|
def main():
    """Command-line entry point: print a short citation report for one case.

    The case URL (or neutral citation) is read from ``sys.argv[1]``. The
    report goes to stdout; usage and error messages go to stderr.

    Returns:
        The process exit status: 0 on success, 1 when the lookup fails,
        2 when no argument was given.
    """
    if len(sys.argv) < 2:
        print('usage: python -m canlex.citator "<canlii-case-url>"',
              file=sys.stderr)
        return 2
    try:
        report = Citator().case_report(sys.argv[1])
    except (RuntimeError, OSError) as exc:
        # RuntimeError: raised by Citator() when no API key is configured.
        # OSError: base class of urllib's URLError/HTTPError and of timeouts,
        # so network and HTTP failures are reported like any other error
        # instead of as a traceback.
        print("ERROR:", exc, file=sys.stderr)
        return 1
    if "error" in report:
        print("ERROR:", report["error"], file=sys.stderr)
        return 1
    meta = report["meta"]
    print(f"{meta.get('title')} -- {meta.get('citation')} ({meta.get('decisionDate')})")
    print(f" cited by: {report['citingCases']['total']}")
    print(f" cites: {report['citedCases']['total']}")
    print(f" legislation cited: {report['citedLegislations']['total']}")
    # Only the first few statutes are listed to keep the summary short.
    for item in report["citedLegislations"]["items"][:6]:
        print(f" - {item.get('title')} ({item.get('citation')})")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
|