File size: 6,497 Bytes
"""CanLII case citator -- live lookup of a Canadian case's citation graph.

Uses the CanLII API. The key is taken from the CANLII_API_KEY environment
variable or, failing that, from canlii_key.txt. The API has no name/topic
search, so a case is identified by its full canlii.org URL; a neutral citation
for a Supreme Court, Federal Court of Appeal or Federal Court decision is also
accepted and converted to that URL. Responses are cached on disk and calls are
throttled, because the API rate-limits aggressively.
"""
import json
import os
import re
import sys
import time
import urllib.error
import urllib.request
from .config import ROOT, DATA_DIR
# Base URL of the CanLII REST API (v1); request paths are appended to it.
API = "https://api.canlii.org/v1"
# Local key file, read by api_key() when CANLII_API_KEY is not set.
KEY_FILE = ROOT / "canlii_key.txt"
# Template text of an unfilled key; api_key() treats it as "no key configured".
_PLACEHOLDER = "PASTE-YOUR-CANLII-API-KEY-ON-THIS-LINE"
# On-disk store for the URL court segment -> databaseId mapping.
_DBMAP_FILE = DATA_DIR / "citator_dbmap.json"
# On-disk store for finished case reports, keyed by case URL.
_CACHE_FILE = DATA_DIR / "citator_cache.json"
_THROTTLE = 3.0 # seconds between CanLII API calls (the API rate-limits hard)
_MAX_LIST = 20 # items shown per citator list (lists can run to thousands)
# Case page URL: group 1 is the court segment, group 2 the case id.
_CASE_URL = re.compile(r"canlii\.org/[a-z]{2}/[a-z]+/([a-z0-9-]+)/doc/\d+/([a-z0-9-]+)")
# Court database URL (nothing after the court segment): group 1 is the segment.
_DB_URL = re.compile(r"canlii\.org/[a-z]{2}/[a-z]+/([a-z0-9-]+)/?$")
# A neutral citation, e.g. "2019 SCC 65", and the CanLII URL segment per court.
# Groups: year, court abbreviation (matched case-insensitively), decision number.
_NEUTRAL = re.compile(r"\b(\d{4})\s+(SCC|FCA|FC)\s+(\d+)\b", re.IGNORECASE)
# Keys are lower-cased court abbreviations; note the Federal Court maps to "fct".
_CANLII_SEG = {"scc": "scc", "fca": "fca", "fc": "fct"}
def canlii_url_from_citation(text):
    """Translate a neutral citation found in *text* into a canlii.org case URL.

    The first Supreme Court, Federal Court of Appeal or Federal Court neutral
    citation in *text* is used (letter case is ignored), e.g. "2019 SCC 65"
    -> https://www.canlii.org/en/ca/scc/doc/2019/2019scc65/2019scc65.html

    Returns '' when *text* contains no such citation.
    """
    found = _NEUTRAL.search(text)
    if found is None:
        return ""

    year, court, number = found.groups()
    court = court.lower()
    # The document id repeats the citation without spaces, e.g. "2019scc65".
    doc_id = year + court + number
    segment = _CANLII_SEG[court]
    return f"https://www.canlii.org/en/ca/{segment}/doc/{year}/{doc_id}/{doc_id}.html"
def api_key():
    """Look up the CanLII API key; return '' when none is configured.

    Lookup order:
      1. the CANLII_API_KEY environment variable (the remote deployment
         injects the key as a secret rather than shipping the file);
      2. the contents of canlii_key.txt (local runs).

    In both places surrounding whitespace is stripped, and an empty value or
    the unfilled placeholder text counts as "not set".
    """
    from_env = os.environ.get("CANLII_API_KEY", "").strip()
    if from_env not in ("", _PLACEHOLDER):
        return from_env

    if KEY_FILE.exists():
        from_file = KEY_FILE.read_text(encoding="utf-8").strip()
        if from_file and from_file != _PLACEHOLDER:
            return from_file
    return ""
def _load_json(path):
    """Load a JSON object from *path*, falling back to an empty dict.

    *path* must provide ``read_text(encoding=...)`` (e.g. a ``pathlib.Path``).

    Returns the parsed dict. Returns ``{}`` when the file is missing or
    unreadable, is not valid UTF-8 / JSON, or holds valid JSON whose top-level
    value is not an object. Callers treat the result as a mapping, so a stray
    list or scalar must not leak through.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # UnicodeDecodeError and json.JSONDecodeError.
        return {}
    return data if isinstance(data, dict) else {}
class Citator:
    """Live CanLII citator with on-disk caching and rate-limit throttling.

    Construction reads the API key via ``api_key()`` and loads two JSON files:
    the court-segment -> databaseId map and the cache of previously built
    reports. ``case_report`` is the public entry point.

    Raises:
        RuntimeError: on construction, when no API key is configured.
    """

    def __init__(self):
        self.key = api_key()
        if not self.key:
            raise RuntimeError(f"No CanLII API key -- put your key in {KEY_FILE}.")
        self._dbmap = _load_json(_DBMAP_FILE)  # URL court segment -> databaseId
        self._cache = _load_json(_CACHE_FILE)  # case URL -> report

    def _get(self, path):
        """Throttled GET against the CanLII API, retrying on HTTP 429.

        ``path`` is relative to ``API`` and may already carry a query string;
        the API key is appended as the ``api_key`` parameter. Every attempt is
        preceded by a sleep: ``_THROTTLE`` seconds before the first, 15 seconds
        before each retry. At most three attempts are made.

        Returns the decoded JSON response body.

        Raises:
            urllib.error.HTTPError: for any HTTP error status other than 429.
            RuntimeError: when all three attempts were answered with 429
                (chained from the last HTTPError).
        """
        sep = "&" if "?" in path else "?"
        url = f"{API}/{path}{sep}api_key={self.key}"
        rate_limited = None
        for attempt in range(3):
            time.sleep(_THROTTLE if attempt == 0 else 15.0)
            try:
                with urllib.request.urlopen(url, timeout=45) as resp:
                    return json.loads(resp.read().decode("utf-8"))
            except urllib.error.HTTPError as exc:
                if exc.code != 429:
                    raise
                # Only 429 is retried. A 429 on the last attempt falls out of
                # the loop so the caller gets the rate-limit message below
                # rather than a raw HTTPError.
                rate_limited = exc
        raise RuntimeError(
            "CanLII API rate limit reached; retry shortly."
        ) from rate_limited

    def _ensure_dbmap(self):
        """Populate the court-segment -> databaseId map if it is still empty.

        Fetches the database list from ``caseBrowse/en/``, keeps every entry
        that has a ``databaseId`` and whose ``url`` matches ``_DB_URL``, and
        writes the resulting map to ``_DBMAP_FILE``.
        """
        if self._dbmap:
            return
        data = self._get("caseBrowse/en/")
        dbmap = {}
        # ``or []`` also covers an explicit null in the response.
        for db in data.get("caseDatabases") or []:
            match = _DB_URL.search(db.get("url") or "")
            if match and db.get("databaseId"):
                dbmap[match.group(1)] = db["databaseId"]
        self._dbmap = dbmap
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        _DBMAP_FILE.write_text(json.dumps(dbmap), encoding="utf-8")

    def _citator(self, db, case_id, kind):
        """Fetch one citator list for a case and trim it for display.

        ``kind`` is both the final URL path component and the key of the list
        in the response (e.g. "citingCases").

        Returns ``{"total": <entries in the response>, "items": <first
        _MAX_LIST entries>}``.
        """
        data = self._get(f"caseCitator/en/{db}/{case_id}/{kind}")
        # A missing or null list is treated as empty.
        items = data.get(kind) or []
        return {"total": len(items), "items": items[:_MAX_LIST]}

    def case_report(self, case_url):
        """Return a citation-graph report for a case.

        Accepts a full canlii.org case URL, or a neutral citation (e.g.
        "2019 SCC 65") for a Supreme Court / Federal Court of Appeal / Federal
        Court decision.

        Returns a dict with the keys "meta", "citingCases", "citedCases" and
        "citedLegislations", or ``{"error": <message>}`` when the input is not
        recognised. Successful reports are cached in memory and written to
        ``_CACHE_FILE``; error results are not cached.
        """
        match = _CASE_URL.search(case_url)
        if not match:
            # Not a case URL: try to read it as a neutral citation instead.
            case_url = canlii_url_from_citation(case_url) or case_url
            match = _CASE_URL.search(case_url)
        if case_url in self._cache:
            return self._cache[case_url]
        if not match:
            return {"error": "Provide a full canlii.org case URL, or a neutral "
                             "citation such as '2019 SCC 65' (Supreme Court, Federal "
                             "Court of Appeal, or Federal Court)."}
        self._ensure_dbmap()
        segment, case_id = match.group(1), match.group(2)
        db = self._dbmap.get(segment)
        if not db:
            return {"error": f"Unrecognized CanLII court segment '{segment}'."}
        report = {
            "meta": self._get(f"caseBrowse/en/{db}/{case_id}/"),
            "citingCases": self._citator(db, case_id, "citingCases"),
            "citedCases": self._citator(db, case_id, "citedCases"),
            "citedLegislations": self._citator(db, case_id, "citedLegislations"),
        }
        self._cache[case_url] = report
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        _CACHE_FILE.write_text(json.dumps(self._cache, ensure_ascii=False),
                               encoding="utf-8")
        return report
def main():
    """Command-line entry point: print a short citator summary for one case.

    Takes the case URL (or neutral citation) from the first command-line
    argument. Prints a usage line when it is missing, and the error message
    when the lookup returns one.
    """
    args = sys.argv[1:]
    if not args:
        print('usage: python -m canlex.citator "<canlii-case-url>"')
        return

    report = Citator().case_report(args[0])
    if "error" in report:
        print("ERROR:", report["error"])
        return

    meta = report["meta"]
    title = meta.get("title")
    citation = meta.get("citation")
    decided = meta.get("decisionDate")
    print(f"{title} -- {citation} ({decided})")

    # One summary line per citator list, in display order.
    summary_rows = (
        (" cited by:", "citingCases"),
        (" cites:", "citedCases"),
        (" legislation cited:", "citedLegislations"),
    )
    for label, kind in summary_rows:
        print(f"{label} {report[kind]['total']}")

    # Only the first six cited statutes are listed.
    shown = report["citedLegislations"]["items"][:6]
    for entry in shown:
        print(f" - {entry.get('title')} ({entry.get('citation')})")
# Run the command-line entry point only when executed directly, not on import.
if __name__ == "__main__":
    main()
|