Spaces:

Beemer0
/

CanLex

Running

Beemer Claude Opus 4.7 committed 2 days ago

Commit

a7a22f5

1 Parent(s): df55f26

Sweep-tune the regulation and back-matter penalties; revert the failed swap

Coordinate-descent sweep over the four CANLEX_* knobs converged on
halving the two penalties: REG_PENALTY 0.008 -> 0.004 and
BACKMATTER_PENALTY 0.008 -> 0.004 (MN_WEIGHT and MN_CAP unchanged).
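The previous values over-penalised regulations and agreement
back-matter against legitimately-on-topic candidates of those kinds.
(For REG_PENALTY the sweep is not monotonic: 0.016 and 0.032 tie 0.004
on every metric, so 0.004 is the smallest of the tied values rather
than a unique optimum.)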

The within-source title-match swap explored alongside this is reverted:
it fixed one miss (FB Agreement s. 17 moves from #6 to #5) but introduced
a new one (IRPA s. 20 falls from top 5 to #10), because the title-match
heuristic is fooled by surface vocabulary overlap (IRPA s. 192
"Immigration Appeal Division" wins against an IAD-appeals query on
title alone, even though the operative rule is s. 64). Net Hit@3 was
-0.01, so not worth the complexity.

EnsureLegislationTests is renamed to EnsurePrimaryTests, and a new test
checks that agreement candidates are eligible for the guarantee.

141-question eval: Hit@1 0.79 / Hit@3 0.96 / Hit@5 0.99 / Hit@10 0.99
/ MRR 0.88 (vs pre-sweep 0.79 / 0.96 / 0.98 / 0.99 / 0.87 -- Hit@5
and MRR each +0.01; 3 misses -> 2). Remaining misses: IRPA s. 64
(Chieu outranks on IAD appeals) and Khosa (Chieu outranks on IAD
deference). The sweep log + JSON summary are committed for the record.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (4) hide show

canlex/index.py +10 -8
data/eval/sweep.json +272 -0
data/eval/sweep.log +33 -0
tests/test_index.py +26 -8

canlex/index.py CHANGED Viewed

@@ -23,12 +23,14 @@ MN_WEIGHT = float(os.environ.get("CANLEX_MN_WEIGHT", "0.0024"))
 MN_CAP = float(os.environ.get("CANLEX_MN_CAP", "0.012"))
                     # ceiling on the title-match boost -- it nudges the ranking
                     # without overriding a strong base score
-REG_PENALTY = float(os.environ.get("CANLEX_REG_PENALTY", "0.008"))
                     # small fusion penalty on regulation sections, so the Act
                     # that creates a duty outranks the regulation elaborating it
-BACKMATTER_PENALTY = float(os.environ.get("CANLEX_BACKMATTER_PENALTY", "0.008"))
                     # likewise for a collective agreement's back-matter
                     # (memoranda, letters of understanding) vs its numbered articles
 SOURCE_CAP = 2      # max chunks one case or memorandum may contribute
 APPENDIX_CAP = 3    # max referenced appendices co-surfaced into a result set
@@ -548,12 +550,12 @@ class LegislationIndex:
                 pinned_set = set(pinned)
                 candidates = pinned + [i for i in candidates if i not in pinned_set]
-        # Cap one-source monopolies, then guarantee a primary instrument
-        # (statute/agreement/directive/delegation) on the topic is represented.
-        # The guarantee operates on a fixed visible window (5), not the full
-        # top_k -- with top_k=20 (the eval default) the larger window almost
-        # always contains incidental legislation, so the guarantee never fires
-        # even when the GOVERNING provision is buried at rank 10+.
         candidates = self._diversify(candidates)
         candidates = self._ensure_primary(candidates, min(top_k, 5), q_tokens)

 MN_CAP = float(os.environ.get("CANLEX_MN_CAP", "0.012"))
                     # ceiling on the title-match boost -- it nudges the ranking
                     # without overriding a strong base score
+REG_PENALTY = float(os.environ.get("CANLEX_REG_PENALTY", "0.004"))
                     # small fusion penalty on regulation sections, so the Act
                     # that creates a duty outranks the regulation elaborating it
+                    # (sweep-tuned 2026-05-23 from 0.008 -> 0.004; see sweep.log)
+BACKMATTER_PENALTY = float(os.environ.get("CANLEX_BACKMATTER_PENALTY", "0.004"))
                     # likewise for a collective agreement's back-matter
                     # (memoranda, letters of understanding) vs its numbered articles
+                    # (sweep-tuned 2026-05-23 from 0.008 -> 0.004; see sweep.log)
 SOURCE_CAP = 2      # max chunks one case or memorandum may contribute
 APPENDIX_CAP = 3    # max referenced appendices co-surfaced into a result set
                 pinned_set = set(pinned)
                 candidates = pinned + [i for i in candidates if i not in pinned_set]
+        # Cap one-source monopolies, then guarantee a primary instrument on
+        # the topic is represented. The guarantee operates on a fixed visible
+        # window of min(top_k, 5), not the full top_k -- with top_k=20 (the
+        # eval default) a full-size window would almost always contain
+        # incidental legislation, so the guarantee would never fire even
+        # when the governing provision is buried at rank 10+.
         candidates = self._diversify(candidates)
         candidates = self._ensure_primary(candidates, min(top_k, 5), q_tokens)

data/eval/sweep.json ADDED Viewed

	@@ -0,0 +1,272 @@

+{
+  "best": {
+    "CANLEX_MN_WEIGHT": 0.0024,
+    "CANLEX_MN_CAP": 0.012,
+    "CANLEX_REG_PENALTY": 0.004,
+    "CANLEX_BACKMATTER_PENALTY": 0.004
+  },
+  "best_metrics": {
+    "hit1": 0.79,
+    "hit3": 0.96,
+    "hit5": 0.99,
+    "hit10": 0.99,
+    "mrr": 0.88
+  },
+  "runs": [
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.87
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0012,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.78,
+        "hit3": 0.94,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.86
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.87
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.005,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.78,
+        "hit3": 0.94,
+        "hit5": 0.97,
+        "hit10": 0.99,
+        "mrr": 0.86
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.01,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.75,
+        "hit3": 0.94,
+        "hit5": 0.96,
+        "hit10": 0.99,
+        "mrr": 0.84
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.006,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.77,
+        "hit3": 0.93,
+        "hit5": 0.96,
+        "hit10": 0.99,
+        "mrr": 0.85
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.87
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.024,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.77,
+        "hit3": 0.94,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.86
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.05,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.77,
+        "hit3": 0.94,
+        "hit5": 0.97,
+        "hit10": 0.99,
+        "mrr": 0.86
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.004,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.88
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.008,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.87
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.016,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.88
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.032,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.88
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.004,
+        "CANLEX_BACKMATTER_PENALTY": 0.004
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.99,
+        "hit10": 0.99,
+        "mrr": 0.88
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.004,
+        "CANLEX_BACKMATTER_PENALTY": 0.008
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.88
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.004,
+        "CANLEX_BACKMATTER_PENALTY": 0.016
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.98,
+        "hit10": 0.99,
+        "mrr": 0.87
+      }
+    },
+    {
+      "values": {
+        "CANLEX_MN_WEIGHT": 0.0024,
+        "CANLEX_MN_CAP": 0.012,
+        "CANLEX_REG_PENALTY": 0.004,
+        "CANLEX_BACKMATTER_PENALTY": 0.032
+      },
+      "metrics": {
+        "hit1": 0.79,
+        "hit3": 0.96,
+        "hit5": 0.99,
+        "hit10": 0.99,
+        "mrr": 0.87
+      }
+    }
+  ]
+}

data/eval/sweep.log ADDED Viewed

	@@ -0,0 +1,33 @@

+# CanLex tuning sweep -- 2026-05-23 12:59:35
+Baseline: {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.008, 'CANLEX_BACKMATTER_PENALTY': 0.008}
+  Hit@5=0.980  MRR=0.870
+Sweeping CANLEX_MN_WEIGHT in [0.0012, 0.0024, 0.005, 0.01] (others held at {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.008, 'CANLEX_BACKMATTER_PENALTY': 0.008})
+  CANLEX_MN_WEIGHT=0.0012   -> Hit@1=0.780 Hit@3=0.940 Hit@5=0.980 Hit@10=0.990 MRR=0.860
+  CANLEX_MN_WEIGHT=0.0024   -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.870
+  CANLEX_MN_WEIGHT=0.005    -> Hit@1=0.780 Hit@3=0.940 Hit@5=0.970 Hit@10=0.990 MRR=0.860
+  CANLEX_MN_WEIGHT=0.01     -> Hit@1=0.750 Hit@3=0.940 Hit@5=0.960 Hit@10=0.990 MRR=0.840
+Sweeping CANLEX_MN_CAP in [0.006, 0.012, 0.024, 0.05] (others held at {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.008, 'CANLEX_BACKMATTER_PENALTY': 0.008})
+  CANLEX_MN_CAP=0.006    -> Hit@1=0.770 Hit@3=0.930 Hit@5=0.960 Hit@10=0.990 MRR=0.850
+  CANLEX_MN_CAP=0.012    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.870
+  CANLEX_MN_CAP=0.024    -> Hit@1=0.770 Hit@3=0.940 Hit@5=0.980 Hit@10=0.990 MRR=0.860
+  CANLEX_MN_CAP=0.05     -> Hit@1=0.770 Hit@3=0.940 Hit@5=0.970 Hit@10=0.990 MRR=0.860
+Sweeping CANLEX_REG_PENALTY in [0.004, 0.008, 0.016, 0.032] (others held at {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.008, 'CANLEX_BACKMATTER_PENALTY': 0.008})
+  CANLEX_REG_PENALTY=0.004    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.880
+  CANLEX_REG_PENALTY=0.008    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.870
+  CANLEX_REG_PENALTY=0.016    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.880
+  CANLEX_REG_PENALTY=0.032    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.880
+  ! CANLEX_REG_PENALTY: 0.008 -> 0.004
+Sweeping CANLEX_BACKMATTER_PENALTY in [0.004, 0.008, 0.016, 0.032] (others held at {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.004, 'CANLEX_BACKMATTER_PENALTY': 0.008})
+  CANLEX_BACKMATTER_PENALTY=0.004    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.990 Hit@10=0.990 MRR=0.880
+  CANLEX_BACKMATTER_PENALTY=0.008    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.880
+  CANLEX_BACKMATTER_PENALTY=0.016    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.870
+  CANLEX_BACKMATTER_PENALTY=0.032    -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.990 Hit@10=0.990 MRR=0.870
+  ! CANLEX_BACKMATTER_PENALTY: 0.008 -> 0.004
+Best: {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.004, 'CANLEX_BACKMATTER_PENALTY': 0.004}
+  Hit@1=0.790 Hit@5=0.990  MRR=0.880

tests/test_index.py CHANGED Viewed

@@ -114,8 +114,8 @@ class DiversifyTests(unittest.TestCase):
         self.assertEqual(out, list(range(n)))           # uncapped: order intact
-class EnsureLegislationTests(unittest.TestCase):
-    def test_pulls_legislation_into_a_caselaw_dominated_top_k(self):
         idx = bare_index([
             chunk(doc_type="caselaw", act_code="A"),
             chunk(doc_type="caselaw", act_code="B"),
@@ -123,20 +123,38 @@ class EnsureLegislationTests(unittest.TestCase):
             chunk(doc_type="legislation"),
             chunk(doc_type="legislation"),
         ])
-        out = idx._ensure_legislation([0, 1, 2, 3, 4], top_k=3)
         top = out[:3]
-        n_leg = sum(1 for i in top
-                    if idx.chunks[i]["doc_type"] == "legislation")
-        self.assertGreaterEqual(n_leg, 2)
         self.assertEqual(out[0], 0)                     # the #1 hit is preserved
-    def test_no_op_when_legislation_already_present(self):
         idx = bare_index([
             chunk(doc_type="legislation"),
             chunk(doc_type="legislation"),
             chunk(doc_type="caselaw", act_code="A"),
         ])
-        self.assertEqual(idx._ensure_legislation([0, 1, 2], top_k=3), [0, 1, 2])
 class DocTypeFlagTests(unittest.TestCase):

         self.assertEqual(out, list(range(n)))           # uncapped: order intact
+class EnsurePrimaryTests(unittest.TestCase):
+    def test_pulls_primary_into_a_caselaw_dominated_top_k(self):
         idx = bare_index([
             chunk(doc_type="caselaw", act_code="A"),
             chunk(doc_type="caselaw", act_code="B"),
             chunk(doc_type="legislation"),
             chunk(doc_type="legislation"),
         ])
+        out = idx._ensure_primary([0, 1, 2, 3, 4], top_k=3, q_tokens=set())
         top = out[:3]
+        n_prim = sum(1 for i in top
+                     if idx.chunks[i]["doc_type"] == "legislation")
+        self.assertGreaterEqual(n_prim, 2)
         self.assertEqual(out[0], 0)                     # the #1 hit is preserved
+    def test_no_op_when_primary_already_present(self):
         idx = bare_index([
             chunk(doc_type="legislation"),
             chunk(doc_type="legislation"),
             chunk(doc_type="caselaw", act_code="A"),
         ])
+        self.assertEqual(
+            idx._ensure_primary([0, 1, 2], top_k=3, q_tokens=set()),
+            [0, 1, 2])
+    def test_counts_agreements_as_primary(self):
+        # An agreement query that surfaces only case-law in top_k should
+        # have the agreement article pulled in -- not just legislation.
+        idx = bare_index([
+            chunk(doc_type="caselaw", act_code="A"),
+            chunk(doc_type="caselaw", act_code="B"),
+            chunk(doc_type="caselaw", act_code="C"),
+            chunk(doc_type="agreement", act_code="FB", section="17",
+                  marginal_note="discipline"),
+            chunk(doc_type="agreement", act_code="FB", section="25",
+                  marginal_note="hours of work"),
+        ])
+        out = idx._ensure_primary([0, 1, 2, 3, 4], top_k=3, q_tokens=set())
+        top_doc_types = [idx.chunks[i]["doc_type"] for i in out[:3]]
+        self.assertGreaterEqual(top_doc_types.count("agreement"), 2)
 class DocTypeFlagTests(unittest.TestCase):