Beemer Claude Opus 4.7 committed on
Commit
a7a22f5
·
1 Parent(s): df55f26

Sweep-tune the regulation and back-matter penalties; revert the failed swap

Browse files

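Coordinate-descent sweep over the four CANLEX_* knobs converged on
halving the two penalties: REG_PENALTY 0.008 -> 0.004 and
BACKMATTER_PENALTY 0.008 -> 0.004 (MN_WEIGHT and MN_CAP unchanged).
The previous values appear to have over-penalised regulations and
agreement back-matter against legitimately on-topic candidates of those
kinds. Caveat: at the two-decimal precision logged, REG_PENALTY 0.016
and 0.032 score the same as 0.004 (MRR 0.88 vs 0.87 at 0.008), so that
knob's result is a tie rather than clear evidence for a smaller penalty.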

The within-source title-match swap explored alongside this is reverted:
it fixed one miss (FB Agreement s. 17 moves from #6 to #5) at the cost
of one new miss (IRPA s. 20 falls from top 5 to #10), because the
title-match heuristic is fooled by surface vocabulary overlap (IRPA
s. 192 "Immigration Appeal Division" wins against an IAD-appeals query
on title alone, even though the operative rule is s. 64). Net Hit@3 was
-0.01, so the swap is not worth the complexity.

EnsureLegislationTests is renamed to EnsurePrimaryTests, and a new test
checks that agreement candidates are eligible for the guarantee.

141-question eval: Hit@1 0.79 / Hit@3 0.96 / Hit@5 0.99 / Hit@10 0.99
/ MRR 0.88 (vs pre-sweep 0.79 / 0.96 / 0.98 / 0.99 / 0.87 -- Hit@5
and MRR each +0.01; top-5 misses 3 -> 2). Remaining top-5 misses: IRPA
s. 64 (Chieu outranks it on IAD appeals) and Khosa (Chieu outranks it on
IAD deference). The sweep log and JSON summary are committed for the record.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (4) hide show
  1. canlex/index.py +10 -8
  2. data/eval/sweep.json +272 -0
  3. data/eval/sweep.log +33 -0
  4. tests/test_index.py +26 -8
canlex/index.py CHANGED
@@ -23,12 +23,14 @@ MN_WEIGHT = float(os.environ.get("CANLEX_MN_WEIGHT", "0.0024"))
23
  MN_CAP = float(os.environ.get("CANLEX_MN_CAP", "0.012"))
24
  # ceiling on the title-match boost -- it nudges the ranking
25
  # without overriding a strong base score
26
- REG_PENALTY = float(os.environ.get("CANLEX_REG_PENALTY", "0.008"))
27
  # small fusion penalty on regulation sections, so the Act
28
  # that creates a duty outranks the regulation elaborating it
29
- BACKMATTER_PENALTY = float(os.environ.get("CANLEX_BACKMATTER_PENALTY", "0.008"))
 
30
  # likewise for a collective agreement's back-matter
31
  # (memoranda, letters of understanding) vs its numbered articles
 
32
  SOURCE_CAP = 2 # max chunks one case or memorandum may contribute
33
  APPENDIX_CAP = 3 # max referenced appendices co-surfaced into a result set
34
 
@@ -548,12 +550,12 @@ class LegislationIndex:
548
  pinned_set = set(pinned)
549
  candidates = pinned + [i for i in candidates if i not in pinned_set]
550
 
551
- # Cap one-source monopolies, then guarantee a primary instrument
552
- # (statute/agreement/directive/delegation) on the topic is represented.
553
- # The guarantee operates on a fixed visible window (5), not the full
554
- # top_k -- with top_k=20 (the eval default) the larger window almost
555
- # always contains incidental legislation, so the guarantee never fires
556
- # even when the GOVERNING provision is buried at rank 10+.
557
  candidates = self._diversify(candidates)
558
  candidates = self._ensure_primary(candidates, min(top_k, 5), q_tokens)
559
 
 
23
  MN_CAP = float(os.environ.get("CANLEX_MN_CAP", "0.012"))
24
  # ceiling on the title-match boost -- it nudges the ranking
25
  # without overriding a strong base score
26
+ REG_PENALTY = float(os.environ.get("CANLEX_REG_PENALTY", "0.004"))
27
  # small fusion penalty on regulation sections, so the Act
28
  # that creates a duty outranks the regulation elaborating it
29
+ # (sweep-tuned 2026-05-23 from 0.008 -> 0.004; see data/eval/sweep.log)
30
+ BACKMATTER_PENALTY = float(os.environ.get("CANLEX_BACKMATTER_PENALTY", "0.004"))
31
  # likewise for a collective agreement's back-matter
32
  # (memoranda, letters of understanding) vs its numbered articles
33
+ # (sweep-tuned 2026-05-23 from 0.008 -> 0.004)
34
  SOURCE_CAP = 2 # max chunks one case or memorandum may contribute
35
  APPENDIX_CAP = 3 # max referenced appendices co-surfaced into a result set
36
 
 
550
  pinned_set = set(pinned)
551
  candidates = pinned + [i for i in candidates if i not in pinned_set]
552
 
553
+ # Cap one-source monopolies, then guarantee a primary instrument on
554
+ # the topic is represented. The guarantee checks a fixed visible
555
+ # window of min(top_k, 5), not the full top_k -- with top_k=20 (the
556
+ # eval default) the full window would almost always hold incidental
557
+ # legislation, so the guarantee would never fire even when the
558
+ # governing provision is buried at rank 10+.
559
  candidates = self._diversify(candidates)
560
  candidates = self._ensure_primary(candidates, min(top_k, 5), q_tokens)
561
 
data/eval/sweep.json ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best": {
3
+ "CANLEX_MN_WEIGHT": 0.0024,
4
+ "CANLEX_MN_CAP": 0.012,
5
+ "CANLEX_REG_PENALTY": 0.004,
6
+ "CANLEX_BACKMATTER_PENALTY": 0.004
7
+ },
8
+ "best_metrics": {
9
+ "hit1": 0.79,
10
+ "hit3": 0.96,
11
+ "hit5": 0.99,
12
+ "hit10": 0.99,
13
+ "mrr": 0.88
14
+ },
15
+ "runs": [
16
+ {
17
+ "values": {
18
+ "CANLEX_MN_WEIGHT": 0.0024,
19
+ "CANLEX_MN_CAP": 0.012,
20
+ "CANLEX_REG_PENALTY": 0.008,
21
+ "CANLEX_BACKMATTER_PENALTY": 0.008
22
+ },
23
+ "metrics": {
24
+ "hit1": 0.79,
25
+ "hit3": 0.96,
26
+ "hit5": 0.98,
27
+ "hit10": 0.99,
28
+ "mrr": 0.87
29
+ }
30
+ },
31
+ {
32
+ "values": {
33
+ "CANLEX_MN_WEIGHT": 0.0012,
34
+ "CANLEX_MN_CAP": 0.012,
35
+ "CANLEX_REG_PENALTY": 0.008,
36
+ "CANLEX_BACKMATTER_PENALTY": 0.008
37
+ },
38
+ "metrics": {
39
+ "hit1": 0.78,
40
+ "hit3": 0.94,
41
+ "hit5": 0.98,
42
+ "hit10": 0.99,
43
+ "mrr": 0.86
44
+ }
45
+ },
46
+ {
47
+ "values": {
48
+ "CANLEX_MN_WEIGHT": 0.0024,
49
+ "CANLEX_MN_CAP": 0.012,
50
+ "CANLEX_REG_PENALTY": 0.008,
51
+ "CANLEX_BACKMATTER_PENALTY": 0.008
52
+ },
53
+ "metrics": {
54
+ "hit1": 0.79,
55
+ "hit3": 0.96,
56
+ "hit5": 0.98,
57
+ "hit10": 0.99,
58
+ "mrr": 0.87
59
+ }
60
+ },
61
+ {
62
+ "values": {
63
+ "CANLEX_MN_WEIGHT": 0.005,
64
+ "CANLEX_MN_CAP": 0.012,
65
+ "CANLEX_REG_PENALTY": 0.008,
66
+ "CANLEX_BACKMATTER_PENALTY": 0.008
67
+ },
68
+ "metrics": {
69
+ "hit1": 0.78,
70
+ "hit3": 0.94,
71
+ "hit5": 0.97,
72
+ "hit10": 0.99,
73
+ "mrr": 0.86
74
+ }
75
+ },
76
+ {
77
+ "values": {
78
+ "CANLEX_MN_WEIGHT": 0.01,
79
+ "CANLEX_MN_CAP": 0.012,
80
+ "CANLEX_REG_PENALTY": 0.008,
81
+ "CANLEX_BACKMATTER_PENALTY": 0.008
82
+ },
83
+ "metrics": {
84
+ "hit1": 0.75,
85
+ "hit3": 0.94,
86
+ "hit5": 0.96,
87
+ "hit10": 0.99,
88
+ "mrr": 0.84
89
+ }
90
+ },
91
+ {
92
+ "values": {
93
+ "CANLEX_MN_WEIGHT": 0.0024,
94
+ "CANLEX_MN_CAP": 0.006,
95
+ "CANLEX_REG_PENALTY": 0.008,
96
+ "CANLEX_BACKMATTER_PENALTY": 0.008
97
+ },
98
+ "metrics": {
99
+ "hit1": 0.77,
100
+ "hit3": 0.93,
101
+ "hit5": 0.96,
102
+ "hit10": 0.99,
103
+ "mrr": 0.85
104
+ }
105
+ },
106
+ {
107
+ "values": {
108
+ "CANLEX_MN_WEIGHT": 0.0024,
109
+ "CANLEX_MN_CAP": 0.012,
110
+ "CANLEX_REG_PENALTY": 0.008,
111
+ "CANLEX_BACKMATTER_PENALTY": 0.008
112
+ },
113
+ "metrics": {
114
+ "hit1": 0.79,
115
+ "hit3": 0.96,
116
+ "hit5": 0.98,
117
+ "hit10": 0.99,
118
+ "mrr": 0.87
119
+ }
120
+ },
121
+ {
122
+ "values": {
123
+ "CANLEX_MN_WEIGHT": 0.0024,
124
+ "CANLEX_MN_CAP": 0.024,
125
+ "CANLEX_REG_PENALTY": 0.008,
126
+ "CANLEX_BACKMATTER_PENALTY": 0.008
127
+ },
128
+ "metrics": {
129
+ "hit1": 0.77,
130
+ "hit3": 0.94,
131
+ "hit5": 0.98,
132
+ "hit10": 0.99,
133
+ "mrr": 0.86
134
+ }
135
+ },
136
+ {
137
+ "values": {
138
+ "CANLEX_MN_WEIGHT": 0.0024,
139
+ "CANLEX_MN_CAP": 0.05,
140
+ "CANLEX_REG_PENALTY": 0.008,
141
+ "CANLEX_BACKMATTER_PENALTY": 0.008
142
+ },
143
+ "metrics": {
144
+ "hit1": 0.77,
145
+ "hit3": 0.94,
146
+ "hit5": 0.97,
147
+ "hit10": 0.99,
148
+ "mrr": 0.86
149
+ }
150
+ },
151
+ {
152
+ "values": {
153
+ "CANLEX_MN_WEIGHT": 0.0024,
154
+ "CANLEX_MN_CAP": 0.012,
155
+ "CANLEX_REG_PENALTY": 0.004,
156
+ "CANLEX_BACKMATTER_PENALTY": 0.008
157
+ },
158
+ "metrics": {
159
+ "hit1": 0.79,
160
+ "hit3": 0.96,
161
+ "hit5": 0.98,
162
+ "hit10": 0.99,
163
+ "mrr": 0.88
164
+ }
165
+ },
166
+ {
167
+ "values": {
168
+ "CANLEX_MN_WEIGHT": 0.0024,
169
+ "CANLEX_MN_CAP": 0.012,
170
+ "CANLEX_REG_PENALTY": 0.008,
171
+ "CANLEX_BACKMATTER_PENALTY": 0.008
172
+ },
173
+ "metrics": {
174
+ "hit1": 0.79,
175
+ "hit3": 0.96,
176
+ "hit5": 0.98,
177
+ "hit10": 0.99,
178
+ "mrr": 0.87
179
+ }
180
+ },
181
+ {
182
+ "values": {
183
+ "CANLEX_MN_WEIGHT": 0.0024,
184
+ "CANLEX_MN_CAP": 0.012,
185
+ "CANLEX_REG_PENALTY": 0.016,
186
+ "CANLEX_BACKMATTER_PENALTY": 0.008
187
+ },
188
+ "metrics": {
189
+ "hit1": 0.79,
190
+ "hit3": 0.96,
191
+ "hit5": 0.98,
192
+ "hit10": 0.99,
193
+ "mrr": 0.88
194
+ }
195
+ },
196
+ {
197
+ "values": {
198
+ "CANLEX_MN_WEIGHT": 0.0024,
199
+ "CANLEX_MN_CAP": 0.012,
200
+ "CANLEX_REG_PENALTY": 0.032,
201
+ "CANLEX_BACKMATTER_PENALTY": 0.008
202
+ },
203
+ "metrics": {
204
+ "hit1": 0.79,
205
+ "hit3": 0.96,
206
+ "hit5": 0.98,
207
+ "hit10": 0.99,
208
+ "mrr": 0.88
209
+ }
210
+ },
211
+ {
212
+ "values": {
213
+ "CANLEX_MN_WEIGHT": 0.0024,
214
+ "CANLEX_MN_CAP": 0.012,
215
+ "CANLEX_REG_PENALTY": 0.004,
216
+ "CANLEX_BACKMATTER_PENALTY": 0.004
217
+ },
218
+ "metrics": {
219
+ "hit1": 0.79,
220
+ "hit3": 0.96,
221
+ "hit5": 0.99,
222
+ "hit10": 0.99,
223
+ "mrr": 0.88
224
+ }
225
+ },
226
+ {
227
+ "values": {
228
+ "CANLEX_MN_WEIGHT": 0.0024,
229
+ "CANLEX_MN_CAP": 0.012,
230
+ "CANLEX_REG_PENALTY": 0.004,
231
+ "CANLEX_BACKMATTER_PENALTY": 0.008
232
+ },
233
+ "metrics": {
234
+ "hit1": 0.79,
235
+ "hit3": 0.96,
236
+ "hit5": 0.98,
237
+ "hit10": 0.99,
238
+ "mrr": 0.88
239
+ }
240
+ },
241
+ {
242
+ "values": {
243
+ "CANLEX_MN_WEIGHT": 0.0024,
244
+ "CANLEX_MN_CAP": 0.012,
245
+ "CANLEX_REG_PENALTY": 0.004,
246
+ "CANLEX_BACKMATTER_PENALTY": 0.016
247
+ },
248
+ "metrics": {
249
+ "hit1": 0.79,
250
+ "hit3": 0.96,
251
+ "hit5": 0.98,
252
+ "hit10": 0.99,
253
+ "mrr": 0.87
254
+ }
255
+ },
256
+ {
257
+ "values": {
258
+ "CANLEX_MN_WEIGHT": 0.0024,
259
+ "CANLEX_MN_CAP": 0.012,
260
+ "CANLEX_REG_PENALTY": 0.004,
261
+ "CANLEX_BACKMATTER_PENALTY": 0.032
262
+ },
263
+ "metrics": {
264
+ "hit1": 0.79,
265
+ "hit3": 0.96,
266
+ "hit5": 0.99,
267
+ "hit10": 0.99,
268
+ "mrr": 0.87
269
+ }
270
+ }
271
+ ]
272
+ }
data/eval/sweep.log ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CanLex tuning sweep -- 2026-05-23 12:59:35
2
+
3
+ Baseline: {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.008, 'CANLEX_BACKMATTER_PENALTY': 0.008}
4
+ Hit@5=0.980 MRR=0.870
5
+
6
+ Sweeping CANLEX_MN_WEIGHT in [0.0012, 0.0024, 0.005, 0.01] (others held at {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.008, 'CANLEX_BACKMATTER_PENALTY': 0.008})
7
+ CANLEX_MN_WEIGHT=0.0012 -> Hit@1=0.780 Hit@3=0.940 Hit@5=0.980 Hit@10=0.990 MRR=0.860
8
+ CANLEX_MN_WEIGHT=0.0024 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.870
9
+ CANLEX_MN_WEIGHT=0.005 -> Hit@1=0.780 Hit@3=0.940 Hit@5=0.970 Hit@10=0.990 MRR=0.860
10
+ CANLEX_MN_WEIGHT=0.01 -> Hit@1=0.750 Hit@3=0.940 Hit@5=0.960 Hit@10=0.990 MRR=0.840
11
+
12
+ Sweeping CANLEX_MN_CAP in [0.006, 0.012, 0.024, 0.05] (others held at {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.008, 'CANLEX_BACKMATTER_PENALTY': 0.008})
13
+ CANLEX_MN_CAP=0.006 -> Hit@1=0.770 Hit@3=0.930 Hit@5=0.960 Hit@10=0.990 MRR=0.850
14
+ CANLEX_MN_CAP=0.012 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.870
15
+ CANLEX_MN_CAP=0.024 -> Hit@1=0.770 Hit@3=0.940 Hit@5=0.980 Hit@10=0.990 MRR=0.860
16
+ CANLEX_MN_CAP=0.05 -> Hit@1=0.770 Hit@3=0.940 Hit@5=0.970 Hit@10=0.990 MRR=0.860
17
+
18
+ Sweeping CANLEX_REG_PENALTY in [0.004, 0.008, 0.016, 0.032] (others held at {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.008, 'CANLEX_BACKMATTER_PENALTY': 0.008})
19
+ CANLEX_REG_PENALTY=0.004 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.880
20
+ CANLEX_REG_PENALTY=0.008 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.870
21
+ CANLEX_REG_PENALTY=0.016 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.880
22
+ CANLEX_REG_PENALTY=0.032 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.880
23
+ ! CANLEX_REG_PENALTY: 0.008 -> 0.004
24
+
25
+ Sweeping CANLEX_BACKMATTER_PENALTY in [0.004, 0.008, 0.016, 0.032] (others held at {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.004, 'CANLEX_BACKMATTER_PENALTY': 0.008})
26
+ CANLEX_BACKMATTER_PENALTY=0.004 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.990 Hit@10=0.990 MRR=0.880
27
+ CANLEX_BACKMATTER_PENALTY=0.008 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.880
28
+ CANLEX_BACKMATTER_PENALTY=0.016 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.980 Hit@10=0.990 MRR=0.870
29
+ CANLEX_BACKMATTER_PENALTY=0.032 -> Hit@1=0.790 Hit@3=0.960 Hit@5=0.990 Hit@10=0.990 MRR=0.870
30
+ ! CANLEX_BACKMATTER_PENALTY: 0.008 -> 0.004
31
+
32
+ Best: {'CANLEX_MN_WEIGHT': 0.0024, 'CANLEX_MN_CAP': 0.012, 'CANLEX_REG_PENALTY': 0.004, 'CANLEX_BACKMATTER_PENALTY': 0.004}
33
+ Hit@1=0.790 Hit@5=0.990 MRR=0.880
tests/test_index.py CHANGED
@@ -114,8 +114,8 @@ class DiversifyTests(unittest.TestCase):
114
  self.assertEqual(out, list(range(n))) # uncapped: order intact
115
 
116
 
117
- class EnsureLegislationTests(unittest.TestCase):
118
- def test_pulls_legislation_into_a_caselaw_dominated_top_k(self):
119
  idx = bare_index([
120
  chunk(doc_type="caselaw", act_code="A"),
121
  chunk(doc_type="caselaw", act_code="B"),
@@ -123,20 +123,38 @@ class EnsureLegislationTests(unittest.TestCase):
123
  chunk(doc_type="legislation"),
124
  chunk(doc_type="legislation"),
125
  ])
126
- out = idx._ensure_legislation([0, 1, 2, 3, 4], top_k=3)
127
  top = out[:3]
128
- n_leg = sum(1 for i in top
129
- if idx.chunks[i]["doc_type"] == "legislation")
130
- self.assertGreaterEqual(n_leg, 2)
131
  self.assertEqual(out[0], 0) # the #1 hit is preserved
132
 
133
- def test_no_op_when_legislation_already_present(self):
134
  idx = bare_index([
135
  chunk(doc_type="legislation"),
136
  chunk(doc_type="legislation"),
137
  chunk(doc_type="caselaw", act_code="A"),
138
  ])
139
- self.assertEqual(idx._ensure_legislation([0, 1, 2], top_k=3), [0, 1, 2])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
 
142
  class DocTypeFlagTests(unittest.TestCase):
 
114
  self.assertEqual(out, list(range(n))) # uncapped: order intact
115
 
116
 
117
+ class EnsurePrimaryTests(unittest.TestCase):
118
+ def test_pulls_primary_into_a_caselaw_dominated_top_k(self):
119
  idx = bare_index([
120
  chunk(doc_type="caselaw", act_code="A"),
121
  chunk(doc_type="caselaw", act_code="B"),
 
123
  chunk(doc_type="legislation"),
124
  chunk(doc_type="legislation"),
125
  ])
126
+ out = idx._ensure_primary([0, 1, 2, 3, 4], top_k=3, q_tokens=set())
127
  top = out[:3]
128
+ n_prim = sum(1 for i in top
129
+ if idx.chunks[i]["doc_type"] == "legislation")
130
+ self.assertGreaterEqual(n_prim, 2)
131
  self.assertEqual(out[0], 0) # the #1 hit is preserved
132
 
133
+ def test_no_op_when_primary_already_present(self):
134
  idx = bare_index([
135
  chunk(doc_type="legislation"),
136
  chunk(doc_type="legislation"),
137
  chunk(doc_type="caselaw", act_code="A"),
138
  ])
139
+ self.assertEqual(
140
+ idx._ensure_primary([0, 1, 2], top_k=3, q_tokens=set()),
141
+ [0, 1, 2])
142
+
143
+ def test_counts_agreements_as_primary(self):
144
+ # An agreement query that surfaces only case-law in top_k should
145
+ # have the agreement article pulled in -- not just legislation.
146
+ idx = bare_index([
147
+ chunk(doc_type="caselaw", act_code="A"),
148
+ chunk(doc_type="caselaw", act_code="B"),
149
+ chunk(doc_type="caselaw", act_code="C"),
150
+ chunk(doc_type="agreement", act_code="FB", section="17",
151
+ marginal_note="discipline"),
152
+ chunk(doc_type="agreement", act_code="FB", section="25",
153
+ marginal_note="hours of work"),
154
+ ])
155
+ out = idx._ensure_primary([0, 1, 2, 3, 4], top_k=3, q_tokens=set())
156
+ top_doc_types = [idx.chunks[i]["doc_type"] for i in out[:3]]
157
+ self.assertGreaterEqual(top_doc_types.count("agreement"), 2)
158
 
159
 
160
  class DocTypeFlagTests(unittest.TestCase):