shreyask committed on
Commit
eb89325
·
verified ·
1 Parent(s): bf69e75

feat: align browser demo with qmd pipeline

Browse files
src/App.tsx CHANGED
@@ -1,8 +1,8 @@
1
- import { useState, useEffect, useCallback } from 'react';
2
  import type { Document, Chunk, EmbeddedChunk, ModelState } from './types';
3
  import { loadAllModels, isAllModelsReady } from './pipeline/models';
4
  import { chunkDocument, extractTitle } from './pipeline/chunking';
5
- import { embedDocChunk } from './pipeline/embeddings';
6
  import { BM25Index } from './pipeline/bm25';
7
  import { runPipeline } from './pipeline/orchestrator';
8
  import type { PipelineState } from './components/PipelineView';
@@ -11,7 +11,6 @@ import ModelStatus from './components/ModelStatus';
11
  import PipelineView from './components/PipelineView';
12
  import DocumentManager from './components/DocumentManager';
13
 
14
- // Sample doc filenames to load from public/eval-docs/
15
  const SAMPLE_DOCS = [
16
  'api-design-principles.md',
17
  'distributed-systems-overview.md',
@@ -19,6 +18,19 @@ const SAMPLE_DOCS = [
19
  'history-of-coffee.md',
20
  ];
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  const INITIAL_PIPELINE: PipelineState = {
23
  expansion: { status: 'idle' },
24
  search: { status: 'idle' },
@@ -27,6 +39,51 @@ const INITIAL_PIPELINE: PipelineState = {
27
  blend: { status: 'idle' },
28
  };
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  function App() {
31
  const [models, setModels] = useState<ModelState[]>([
32
  { name: 'embedding', status: 'pending', progress: 0 },
@@ -39,87 +96,150 @@ function App() {
39
  const [bm25Index, setBm25Index] = useState<BM25Index | null>(null);
40
  const [pipeline, setPipeline] = useState<PipelineState>(INITIAL_PIPELINE);
41
  const [indexing, setIndexing] = useState(false);
 
42
  const [query, setQuery] = useState('');
 
 
 
 
 
 
43
 
44
- // Load models on mount
45
  useEffect(() => {
46
  loadAllModels((state) => {
47
- setModels(prev => prev.map(m => m.name === state.name ? state : m));
 
 
48
  }).catch(console.error);
49
  }, []);
50
 
51
- // Load sample documents
52
  useEffect(() => {
53
  async function loadSampleDocs() {
54
- const docs: Document[] = [];
55
- for (const filename of SAMPLE_DOCS) {
56
- const resp = await fetch(`/eval-docs/${filename}`);
57
- const body = await resp.text();
58
- const title = extractTitle(body, filename);
59
- docs.push({ id: filename, title, body, filepath: filename });
 
 
 
 
 
 
60
  }
61
- setDocuments(docs);
62
  }
 
63
  loadSampleDocs();
64
  }, []);
65
 
66
- // When documents change, chunk them and build BM25 index
67
- // When embedding model becomes ready, embed the chunks
68
  useEffect(() => {
69
- if (documents.length === 0) return;
 
 
 
 
 
 
 
70
 
71
- const allChunks = documents.flatMap(doc => chunkDocument(doc));
72
- setChunks(allChunks);
73
- setBm25Index(new BM25Index(allChunks));
 
 
 
 
74
 
75
- // Check if embedding model is ready for embedding
76
- const embeddingReady = models.find(m => m.name === 'embedding')?.status === 'ready';
77
- if (embeddingReady && allChunks.length > 0) {
 
 
 
 
 
 
 
78
  setIndexing(true);
79
- (async () => {
80
- const embedded: EmbeddedChunk[] = [];
81
- for (const chunk of allChunks) {
82
- const embedding = await embedDocChunk(chunk.title, chunk.text);
 
 
 
 
 
 
 
 
 
 
 
83
  embedded.push({ ...chunk, embedding });
84
  }
85
- setEmbeddedChunks(embedded);
86
- setIndexing(false);
87
- })();
 
 
 
 
 
 
 
 
88
  }
89
- }, [documents, models]);
90
 
91
- // Handle user upload
 
 
 
 
 
 
 
 
 
 
 
92
  const handleUpload = useCallback(async (files: FileList) => {
93
- const newDocs: Document[] = [];
94
- for (const file of Array.from(files)) {
95
- const body = await file.text();
96
- const title = extractTitle(body, file.name);
97
- newDocs.push({ id: file.name, title, body, filepath: file.name });
98
- }
99
- setDocuments(prev => [...prev, ...newDocs]);
 
 
100
  }, []);
101
 
102
- // Handle paste
103
  const handlePaste = useCallback((text: string, filename: string) => {
104
  const title = extractTitle(text, filename);
105
- setDocuments(prev => [...prev, { id: filename, title, body: text, filepath: filename }]);
 
 
106
  }, []);
107
 
108
- // Run search pipeline
109
  const handleSearch = useCallback(async (searchQuery: string) => {
110
  if (!bm25Index || embeddedChunks.length === 0) return;
111
 
 
112
  setQuery(searchQuery);
113
  setPipeline(INITIAL_PIPELINE);
114
 
115
- const gen = runPipeline({
116
  query: searchQuery,
117
  embeddedChunks,
118
  bm25Index,
119
  });
120
 
121
- for await (const event of gen) {
122
- setPipeline(prev => ({
 
 
123
  ...prev,
124
  [event.stage]: {
125
  status: event.status,
@@ -128,16 +248,12 @@ function App() {
128
  },
129
  }));
130
  }
131
- }, [bm25Index, embeddedChunks, chunks]);
132
 
133
  const allReady = isAllModelsReady() && embeddedChunks.length > 0 && !indexing;
134
 
135
- const [dark, setDark] = useState(() =>
136
- document.documentElement.getAttribute('data-theme') === 'dark'
137
- );
138
-
139
  const toggleTheme = useCallback(() => {
140
- setDark(prev => {
141
  const next = !prev;
142
  document.documentElement.setAttribute('data-theme', next ? 'dark' : 'light');
143
  localStorage.setItem('qmd-theme', next ? 'dark' : 'light');
@@ -146,11 +262,78 @@ function App() {
146
  }, []);
147
 
148
  return (
149
- <div style={{ fontFamily: 'system-ui, -apple-system, sans-serif', maxWidth: 1400, margin: '0 auto', padding: '1rem' }}>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  <header style={{ marginBottom: '1.5rem' }}>
151
- <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
152
- <h1 style={{ margin: 0, fontSize: '1.5rem', color: 'var(--text)' }}>QMD Web Demo</h1>
153
- <div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem' }}>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  <a
155
  href="https://github.com/tobi/qmd"
156
  target="_blank"
@@ -159,24 +342,25 @@ function App() {
159
  fontSize: '0.78rem',
160
  color: 'var(--text-secondary)',
161
  textDecoration: 'none',
162
- padding: '0.3rem 0.6rem',
163
  border: '1px solid var(--border)',
164
- borderRadius: '5px',
165
  fontFamily: 'system-ui, -apple-system, sans-serif',
 
166
  }}
167
- onMouseEnter={e => { (e.currentTarget as HTMLAnchorElement).style.color = '#4285F4'; }}
168
- onMouseLeave={e => { (e.currentTarget as HTMLAnchorElement).style.color = 'var(--text-secondary)'; }}
169
  >
170
- GitHub
171
  </a>
172
  <button
173
  onClick={toggleTheme}
174
  title={dark ? 'Switch to light mode' : 'Switch to dark mode'}
175
  style={{
176
- background: 'none',
177
  border: '1px solid var(--border)',
178
- borderRadius: '5px',
179
- padding: '0.3rem 0.55rem',
180
  cursor: 'pointer',
181
  fontSize: '1rem',
182
  lineHeight: 1,
@@ -187,22 +371,29 @@ function App() {
187
  </button>
188
  </div>
189
  </div>
190
- <p style={{ margin: '0.25rem 0 0', color: 'var(--text-secondary)', fontSize: '0.85rem', lineHeight: 1.5 }}>
191
- In-browser hybrid search pipeline running entirely on WebGPU.
192
- Three ONNX models (embedding, reranker, query expansion) power a full
193
- search stack: query expansion, BM25 + vector search, RRF fusion, and cross-encoder reranking.
194
- Built with{' '}
195
- <a href="https://github.com/tobi/qmd" target="_blank" rel="noopener noreferrer" style={{ color: '#4285F4', textDecoration: 'none' }}>QMD</a>
196
- {' '}and{' '}
197
- <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener noreferrer" style={{ color: '#4285F4', textDecoration: 'none' }}>Transformers.js</a>.
198
- </p>
199
  </header>
200
 
201
  <ModelStatus models={models} />
202
 
203
  {indexing && (
204
- <div style={{ padding: '0.5rem 1rem', background: 'var(--indexing-bg)', borderRadius: 6, marginBottom: '1rem', fontSize: '0.85rem', color: 'var(--text)' }}>
205
- Indexing documents (embedding chunks)...
 
 
 
 
 
 
 
 
 
 
206
  </div>
207
  )}
208
 
@@ -211,7 +402,7 @@ function App() {
211
  {query && <PipelineView state={pipeline} query={query} />}
212
 
213
  <DocumentManager
214
- documents={documents.map(d => ({ id: d.id, title: d.title, filepath: d.filepath }))}
215
  onUpload={handleUpload}
216
  onPaste={handlePaste}
217
  />
 
1
+ import { useState, useEffect, useCallback, useRef } from 'react';
2
  import type { Document, Chunk, EmbeddedChunk, ModelState } from './types';
3
  import { loadAllModels, isAllModelsReady } from './pipeline/models';
4
  import { chunkDocument, extractTitle } from './pipeline/chunking';
5
+ import { embedDocChunksBatch } from './pipeline/embeddings';
6
  import { BM25Index } from './pipeline/bm25';
7
  import { runPipeline } from './pipeline/orchestrator';
8
  import type { PipelineState } from './components/PipelineView';
 
11
  import PipelineView from './components/PipelineView';
12
  import DocumentManager from './components/DocumentManager';
13
 
 
14
  const SAMPLE_DOCS = [
15
  'api-design-principles.md',
16
  'distributed-systems-overview.md',
 
18
  'history-of-coffee.md',
19
  ];
20
 
21
+ const SHOWCASE_CARDS = [
22
+ {
23
+ title: 'Faithful to qmd',
24
+ body: 'BM25, vector search, query expansion, RRF fusion, and reranking follow the upstream retrieval recipe instead of flattening everything into one model call.',
25
+ },
26
+ {
27
+ title: 'Browser-native bits',
28
+ body: 'Transformers.js and WebGPU run the pipeline locally, cache model weights in the browser, and expose each stage so the search system stays inspectable.',
29
+ },
30
+ ];
31
+
32
+ const INDEX_BATCH_SIZE = 8;
33
+
34
  const INITIAL_PIPELINE: PipelineState = {
35
  expansion: { status: 'idle' },
36
  search: { status: 'idle' },
 
39
  blend: { status: 'idle' },
40
  };
41
 
42
+ function upsertDocuments(current: Document[], incoming: Document[]): Document[] {
43
+ const merged = new Map(current.map((doc) => [doc.id, doc]));
44
+ for (const doc of incoming) {
45
+ merged.set(doc.id, doc);
46
+ }
47
+ return [...merged.values()];
48
+ }
49
+
50
+ function ShowcaseCard({ title, body }: { title: string; body: string }) {
51
+ return (
52
+ <div
53
+ style={{
54
+ padding: '0.9rem 1rem',
55
+ background: 'var(--bg-card)',
56
+ border: '1px solid var(--border)',
57
+ borderRadius: '10px',
58
+ boxShadow: '0 2px 12px var(--shadow)',
59
+ }}
60
+ >
61
+ <div
62
+ style={{
63
+ marginBottom: '0.35rem',
64
+ fontSize: '0.74rem',
65
+ fontWeight: 700,
66
+ letterSpacing: '0.08em',
67
+ textTransform: 'uppercase',
68
+ color: '#4285F4',
69
+ }}
70
+ >
71
+ {title}
72
+ </div>
73
+ <p
74
+ style={{
75
+ margin: 0,
76
+ fontSize: '0.84rem',
77
+ lineHeight: 1.6,
78
+ color: 'var(--text-secondary)',
79
+ }}
80
+ >
81
+ {body}
82
+ </p>
83
+ </div>
84
+ );
85
+ }
86
+
87
  function App() {
88
  const [models, setModels] = useState<ModelState[]>([
89
  { name: 'embedding', status: 'pending', progress: 0 },
 
96
  const [bm25Index, setBm25Index] = useState<BM25Index | null>(null);
97
  const [pipeline, setPipeline] = useState<PipelineState>(INITIAL_PIPELINE);
98
  const [indexing, setIndexing] = useState(false);
99
+ const [indexingProgress, setIndexingProgress] = useState({ completed: 0, total: 0 });
100
  const [query, setQuery] = useState('');
101
+ const [dark, setDark] = useState(() =>
102
+ document.documentElement.getAttribute('data-theme') === 'dark',
103
+ );
104
+ const searchRunIdRef = useRef(0);
105
+
106
+ const embeddingReady = models.find((model) => model.name === 'embedding')?.status === 'ready';
107
 
 
108
  useEffect(() => {
109
  loadAllModels((state) => {
110
+ setModels((prev) => prev.map((model) => (
111
+ model.name === state.name ? state : model
112
+ )));
113
  }).catch(console.error);
114
  }, []);
115
 
 
116
  useEffect(() => {
117
  async function loadSampleDocs() {
118
+ try {
119
+ const loadedDocs = await Promise.all(
120
+ SAMPLE_DOCS.map(async (filename) => {
121
+ const response = await fetch(`/eval-docs/${filename}`);
122
+ const body = await response.text();
123
+ const title = extractTitle(body, filename);
124
+ return { id: filename, title, body, filepath: filename };
125
+ }),
126
+ );
127
+ setDocuments((prev) => upsertDocuments(prev, loadedDocs));
128
+ } catch (error) {
129
+ console.error(error);
130
  }
 
131
  }
132
+
133
  loadSampleDocs();
134
  }, []);
135
 
 
 
136
  useEffect(() => {
137
+ if (documents.length === 0) {
138
+ setChunks([]);
139
+ setEmbeddedChunks([]);
140
+ setBm25Index(null);
141
+ setIndexing(false);
142
+ setIndexingProgress({ completed: 0, total: 0 });
143
+ return;
144
+ }
145
 
146
+ const nextChunks = documents.flatMap((doc) => chunkDocument(doc));
147
+ setChunks(nextChunks);
148
+ setBm25Index(new BM25Index(nextChunks));
149
+ }, [documents]);
150
+
151
+ useEffect(() => {
152
+ let cancelled = false;
153
 
154
+ if (!embeddingReady || chunks.length === 0) {
155
+ setEmbeddedChunks([]);
156
+ setIndexing(false);
157
+ setIndexingProgress({ completed: 0, total: chunks.length });
158
+ return () => {
159
+ cancelled = true;
160
+ };
161
+ }
162
+
163
+ async function embedChunks() {
164
  setIndexing(true);
165
+ setIndexingProgress({ completed: 0, total: chunks.length });
166
+
167
+ const embedded: EmbeddedChunk[] = [];
168
+ for (let i = 0; i < chunks.length; i += INDEX_BATCH_SIZE) {
169
+ const batch = chunks.slice(i, i + INDEX_BATCH_SIZE);
170
+ const embeddings = await embedDocChunksBatch(
171
+ batch.map((chunk) => ({ title: chunk.title, text: chunk.text })),
172
+ );
173
+
174
+ if (cancelled) return;
175
+
176
+ for (let j = 0; j < batch.length; j++) {
177
+ const chunk = batch[j];
178
+ const embedding = embeddings[j];
179
+ if (!chunk || !embedding) continue;
180
  embedded.push({ ...chunk, embedding });
181
  }
182
+
183
+ setIndexingProgress({
184
+ completed: Math.min(i + batch.length, chunks.length),
185
+ total: chunks.length,
186
+ });
187
+ }
188
+
189
+ if (cancelled) return;
190
+
191
+ setEmbeddedChunks(embedded);
192
+ setIndexing(false);
193
  }
 
194
 
195
+ embedChunks().catch((error) => {
196
+ if (cancelled) return;
197
+ console.error(error);
198
+ setEmbeddedChunks([]);
199
+ setIndexing(false);
200
+ });
201
+
202
+ return () => {
203
+ cancelled = true;
204
+ };
205
+ }, [chunks, embeddingReady]);
206
+
207
  const handleUpload = useCallback(async (files: FileList) => {
208
+ const uploadedDocs = await Promise.all(
209
+ Array.from(files).map(async (file) => {
210
+ const body = await file.text();
211
+ const title = extractTitle(body, file.name);
212
+ return { id: file.name, title, body, filepath: file.name };
213
+ }),
214
+ );
215
+
216
+ setDocuments((prev) => upsertDocuments(prev, uploadedDocs));
217
  }, []);
218
 
 
219
  const handlePaste = useCallback((text: string, filename: string) => {
220
  const title = extractTitle(text, filename);
221
+ setDocuments((prev) => upsertDocuments(prev, [
222
+ { id: filename, title, body: text, filepath: filename },
223
+ ]));
224
  }, []);
225
 
 
226
  const handleSearch = useCallback(async (searchQuery: string) => {
227
  if (!bm25Index || embeddedChunks.length === 0) return;
228
 
229
+ const runId = ++searchRunIdRef.current;
230
  setQuery(searchQuery);
231
  setPipeline(INITIAL_PIPELINE);
232
 
233
+ const generator = runPipeline({
234
  query: searchQuery,
235
  embeddedChunks,
236
  bm25Index,
237
  });
238
 
239
+ for await (const event of generator) {
240
+ if (searchRunIdRef.current !== runId) return;
241
+
242
+ setPipeline((prev) => ({
243
  ...prev,
244
  [event.stage]: {
245
  status: event.status,
 
248
  },
249
  }));
250
  }
251
+ }, [bm25Index, embeddedChunks]);
252
 
253
  const allReady = isAllModelsReady() && embeddedChunks.length > 0 && !indexing;
254
 
 
 
 
 
255
  const toggleTheme = useCallback(() => {
256
+ setDark((prev) => {
257
  const next = !prev;
258
  document.documentElement.setAttribute('data-theme', next ? 'dark' : 'light');
259
  localStorage.setItem('qmd-theme', next ? 'dark' : 'light');
 
262
  }, []);
263
 
264
  return (
265
+ <div
266
+ style={{
267
+ fontFamily: 'system-ui, -apple-system, sans-serif',
268
+ maxWidth: 1400,
269
+ margin: '0 auto',
270
+ padding: '1.25rem 1rem 2rem',
271
+ }}
272
+ >
273
+ <style>{`
274
+ .showcase-grid {
275
+ display: grid;
276
+ grid-template-columns: repeat(2, minmax(0, 1fr));
277
+ gap: 0.85rem;
278
+ margin-top: 1rem;
279
+ }
280
+
281
+ @media (max-width: 900px) {
282
+ .showcase-grid {
283
+ grid-template-columns: 1fr;
284
+ }
285
+ }
286
+ `}</style>
287
+
288
  <header style={{ marginBottom: '1.5rem' }}>
289
+ <div style={{ display: 'flex', alignItems: 'flex-start', justifyContent: 'space-between', gap: '1rem' }}>
290
+ <div style={{ flex: 1 }}>
291
+ <div
292
+ style={{
293
+ marginBottom: '0.4rem',
294
+ fontSize: '0.74rem',
295
+ fontWeight: 700,
296
+ letterSpacing: '0.08em',
297
+ textTransform: 'uppercase',
298
+ color: '#4285F4',
299
+ }}
300
+ >
301
+ QMD in the browser
302
+ </div>
303
+ <h1 style={{ margin: 0, fontSize: '1.7rem', color: 'var(--text)' }}>
304
+ QMD Web Sandbox
305
+ </h1>
306
+ <p style={{ margin: '0.45rem 0 0', color: 'var(--text-secondary)', fontSize: '0.9rem', lineHeight: 1.65, maxWidth: 860 }}>
307
+ A browser-native sandbox that recreates the core{' '}
308
+ <a href="https://github.com/tobi/qmd" target="_blank" rel="noopener noreferrer" style={{ color: '#4285F4', textDecoration: 'none' }}>qmd</a>
309
+ {' '}retrieval pipeline with Transformers.js, while making the local WebGPU execution path visible.
310
+ Documents are chunked, embedded, searched, fused, reranked, and inspected entirely in the browser.
311
+ </p>
312
+ <div
313
+ style={{
314
+ marginTop: '0.7rem',
315
+ display: 'inline-flex',
316
+ alignItems: 'center',
317
+ gap: '0.45rem',
318
+ padding: '0.4rem 0.7rem',
319
+ borderRadius: '999px',
320
+ border: '1px solid var(--border)',
321
+ background: 'var(--bg-card)',
322
+ color: 'var(--text-secondary)',
323
+ fontSize: '0.78rem',
324
+ boxShadow: '0 2px 10px var(--shadow)',
325
+ }}
326
+ >
327
+ <span style={{ color: '#34a853', fontWeight: 700 }}>Browser bits:</span>
328
+ <span>WebGPU inference</span>
329
+ <span style={{ color: 'var(--text-muted)' }}>•</span>
330
+ <span>local model cache</span>
331
+ <span style={{ color: 'var(--text-muted)' }}>•</span>
332
+ <span>transparent pipeline</span>
333
+ </div>
334
+ </div>
335
+
336
+ <div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem', flexShrink: 0 }}>
337
  <a
338
  href="https://github.com/tobi/qmd"
339
  target="_blank"
 
342
  fontSize: '0.78rem',
343
  color: 'var(--text-secondary)',
344
  textDecoration: 'none',
345
+ padding: '0.35rem 0.7rem',
346
  border: '1px solid var(--border)',
347
+ borderRadius: '999px',
348
  fontFamily: 'system-ui, -apple-system, sans-serif',
349
+ background: 'var(--bg-card)',
350
  }}
351
+ onMouseEnter={(event) => { event.currentTarget.style.color = '#4285F4'; }}
352
+ onMouseLeave={(event) => { event.currentTarget.style.color = 'var(--text-secondary)'; }}
353
  >
354
+ Original qmd
355
  </a>
356
  <button
357
  onClick={toggleTheme}
358
  title={dark ? 'Switch to light mode' : 'Switch to dark mode'}
359
  style={{
360
+ background: 'var(--bg-card)',
361
  border: '1px solid var(--border)',
362
+ borderRadius: '999px',
363
+ padding: '0.35rem 0.6rem',
364
  cursor: 'pointer',
365
  fontSize: '1rem',
366
  lineHeight: 1,
 
371
  </button>
372
  </div>
373
  </div>
374
+
375
+ <div className="showcase-grid">
376
+ {SHOWCASE_CARDS.map((card) => (
377
+ <ShowcaseCard key={card.title} title={card.title} body={card.body} />
378
+ ))}
379
+ </div>
 
 
 
380
  </header>
381
 
382
  <ModelStatus models={models} />
383
 
384
  {indexing && (
385
+ <div
386
+ style={{
387
+ padding: '0.6rem 1rem',
388
+ background: 'var(--indexing-bg)',
389
+ borderRadius: 8,
390
+ marginBottom: '1rem',
391
+ fontSize: '0.84rem',
392
+ color: 'var(--text)',
393
+ border: '1px solid var(--border)',
394
+ }}
395
+ >
396
+ Indexing local chunks in the browser ({indexingProgress.completed}/{indexingProgress.total})...
397
  </div>
398
  )}
399
 
 
402
  {query && <PipelineView state={pipeline} query={query} />}
403
 
404
  <DocumentManager
405
+ documents={documents.map((doc) => ({ id: doc.id, title: doc.title, filepath: doc.filepath }))}
406
  onUpload={handleUpload}
407
  onPaste={handlePaste}
408
  />
src/components/DocumentManager.tsx CHANGED
@@ -221,7 +221,7 @@ export default function DocumentManager({ documents, onUpload, onPaste }: Docume
221
 
222
  {documents.length === 0 ? (
223
  <p style={{ fontSize: '0.82rem', color: 'var(--text-muted)', margin: 0 }}>
224
- No documents loaded. Upload .md or .txt files, or paste text.
225
  </p>
226
  ) : (
227
  <div style={{ maxHeight: '180px', overflowY: 'auto' }}>
 
221
 
222
  {documents.length === 0 ? (
223
  <p style={{ fontSize: '0.82rem', color: 'var(--text-muted)', margin: 0 }}>
224
+ No documents loaded. Upload .md or .txt files, or paste text. They stay local to this browser session.
225
  </p>
226
  ) : (
227
  <div style={{ maxHeight: '180px', overflowY: 'auto' }}>
src/components/ExpansionColumn.tsx CHANGED
@@ -61,6 +61,24 @@ function ExpansionCard({ label, content }: { label: string; content: string | st
61
  );
62
  }
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  export default function ExpansionColumn({ state, info }: ExpansionColumnProps) {
65
  const isIdle = state.status === 'idle';
66
  const isRunning = state.status === 'running';
@@ -129,9 +147,19 @@ export default function ExpansionColumn({ state, info }: ExpansionColumnProps) {
129
 
130
  {isDone && state.data && (
131
  <>
132
- <ExpansionCard label="HyDE (Hypothetical Document)" content={state.data.hyde} />
133
- <ExpansionCard label="Vec Sentences" content={state.data.vec} />
134
- <ExpansionCard label="Lex Keywords" content={state.data.lex} />
 
 
 
 
 
 
 
 
 
 
135
  </>
136
  )}
137
  </div>
 
61
  );
62
  }
63
 
64
+ function ExpansionNote({ text }: { text: string }) {
65
+ return (
66
+ <div style={{
67
+ padding: '0.65rem 0.8rem',
68
+ marginBottom: '0.6rem',
69
+ background: 'var(--bg-card)',
70
+ border: '1px solid var(--border)',
71
+ borderRadius: '6px',
72
+ fontFamily: 'system-ui, -apple-system, sans-serif',
73
+ fontSize: '0.8rem',
74
+ color: 'var(--text-secondary)',
75
+ lineHeight: 1.55,
76
+ }}>
77
+ {text}
78
+ </div>
79
+ );
80
+ }
81
+
82
  export default function ExpansionColumn({ state, info }: ExpansionColumnProps) {
83
  const isIdle = state.status === 'idle';
84
  const isRunning = state.status === 'running';
 
147
 
148
  {isDone && state.data && (
149
  <>
150
+ {state.data.note && <ExpansionNote text={state.data.note} />}
151
+ {state.data.hyde.trim() && (
152
+ <ExpansionCard label="HyDE (Hypothetical Document)" content={state.data.hyde} />
153
+ )}
154
+ {state.data.vec.length > 0 && (
155
+ <ExpansionCard label="Vec Sentences" content={state.data.vec} />
156
+ )}
157
+ {state.data.lex.trim() && (
158
+ <ExpansionCard label="Lex Keywords" content={state.data.lex} />
159
+ )}
160
+ {!state.data.note && !state.data.hyde.trim() && state.data.vec.length === 0 && !state.data.lex.trim() && (
161
+ <ExpansionNote text="No additional query variants were generated." />
162
+ )}
163
  </>
164
  )}
165
  </div>
src/components/FusionColumn.tsx CHANGED
@@ -96,7 +96,7 @@ function RRFRow({ result, rank }: { result: RRFResult; rank: number }) {
96
 
97
  function BeforeAfterComparison({ before, after }: { before: RRFResult[]; after: RerankedResult[] }) {
98
  const top5before = before.slice(0, 5);
99
- const top5after = [...after].sort((a, b) => b.blendedScore - a.blendedScore).slice(0, 5);
100
 
101
  return (
102
  <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: '0.5rem' }}>
 
96
 
97
  function BeforeAfterComparison({ before, after }: { before: RRFResult[]; after: RerankedResult[] }) {
98
  const top5before = before.slice(0, 5);
99
+ const top5after = [...after].sort((a, b) => b.rerankScore - a.rerankScore).slice(0, 5);
100
 
101
  return (
102
  <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: '0.5rem' }}>
src/components/ModelStatus.tsx CHANGED
@@ -99,7 +99,11 @@ function ModelRow({ model }: { model: ModelState }) {
99
  }
100
 
101
  export default function ModelStatus({ models }: ModelStatusProps) {
102
- const allReady = models.length > 0 && models.every(m => m.status === 'ready');
 
 
 
 
103
 
104
  return (
105
  <div style={{
@@ -126,18 +130,18 @@ export default function ModelStatus({ models }: ModelStatusProps) {
126
  }}>
127
  Models
128
  </h3>
129
- {allReady && (
130
  <span style={{
131
  fontSize: '0.75rem',
132
  fontFamily: 'system-ui, -apple-system, sans-serif',
133
  color: '#388e3c',
134
  fontWeight: 600,
135
  }}>
136
- All ready
137
  </span>
138
  )}
139
  </div>
140
- {!allReady && (
141
  <p style={{
142
  margin: '0 0 0.5rem',
143
  fontSize: '0.75rem',
@@ -145,7 +149,29 @@ export default function ModelStatus({ models }: ModelStatusProps) {
145
  color: 'var(--text-secondary)',
146
  lineHeight: 1.4,
147
  }}>
148
- First load downloads ~4 GB of model weights. Subsequent visits use the browser cache.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  </p>
150
  )}
151
  {models.map(m => (
 
99
  }
100
 
101
  export default function ModelStatus({ models }: ModelStatusProps) {
102
+ const coreModels = models.filter((model) => model.name !== 'expansion');
103
+ const expansionModel = models.find((model) => model.name === 'expansion');
104
+ const coreReady = coreModels.length > 0 && coreModels.every((model) => model.status === 'ready');
105
+ const expansionReady = expansionModel?.status === 'ready';
106
+ const expansionUnavailable = expansionModel?.status === 'error';
107
 
108
  return (
109
  <div style={{
 
130
  }}>
131
  Models
132
  </h3>
133
+ {coreReady && (
134
  <span style={{
135
  fontSize: '0.75rem',
136
  fontFamily: 'system-ui, -apple-system, sans-serif',
137
  color: '#388e3c',
138
  fontWeight: 600,
139
  }}>
140
+ Search ready
141
  </span>
142
  )}
143
  </div>
144
+ {!coreReady && (
145
  <p style={{
146
  margin: '0 0 0.5rem',
147
  fontSize: '0.75rem',
 
149
  color: 'var(--text-secondary)',
150
  lineHeight: 1.4,
151
  }}>
152
+ First load downloads several GB of model weights. Subsequent visits use the browser cache.
153
+ </p>
154
+ )}
155
+ {coreReady && !expansionReady && !expansionUnavailable && (
156
+ <p style={{
157
+ margin: '0 0 0.5rem',
158
+ fontSize: '0.75rem',
159
+ fontFamily: 'system-ui, -apple-system, sans-serif',
160
+ color: 'var(--text-secondary)',
161
+ lineHeight: 1.4,
162
+ }}>
163
+ Embedding and reranker are ready. Expansion is optional and will join when it finishes loading.
164
+ </p>
165
+ )}
166
+ {coreReady && expansionUnavailable && (
167
+ <p style={{
168
+ margin: '0 0 0.5rem',
169
+ fontSize: '0.75rem',
170
+ fontFamily: 'system-ui, -apple-system, sans-serif',
171
+ color: '#c62828',
172
+ lineHeight: 1.4,
173
+ }}>
174
+ Expansion is optional. Search still works with the original query when the expansion model is unavailable.
175
  </p>
176
  )}
177
  {models.map(m => (
src/components/PipelineView.tsx CHANGED
@@ -28,19 +28,19 @@ const COLUMNS = [
28
  label: 'Query Expansion',
29
  bg: 'var(--col-expansion)',
30
  headerColor: '#f57f17',
31
- info: 'A fine-tuned 1.7B LLM generates three query variants: lexical keywords (lex) for BM25, semantic sentences (vec) for vector search, and a hypothetical document (HyDE) to improve recall.',
32
  },
33
  {
34
  label: 'Parallel Search',
35
  bg: 'var(--col-search)',
36
  headerColor: '#00897b',
37
- info: 'Two search strategies run simultaneously: BM25 keyword search (exact term matching) and vector similarity search (semantic meaning via embeddings). Each finds relevant document chunks independently.',
38
  },
39
  {
40
  label: 'Fusion & Reranking',
41
  bg: 'var(--col-fusion)',
42
  headerColor: '#388e3c',
43
- info: 'Results are merged via Reciprocal Rank Fusion (RRF), then a cross-encoder reranker (Qwen3-Reranker-0.6B) re-scores the top candidates for precision. Final scores blend RRF and reranker signals.',
44
  },
45
  ];
46
 
@@ -161,7 +161,9 @@ export default function PipelineView({ state, query }: PipelineViewProps) {
161
  }
162
  `}</style>
163
 
164
- <div style={{
 
 
165
  display: 'grid',
166
  gridTemplateColumns: 'minmax(100px, 0.6fr) minmax(120px, 0.8fr) minmax(200px, 1.5fr) minmax(200px, 2fr)',
167
  gap: '0',
@@ -169,10 +171,12 @@ export default function PipelineView({ state, query }: PipelineViewProps) {
169
  overflow: 'hidden',
170
  border: '1px solid var(--border)',
171
  boxShadow: '0 2px 12px var(--shadow)',
172
- }}>
 
173
  {COLUMNS.map((col, i) => (
174
  <div
175
  key={col.label}
 
176
  style={{
177
  background: col.bg,
178
  padding: '1rem',
@@ -200,6 +204,15 @@ export default function PipelineView({ state, query }: PipelineViewProps) {
200
  .pipeline-grid {
201
  grid-template-columns: 1fr !important;
202
  }
 
 
 
 
 
 
 
 
 
203
  }
204
  `}</style>
205
  </>
 
28
  label: 'Query Expansion',
29
  bg: 'var(--col-expansion)',
30
  headerColor: '#f57f17',
31
+ info: 'A fine-tuned 1.7B LLM can generate lexical keywords (lex), semantic sentences (vec), and a hypothetical document (HyDE). When BM25 already has a strong exact match, expansion is skipped to stay closer to qmd.',
32
  },
33
  {
34
  label: 'Parallel Search',
35
  bg: 'var(--col-search)',
36
  headerColor: '#00897b',
37
+ info: 'The original query always runs through BM25 and vector search. Lex variants route only to BM25, while vec and HyDE variants route to vector search, mirroring qmd’s typed retrieval flow.',
38
  },
39
  {
40
  label: 'Fusion & Reranking',
41
  bg: 'var(--col-fusion)',
42
  headerColor: '#388e3c',
43
+ info: 'Results are merged via Reciprocal Rank Fusion (RRF), then a cross-encoder reranker (Qwen3-Reranker-0.6B) re-scores the top candidates. Final ranking blends reranker confidence with RRF position, not raw retrieval scores.',
44
  },
45
  ];
46
 
 
161
  }
162
  `}</style>
163
 
164
+ <div
165
+ className="pipeline-grid"
166
+ style={{
167
  display: 'grid',
168
  gridTemplateColumns: 'minmax(100px, 0.6fr) minmax(120px, 0.8fr) minmax(200px, 1.5fr) minmax(200px, 2fr)',
169
  gap: '0',
 
171
  overflow: 'hidden',
172
  border: '1px solid var(--border)',
173
  boxShadow: '0 2px 12px var(--shadow)',
174
+ }}
175
+ >
176
  {COLUMNS.map((col, i) => (
177
  <div
178
  key={col.label}
179
+ className="pipeline-cell"
180
  style={{
181
  background: col.bg,
182
  padding: '1rem',
 
204
  .pipeline-grid {
205
  grid-template-columns: 1fr !important;
206
  }
207
+
208
+ .pipeline-cell {
209
+ border-right: none !important;
210
+ border-bottom: 1px solid var(--border);
211
+ }
212
+
213
+ .pipeline-cell:last-child {
214
+ border-bottom: none;
215
+ }
216
  }
217
  `}</style>
218
  </>
src/components/QueryInput.tsx CHANGED
@@ -28,7 +28,7 @@ export default function QueryInput({ onSearch, disabled }: QueryInputProps) {
28
  value={query}
29
  onChange={e => setQuery(e.target.value)}
30
  disabled={disabled}
31
- placeholder={disabled ? 'Loading models\u2026' : 'Enter a search query\u2026'}
32
  style={{
33
  flex: 1,
34
  padding: '0.6rem 0.9rem',
@@ -66,7 +66,7 @@ export default function QueryInput({ onSearch, disabled }: QueryInputProps) {
66
 
67
  <div style={{ marginTop: '0.6rem', display: 'flex', gap: '0.4rem', flexWrap: 'wrap', alignItems: 'center' }}>
68
  <span style={{ fontSize: '0.8rem', color: 'var(--text-secondary)', fontFamily: 'system-ui, -apple-system, sans-serif' }}>
69
- Examples:
70
  </span>
71
  {EXAMPLE_QUERIES.map(q => (
72
  <button
 
28
  value={query}
29
  onChange={e => setQuery(e.target.value)}
30
  disabled={disabled}
31
+ placeholder={disabled ? 'Loading browser models\u2026' : 'Enter a search query\u2026'}
32
  style={{
33
  flex: 1,
34
  padding: '0.6rem 0.9rem',
 
66
 
67
  <div style={{ marginTop: '0.6rem', display: 'flex', gap: '0.4rem', flexWrap: 'wrap', alignItems: 'center' }}>
68
  <span style={{ fontSize: '0.8rem', color: 'var(--text-secondary)', fontFamily: 'system-ui, -apple-system, sans-serif' }}>
69
+ Demo queries:
70
  </span>
71
  {EXAMPLE_QUERIES.map(q => (
72
  <button
src/constants.ts CHANGED
@@ -11,6 +11,15 @@ export const RRF_SECONDARY_WEIGHT = 1.0;
11
  export const RRF_RANK1_BONUS = 0.05;
12
  export const RRF_RANK2_BONUS = 0.02;
13
 
 
 
 
 
 
 
 
 
 
14
 
15
  // BM25
16
  export const BM25_K1 = 1.2;
 
11
  export const RRF_RANK1_BONUS = 0.05;
12
  export const RRF_RANK2_BONUS = 0.02;
13
 
14
+ // Strong lexical match detection
15
+ export const STRONG_SIGNAL_MIN_SCORE = 0.85;
16
+ export const STRONG_SIGNAL_MIN_GAP = 0.15;
17
+
18
+ // Position-aware blending
19
+ export const BLEND_TOP3_RRF_WEIGHT = 0.75;
20
+ export const BLEND_TOP10_RRF_WEIGHT = 0.6;
21
+ export const BLEND_TAIL_RRF_WEIGHT = 0.4;
22
+
23
 
24
  // BM25
25
  export const BM25_K1 = 1.2;
src/pipeline/blend.test.ts CHANGED
@@ -1,12 +1,12 @@
1
  import { describe, it, expect } from "vitest";
2
  import { blendScores } from "./blend";
3
  import type { RRFResult } from "../types";
 
 
 
 
 
4
 
5
- const RRF_W = 0.8; // must match BLEND_RRF_WEIGHT in blend.ts
6
-
7
- // ---------------------------------------------------------------------------
8
- // Helpers
9
- // ---------------------------------------------------------------------------
10
  function makeRRFResult(
11
  docId: string,
12
  score: number,
@@ -22,110 +22,111 @@ function makeRRFResult(
22
  };
23
  }
24
 
25
- // ---------------------------------------------------------------------------
26
- // blendScores
27
- // ---------------------------------------------------------------------------
28
  describe("blendScores", () => {
29
  it("returns empty array for empty input", () => {
30
  expect(blendScores([], new Map())).toEqual([]);
31
  });
32
 
33
- it("applies uniform 70/30 weight to all ranks", () => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  const rrfResults = [
35
  makeRRFResult("doc1", 0.5),
36
  makeRRFResult("doc2", 0.4),
37
  makeRRFResult("doc3", 0.3),
 
38
  ];
39
- const rerankScores = new Map([
40
- ["doc1", 0.9],
41
- ["doc2", 0.8],
42
- ["doc3", 0.7],
43
- ]);
44
- const results = blendScores(rrfResults, rerankScores);
45
- // doc1: normalized RRF = 1.0, rerank = 0.9 -> 0.7*1.0 + 0.3*0.9 = 0.97
46
- const doc1 = results.find((r) => r.docId === "doc1")!;
47
- expect(doc1.score).toBeCloseTo(RRF_W * 1.0 + (1 - RRF_W) * 0.9, 10);
48
  });
49
 
50
- it("defaults missing rerank scores to 0", () => {
51
- const rrfResults = [makeRRFResult("doc1", 0.5)];
52
- const rerankScores = new Map<string, number>();
53
- const results = blendScores(rrfResults, rerankScores);
54
- // score = 0.7 * 1.0 + 0.3 * 0 = 0.7 (normalized RRF = 1.0 for single result)
55
- expect(results[0].score).toBeCloseTo(RRF_W, 10);
 
 
 
56
  });
57
 
58
- it("sorts by blended score descending", () => {
59
  const rrfResults = [
60
  makeRRFResult("doc1", 0.5),
61
  makeRRFResult("doc2", 0.4),
62
  makeRRFResult("doc3", 0.3),
 
63
  ];
 
64
  const rerankScores = new Map([
65
- ["doc1", 0.1],
66
- ["doc2", 0.2],
67
- ["doc3", 0.99],
 
68
  ]);
 
69
  const results = blendScores(rrfResults, rerankScores);
70
- for (let i = 1; i < results.length; i++) {
71
- expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
72
- }
73
  });
74
 
75
- it("can reorder results when reranker disagrees with RRF", () => {
76
  const rrfResults = [
77
  makeRRFResult("doc1", 0.5),
78
- makeRRFResult("doc2", 0.49),
 
79
  ];
80
  const rerankScores = new Map([
81
- ["doc1", 0.0],
82
- ["doc2", 1.0],
 
83
  ]);
 
84
  const results = blendScores(rrfResults, rerankScores);
85
- // doc1: 0.7*1.0 + 0.3*0.0 = 0.700
86
- // doc2: 0.7*(0.49/0.5) + 0.3*1.0 = 0.686 + 0.3 = 0.986
87
- expect(results[0].docId).toBe("doc2");
88
  });
89
 
90
- it("preserves filepath, title, bestChunk in final results", () => {
91
- const rrfResults = [makeRRFResult("doc1", 0.5, "My Title")];
92
- const results = blendScores(rrfResults, new Map());
93
  expect(results[0].filepath).toBe("doc1");
94
  expect(results[0].title).toBe("My Title");
95
  expect(results[0].bestChunk).toBe("chunk from doc1");
96
  expect(results[0].docId).toBe("doc1");
97
  });
98
 
99
- it("deduplicates by docId, keeping highest blended score", () => {
100
- const rrfResults = [
101
- makeRRFResult("doc1", 0.5),
102
- makeRRFResult("doc1", 0.3),
103
- ];
104
- const rerankScores = new Map([["doc1", 0.8]]);
105
- const results = blendScores(rrfResults, rerankScores);
106
- expect(results).toHaveLength(1);
107
- expect(results[0].docId).toBe("doc1");
108
- });
109
 
110
- it("uniform weight does not cause rank leapfrogging from reranker noise", () => {
111
- // The bug: with position-aware weights, rank 4 got 40% reranker weight
112
- // while rank 3 got only 25%, causing irrelevant docs to jump up.
113
- // With uniform weights, a low-RRF doc needs a very high reranker score to leapfrog.
114
- const rrfResults = [
115
- makeRRFResult("doc1", 0.12), // rank 1 — relevant
116
- makeRRFResult("doc2", 0.07), // rank 2 — relevant
117
- makeRRFResult("doc3", 0.05), // rank 3 — relevant
118
- makeRRFResult("doc4", 0.047), // rank 4 — irrelevant (Taj Mahal)
119
- ];
120
- const rerankScores = new Map([
121
- ["doc1", 0.0],
122
- ["doc2", 0.0],
123
- ["doc3", 0.0],
124
- ["doc4", 0.66], // noisy reranker gives moderate score to irrelevant doc
125
- ]);
126
- const results = blendScores(rrfResults, rerankScores);
127
- // doc4 should NOT be at position 2
128
  expect(results[0].docId).toBe("doc1");
129
- expect(results[1].docId).not.toBe("doc4");
130
  });
131
  });
 
1
  import { describe, it, expect } from "vitest";
2
  import { blendScores } from "./blend";
3
  import type { RRFResult } from "../types";
4
+ import {
5
+ BLEND_TAIL_RRF_WEIGHT,
6
+ BLEND_TOP10_RRF_WEIGHT,
7
+ BLEND_TOP3_RRF_WEIGHT,
8
+ } from "../constants";
9
 
 
 
 
 
 
10
  function makeRRFResult(
11
  docId: string,
12
  score: number,
 
22
  };
23
  }
24
 
 
 
 
25
  describe("blendScores", () => {
26
  it("returns empty array for empty input", () => {
27
  expect(blendScores([], new Map())).toEqual([]);
28
  });
29
 
30
+ it("uses the top-3 RRF weight for the highest-ranked documents", () => {
31
+ const results = blendScores(
32
+ [makeRRFResult("doc1", 0.5)],
33
+ new Map([["doc1", 0.9]]),
34
+ );
35
+
36
+ expect(results[0].score).toBeCloseTo(
37
+ BLEND_TOP3_RRF_WEIGHT * 1 + (1 - BLEND_TOP3_RRF_WEIGHT) * 0.9,
38
+ 10,
39
+ );
40
+ });
41
+
42
+ it("falls back to rank-only RRF when rerank scores are missing", () => {
43
+ const results = blendScores(
44
+ [makeRRFResult("doc1", 0.5), makeRRFResult("doc2", 0.4)],
45
+ new Map<string, number>(),
46
+ );
47
+
48
+ expect(results[0].score).toBeCloseTo(BLEND_TOP3_RRF_WEIGHT * 1, 10);
49
+ expect(results[1].score).toBeCloseTo(BLEND_TOP3_RRF_WEIGHT * 0.5, 10);
50
+ });
51
+
52
+ it("switches to the top-10 weight after rank 3", () => {
53
  const rrfResults = [
54
  makeRRFResult("doc1", 0.5),
55
  makeRRFResult("doc2", 0.4),
56
  makeRRFResult("doc3", 0.3),
57
+ makeRRFResult("doc4", 0.2),
58
  ];
59
+
60
+ const results = blendScores(rrfResults, new Map());
61
+ const doc4 = results.find((result) => result.docId === "doc4")!;
62
+
63
+ expect(doc4.score).toBeCloseTo(BLEND_TOP10_RRF_WEIGHT * 0.25, 10);
 
 
 
 
64
  });
65
 
66
+ it("uses the tail weight after rank 10", () => {
67
+ const rrfResults = Array.from({ length: 11 }, (_, index) =>
68
+ makeRRFResult(`doc${index + 1}`, 1 - index * 0.01),
69
+ );
70
+
71
+ const results = blendScores(rrfResults, new Map());
72
+ const tailDoc = results.find((result) => result.docId === "doc11")!;
73
+
74
+ expect(tailDoc.score).toBeCloseTo(BLEND_TAIL_RRF_WEIGHT * (1 / 11), 10);
75
  });
76
 
77
+ it("can reorder results when reranker strongly disagrees", () => {
78
  const rrfResults = [
79
  makeRRFResult("doc1", 0.5),
80
  makeRRFResult("doc2", 0.4),
81
  makeRRFResult("doc3", 0.3),
82
+ makeRRFResult("doc4", 0.2),
83
  ];
84
+
85
  const rerankScores = new Map([
86
+ ["doc1", 0],
87
+ ["doc2", 0],
88
+ ["doc3", 0],
89
+ ["doc4", 1],
90
  ]);
91
+
92
  const results = blendScores(rrfResults, rerankScores);
93
+ expect(results[0].docId).toBe("doc1");
94
+ expect(results[1].docId).toBe("doc4");
 
95
  });
96
 
97
+ it("sorts final results by blended score descending", () => {
98
  const rrfResults = [
99
  makeRRFResult("doc1", 0.5),
100
+ makeRRFResult("doc2", 0.4),
101
+ makeRRFResult("doc3", 0.3),
102
  ];
103
  const rerankScores = new Map([
104
+ ["doc1", 0.2],
105
+ ["doc2", 0.8],
106
+ ["doc3", 0.1],
107
  ]);
108
+
109
  const results = blendScores(rrfResults, rerankScores);
110
+ for (let i = 1; i < results.length; i++) {
111
+ expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
112
+ }
113
  });
114
 
115
+ it("preserves filepath, title, bestChunk, and docId", () => {
116
+ const results = blendScores([makeRRFResult("doc1", 0.5, "My Title")], new Map());
 
117
  expect(results[0].filepath).toBe("doc1");
118
  expect(results[0].title).toBe("My Title");
119
  expect(results[0].bestChunk).toBe("chunk from doc1");
120
  expect(results[0].docId).toBe("doc1");
121
  });
122
 
123
+ it("deduplicates by docId, keeping the highest blended score", () => {
124
+ const results = blendScores(
125
+ [makeRRFResult("doc1", 0.5), makeRRFResult("doc1", 0.3)],
126
+ new Map([["doc1", 0.8]]),
127
+ );
 
 
 
 
 
128
 
129
+ expect(results).toHaveLength(1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  expect(results[0].docId).toBe("doc1");
 
131
  });
132
  });
src/pipeline/blend.ts CHANGED
@@ -1,24 +1,28 @@
1
  import type { RRFResult, RerankedResult, FinalResult } from "../types";
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- const BLEND_RRF_WEIGHT = 0.8; // uniform 80% RRF / 20% reranker
4
-
5
- // Blend RRF score with reranker score using uniform weights.
6
- // Browser-sized reranker models are noisy — position-aware weights
7
- // (which gave tail ranks MORE reranker influence) caused irrelevant
8
- // docs to leapfrog relevant ones.
9
  export function blendScores(
10
  rrfResults: RRFResult[],
11
  rerankScores: Map<string, number>, // docId -> rerank score
12
  ): FinalResult[] {
13
- // Normalize RRF scores to [0,1] range so they're comparable with reranker scores (0-1)
14
- const maxRRF = Math.max(...rrfResults.map(r => r.score), 1e-9);
15
-
16
- const blended: RerankedResult[] = rrfResults.map((result) => {
17
  const rerankScore = rerankScores.get(result.docId) ?? 0;
18
- const normalizedRRF = result.score / maxRRF;
19
-
20
  const blendedScore =
21
- BLEND_RRF_WEIGHT * normalizedRRF + (1 - BLEND_RRF_WEIGHT) * rerankScore;
22
 
23
  return {
24
  ...result,
 
1
  import type { RRFResult, RerankedResult, FinalResult } from "../types";
2
+ import {
3
+ BLEND_TAIL_RRF_WEIGHT,
4
+ BLEND_TOP10_RRF_WEIGHT,
5
+ BLEND_TOP3_RRF_WEIGHT,
6
+ } from "../constants";
7
+
8
+ function getRrfWeight(rank: number): number {
9
+ if (rank <= 3) return BLEND_TOP3_RRF_WEIGHT;
10
+ if (rank <= 10) return BLEND_TOP10_RRF_WEIGHT;
11
+ return BLEND_TAIL_RRF_WEIGHT;
12
+ }
13
 
14
+ // Blend RRF rank position with reranker score using qmd's position-aware weights.
 
 
 
 
 
15
  export function blendScores(
16
  rrfResults: RRFResult[],
17
  rerankScores: Map<string, number>, // docId -> rerank score
18
  ): FinalResult[] {
19
+ const blended: RerankedResult[] = rrfResults.map((result, index) => {
20
+ const rank = index + 1;
21
+ const rrfWeight = getRrfWeight(rank);
22
+ const positionScore = 1 / rank;
23
  const rerankScore = rerankScores.get(result.docId) ?? 0;
 
 
24
  const blendedScore =
25
+ rrfWeight * positionScore + (1 - rrfWeight) * rerankScore;
26
 
27
  return {
28
  ...result,
src/pipeline/embeddings.ts CHANGED
@@ -25,6 +25,15 @@ export async function embedBatch(
25
  return output.tolist().map((arr: number[]) => new Float32Array(arr));
26
  }
27
 
 
 
 
 
 
 
 
 
 
28
  /**
29
  * Embed a document chunk using the doc template.
30
  * Uses the doc template: "title: {title} | text: {body}"
@@ -39,3 +48,12 @@ export async function embedDocChunk(
39
  const output = await pipe(text, { pooling: "mean", normalize: true });
40
  return new Float32Array(output.tolist()[0]);
41
  }
 
 
 
 
 
 
 
 
 
 
25
  return output.tolist().map((arr: number[]) => new Float32Array(arr));
26
  }
27
 
28
+ /**
29
+ * Embed multiple query strings using the query template.
30
+ */
31
+ export async function embedQueries(
32
+ queries: string[],
33
+ ): Promise<Float32Array[]> {
34
+ return embedBatch(queries.map(EMBED_QUERY_TEMPLATE));
35
+ }
36
+
37
  /**
38
  * Embed a document chunk using the doc template.
39
  * Uses the doc template: "title: {title} | text: {body}"
 
48
  const output = await pipe(text, { pooling: "mean", normalize: true });
49
  return new Float32Array(output.tolist()[0]);
50
  }
51
+
52
+ /**
53
+ * Embed multiple document chunks using the document template.
54
+ */
55
+ export async function embedDocChunksBatch(
56
+ chunks: Array<{ title: string; text: string }>,
57
+ ): Promise<Float32Array[]> {
58
+ return embedBatch(chunks.map((chunk) => EMBED_DOC_TEMPLATE(chunk.title, chunk.text)));
59
+ }
src/pipeline/expansion.test.ts CHANGED
@@ -9,47 +9,45 @@ const QUERY = "test query";
9
  describe("parseExpansionOutput", () => {
10
  it("parses all three variants from well-formed output", () => {
11
  const text = [
12
- "lex: sqlite, fts5, full-text search",
13
- "vec: How to perform full-text search using SQLite FTS5",
14
- "hyde: This document explains the SQLite FTS5 extension for full-text search indexing.",
15
  ].join("\n");
16
 
17
  const result = parseExpansionOutput(text, QUERY);
18
- expect(result.lex).toBe("sqlite, fts5, full-text search");
19
  expect(result.vec).toEqual([
20
- "How to perform full-text search using SQLite FTS5",
21
  ]);
22
- expect(result.hyde).toBe(
23
- "This document explains the SQLite FTS5 extension for full-text search indexing.",
24
- );
25
  });
26
 
27
  it("handles multiple vec lines", () => {
28
  const text = [
29
- "lex: embeddings, vector search",
30
- "vec: Generating vector embeddings for semantic search",
31
- "vec: Using cosine similarity to find related documents",
32
- "hyde: This guide covers the creation of vector embeddings.",
33
  ].join("\n");
34
 
35
  const result = parseExpansionOutput(text, QUERY);
36
  expect(result.vec).toEqual([
37
- "Generating vector embeddings for semantic search",
38
- "Using cosine similarity to find related documents",
39
  ]);
40
  });
41
 
42
  it("is case-insensitive for prefixes", () => {
43
  const text = [
44
- "LEX: uppercase keywords",
45
- "Vec: Mixed case sentence",
46
- "HYDE: All caps hypothetical document",
47
  ].join("\n");
48
 
49
  const result = parseExpansionOutput(text, QUERY);
50
- expect(result.lex).toBe("uppercase keywords");
51
- expect(result.vec).toEqual(["Mixed case sentence"]);
52
- expect(result.hyde).toBe("All caps hypothetical document");
53
  });
54
 
55
  it("falls back to query when no prefixes are found", () => {
@@ -61,9 +59,9 @@ describe("parseExpansionOutput", () => {
61
  });
62
 
63
  it("uses query for missing variants", () => {
64
- const text = "lex: only keywords here";
65
  const result = parseExpansionOutput(text, QUERY);
66
- expect(result.lex).toBe("only keywords here");
67
  expect(result.vec).toEqual([QUERY]);
68
  expect(result.hyde).toBe(`Information about ${QUERY}`);
69
  });
@@ -71,24 +69,37 @@ describe("parseExpansionOutput", () => {
71
  it("handles extra whitespace and blank lines", () => {
72
  const text = [
73
  "",
74
- " lex: spaced keywords ",
75
  "",
76
- " vec: spaced sentence ",
77
- " hyde: spaced hypothetical ",
78
  "",
79
  ].join("\n");
80
 
81
  const result = parseExpansionOutput(text, QUERY);
82
- expect(result.lex).toBe("spaced keywords");
83
- expect(result.vec).toEqual(["spaced sentence"]);
84
- expect(result.hyde).toBe("spaced hypothetical");
85
  });
86
 
87
  it("handles only vec present", () => {
88
- const text = "vec: just a vector sentence";
89
  const result = parseExpansionOutput(text, QUERY);
90
- expect(result.vec).toEqual(["just a vector sentence"]);
91
  expect(result.lex).toBe(QUERY);
92
  expect(result.hyde).toBe(`Information about ${QUERY}`);
93
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  });
 
9
  describe("parseExpansionOutput", () => {
10
  it("parses all three variants from well-formed output", () => {
11
  const text = [
12
+ "lex: test query keywords",
13
+ "vec: How to improve results for a test query",
14
+ "hyde: This document explains how to answer a test query well.",
15
  ].join("\n");
16
 
17
  const result = parseExpansionOutput(text, QUERY);
18
+ expect(result.lex).toBe("test query keywords");
19
  expect(result.vec).toEqual([
20
+ "How to improve results for a test query",
21
  ]);
22
+ expect(result.hyde).toBe("This document explains how to answer a test query well.");
 
 
23
  });
24
 
25
  it("handles multiple vec lines", () => {
26
  const text = [
27
+ "lex: test query embeddings",
28
+ "vec: Generating vector embeddings for a test query",
29
+ "vec: Using cosine similarity to answer the query better",
30
+ "hyde: This guide covers the creation of test query embeddings.",
31
  ].join("\n");
32
 
33
  const result = parseExpansionOutput(text, QUERY);
34
  expect(result.vec).toEqual([
35
+ "Generating vector embeddings for a test query",
36
+ "Using cosine similarity to answer the query better",
37
  ]);
38
  });
39
 
40
  it("is case-insensitive for prefixes", () => {
41
  const text = [
42
+ "LEX: uppercase test keywords",
43
+ "Vec: Mixed case query sentence",
44
+ "HYDE: All caps test hypothetical document",
45
  ].join("\n");
46
 
47
  const result = parseExpansionOutput(text, QUERY);
48
+ expect(result.lex).toBe("uppercase test keywords");
49
+ expect(result.vec).toEqual(["Mixed case query sentence"]);
50
+ expect(result.hyde).toBe("All caps test hypothetical document");
51
  });
52
 
53
  it("falls back to query when no prefixes are found", () => {
 
59
  });
60
 
61
  it("uses query for missing variants", () => {
62
+ const text = "lex: only test query keywords here";
63
  const result = parseExpansionOutput(text, QUERY);
64
+ expect(result.lex).toBe("only test query keywords here");
65
  expect(result.vec).toEqual([QUERY]);
66
  expect(result.hyde).toBe(`Information about ${QUERY}`);
67
  });
 
69
  it("handles extra whitespace and blank lines", () => {
70
  const text = [
71
  "",
72
+ " lex: spaced test keywords ",
73
  "",
74
+ " vec: spaced query sentence ",
75
+ " hyde: spaced test hypothetical ",
76
  "",
77
  ].join("\n");
78
 
79
  const result = parseExpansionOutput(text, QUERY);
80
+ expect(result.lex).toBe("spaced test keywords");
81
+ expect(result.vec).toEqual(["spaced query sentence"]);
82
+ expect(result.hyde).toBe("spaced test hypothetical");
83
  });
84
 
85
  it("handles only vec present", () => {
86
+ const text = "vec: just a test query vector sentence";
87
  const result = parseExpansionOutput(text, QUERY);
88
+ expect(result.vec).toEqual(["just a test query vector sentence"]);
89
  expect(result.lex).toBe(QUERY);
90
  expect(result.hyde).toBe(`Information about ${QUERY}`);
91
  });
92
+
93
+ it("filters expansion lines that drift away from the original query", () => {
94
+ const text = [
95
+ "lex: unrelated cooking keywords",
96
+ "vec: another unrelated sentence",
97
+ "hyde: test query overview with useful detail",
98
+ ].join("\n");
99
+
100
+ const result = parseExpansionOutput(text, QUERY);
101
+ expect(result.lex).toBe(QUERY);
102
+ expect(result.vec).toEqual([QUERY]);
103
+ expect(result.hyde).toBe("test query overview with useful detail");
104
+ });
105
  });
src/pipeline/expansion.ts CHANGED
@@ -1,4 +1,8 @@
1
- import { Tensor } from "@huggingface/transformers";
 
 
 
 
2
  import { getExpansionModel, getExpansionTokenizer } from "./models";
3
  import type { ExpandedQuery } from "../types";
4
 
@@ -17,6 +21,14 @@ const TOP_P = 0.8;
17
  // Parse the model's output text into structured ExpandedQuery
18
  function parseExpansionOutput(text: string, query: string): ExpandedQuery {
19
  const lines = text.trim().split("\n");
 
 
 
 
 
 
 
 
20
  let lex = "";
21
  const vec: string[] = [];
22
  let hyde = "";
@@ -24,11 +36,14 @@ function parseExpansionOutput(text: string, query: string): ExpandedQuery {
24
  for (const line of lines) {
25
  const trimmed = line.trim();
26
  if (trimmed.toLowerCase().startsWith("lex:")) {
27
- lex = trimmed.slice(4).trim();
 
28
  } else if (trimmed.toLowerCase().startsWith("vec:")) {
29
- vec.push(trimmed.slice(4).trim());
 
30
  } else if (trimmed.toLowerCase().startsWith("hyde:")) {
31
- hyde = trimmed.slice(5).trim();
 
32
  }
33
  }
34
 
@@ -56,7 +71,7 @@ function sampleToken(logitsData: Float32Array | Float64Array, vocabSize: number)
56
  const topKIndices = indices.slice(0, TOP_K);
57
 
58
  // Softmax over top-k
59
- let maxLogit = scaled[topKIndices[0]];
60
  const exps = topKIndices.map(i => Math.exp(scaled[i] - maxLogit));
61
  const sumExp = exps.reduce((a, b) => a + b, 0);
62
  const probs = exps.map(e => e / sumExp);
@@ -89,10 +104,10 @@ function sampleToken(logitsData: Float32Array | Float64Array, vocabSize: number)
89
  // Manual autoregressive generation — bypasses Transformers.js generate()
90
  // which breaks when the ONNX model was exported without KV cache tensors.
91
  async function manualGenerate(
92
- model: any,
93
  inputIds: bigint[],
94
  eosTokenId: number,
95
- tokenizer: any,
96
  ): Promise<bigint[]> {
97
  const generated: bigint[] = [...inputIds];
98
  const newTokens: bigint[] = [];
@@ -101,7 +116,15 @@ async function manualGenerate(
101
  const idsTensor = new Tensor("int64", BigInt64Array.from(generated), [1, generated.length]);
102
  const maskTensor = new Tensor("int64", new BigInt64Array(generated.length).fill(1n), [1, generated.length]);
103
 
104
- const output = await model({ input_ids: idsTensor, attention_mask: maskTensor });
 
 
 
 
 
 
 
 
105
 
106
  const logits = output.logits;
107
  const vocabSize = logits.dims[2];
@@ -154,12 +177,20 @@ export async function expandQuery(query: string): Promise<ExpandedQuery> {
154
  [{ role: "user", content: prompt }],
155
  { add_generation_prompt: true, tokenize: false },
156
  ) as string;
157
- const inputs = tokenizer(chatPrompt, { return_tensor: true });
 
 
158
  const inputIds = Array.from(inputs.input_ids.data as BigInt64Array);
159
 
160
- const eosTokenId = (tokenizer as any).model?.config?.eos_token_id
161
- ?? (tokenizer as any).eos_token_id
162
- ?? 151643; // Qwen default
 
 
 
 
 
 
163
 
164
  const allIds = await manualGenerate(model, inputIds, eosTokenId, tokenizer);
165
 
 
1
+ import {
2
+ Tensor,
3
+ type PreTrainedModel,
4
+ type PreTrainedTokenizer,
5
+ } from "@huggingface/transformers";
6
  import { getExpansionModel, getExpansionTokenizer } from "./models";
7
  import type { ExpandedQuery } from "../types";
8
 
 
21
  // Parse the model's output text into structured ExpandedQuery
22
  function parseExpansionOutput(text: string, query: string): ExpandedQuery {
23
  const lines = text.trim().split("\n");
24
+ const queryTerms = query
25
+ .toLowerCase()
26
+ .replace(/[^a-z0-9\s]/g, " ")
27
+ .split(/\s+/)
28
+ .filter(Boolean);
29
+ const hasQueryTerm = (value: string) =>
30
+ queryTerms.length === 0 ||
31
+ queryTerms.some((term) => value.toLowerCase().includes(term));
32
  let lex = "";
33
  const vec: string[] = [];
34
  let hyde = "";
 
36
  for (const line of lines) {
37
  const trimmed = line.trim();
38
  if (trimmed.toLowerCase().startsWith("lex:")) {
39
+ const value = trimmed.slice(4).trim();
40
+ if (value && hasQueryTerm(value)) lex = value;
41
  } else if (trimmed.toLowerCase().startsWith("vec:")) {
42
+ const value = trimmed.slice(4).trim();
43
+ if (value && hasQueryTerm(value)) vec.push(value);
44
  } else if (trimmed.toLowerCase().startsWith("hyde:")) {
45
+ const value = trimmed.slice(5).trim();
46
+ if (value && hasQueryTerm(value)) hyde = value;
47
  }
48
  }
49
 
 
71
  const topKIndices = indices.slice(0, TOP_K);
72
 
73
  // Softmax over top-k
74
+ const maxLogit = scaled[topKIndices[0]];
75
  const exps = topKIndices.map(i => Math.exp(scaled[i] - maxLogit));
76
  const sumExp = exps.reduce((a, b) => a + b, 0);
77
  const probs = exps.map(e => e / sumExp);
 
104
  // Manual autoregressive generation — bypasses Transformers.js generate()
105
  // which breaks when the ONNX model was exported without KV cache tensors.
106
  async function manualGenerate(
107
+ model: PreTrainedModel,
108
  inputIds: bigint[],
109
  eosTokenId: number,
110
+ tokenizer: PreTrainedTokenizer,
111
  ): Promise<bigint[]> {
112
  const generated: bigint[] = [...inputIds];
113
  const newTokens: bigint[] = [];
 
116
  const idsTensor = new Tensor("int64", BigInt64Array.from(generated), [1, generated.length]);
117
  const maskTensor = new Tensor("int64", new BigInt64Array(generated.length).fill(1n), [1, generated.length]);
118
 
119
+ const output = await model({
120
+ input_ids: idsTensor,
121
+ attention_mask: maskTensor,
122
+ }) as {
123
+ logits: {
124
+ dims: number[];
125
+ data: Float32Array | Float64Array;
126
+ };
127
+ };
128
 
129
  const logits = output.logits;
130
  const vocabSize = logits.dims[2];
 
177
  [{ role: "user", content: prompt }],
178
  { add_generation_prompt: true, tokenize: false },
179
  ) as string;
180
+ const inputs = tokenizer(chatPrompt, {
181
+ return_tensor: true,
182
+ }) as { input_ids: Tensor };
183
  const inputIds = Array.from(inputs.input_ids.data as BigInt64Array);
184
 
185
+ const tokenizerWithConfig = tokenizer as PreTrainedTokenizer & {
186
+ model?: { config?: { eos_token_id?: number | number[] } };
187
+ eos_token_id?: number | number[];
188
+ };
189
+ const eosTokenValue =
190
+ tokenizerWithConfig.model?.config?.eos_token_id ??
191
+ tokenizerWithConfig.eos_token_id;
192
+ const eosTokenId =
193
+ Array.isArray(eosTokenValue) ? eosTokenValue[0] ?? 151643 : eosTokenValue ?? 151643;
194
 
195
  const allIds = await manualGenerate(model, inputIds, eosTokenId, tokenizer);
196
 
src/pipeline/orchestrator.ts CHANGED
@@ -1,11 +1,16 @@
1
  import type {
2
  EmbeddedChunk,
3
- ScoredChunk,
4
  ExpandedQuery,
5
  PipelineEvent,
 
 
6
  } from "../types";
 
 
 
 
7
  import { expandQuery } from "./expansion";
8
- import { embedQuery } from "./embeddings";
9
  import { vectorSearch } from "./vectorSearch";
10
  import { BM25Index } from "./bm25";
11
  import { reciprocalRankFusion } from "./rrf";
@@ -19,106 +24,262 @@ export interface PipelineInput {
19
  bm25Index: BM25Index;
20
  }
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  export async function* runPipeline(
23
  input: PipelineInput,
24
  ): AsyncGenerator<PipelineEvent> {
25
  const { query, embeddedChunks, bm25Index } = input;
26
 
27
- // ── Stage 1: Query Expansion ──
28
- let expanded: ExpandedQuery;
29
- if (isExpansionReady()) {
30
- yield { stage: "expansion", status: "running" };
31
- try {
32
- expanded = await expandQuery(query);
33
- yield { stage: "expansion", status: "done", data: expanded };
34
- } catch (err) {
35
- // Fallback: use original query as all variants
36
- expanded = { lex: query, vec: [query], hyde: query };
37
- yield { stage: "expansion", status: "error", error: String(err) };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
- } else {
40
- // Expansion model not loaded — use original query as all variants
41
- expanded = { lex: query, vec: [query], hyde: query };
42
- yield { stage: "expansion", status: "error", error: "Expansion model not loaded — using original query" };
43
  }
44
 
45
- // ── Stage 2: Parallel Search ──
46
- yield { stage: "search", status: "running" };
 
 
 
 
 
 
 
 
 
47
 
48
- // BM25 searches:
49
- // 1. Original query -> BM25 (dashed line in architecture diagram)
50
- // 2. Lex keywords -> BM25
51
- // 3. Vec sentences -> BM25
52
- const bm25Original = bm25Index.search(query, 20);
53
- const bm25Lex = bm25Index.search(expanded.lex, 20);
54
- const bm25Vec = expanded.vec.flatMap((v) => bm25Index.search(v, 20));
55
-
56
- // Vector searches:
57
- // 1. HyDE -> Vector Search
58
- // 2. Vec sentences -> Vector Search
59
- const hydeEmbedding = await embedQuery(expanded.hyde);
60
- const vecEmbeddings = await Promise.all(
61
- expanded.vec.map((v) => embedQuery(v)),
62
- );
63
 
64
- const vecHyde = vectorSearch(hydeEmbedding, embeddedChunks, 20);
65
- const vecVec = vecEmbeddings.flatMap((emb) =>
66
- vectorSearch(emb, embeddedChunks, 20),
67
- );
68
 
69
- // Combine all hits for UI display
70
- const allBm25: ScoredChunk[] = [...bm25Original, ...bm25Lex, ...bm25Vec];
71
- const allVector: ScoredChunk[] = [...vecHyde, ...vecVec];
 
 
 
 
72
 
73
  yield {
74
  stage: "search",
75
  status: "done",
76
- data: { bm25Hits: allBm25, vectorHits: allVector },
77
  };
78
 
79
- // ── Stage 3: RRF Fusion ──
80
- // Build ranked lists for RRF (order matters for weights: first 2 get 2x)
81
- const rrfLists = [
82
- { results: bm25Original, queryType: "original" as const, query },
83
- { results: vecHyde, queryType: "hyde" as const, query: expanded.hyde },
84
- { results: bm25Lex, queryType: "lex" as const, query: expanded.lex },
85
- ...expanded.vec.map((v, i) => ({
86
- results: vecVec.slice(i * 20, (i + 1) * 20),
87
- queryType: "vec" as const,
88
- query: v,
89
- })),
90
- ...expanded.vec.map((v, i) => ({
91
- results: bm25Vec.slice(i * 20, (i + 1) * 20),
92
- queryType: "vec" as const,
93
- query: v,
94
- })),
95
- ];
96
-
97
- const rrfResults = reciprocalRankFusion(rrfLists);
98
- yield { stage: "rrf", status: "done", data: { merged: rrfResults } };
99
-
100
- // ── Stage 4: Reranking ──
101
  yield { stage: "rerank", status: "running" };
102
  const rerankScores = new Map<string, number>();
103
- for (const result of rrfResults) {
104
  const score = await scoreDocument(query, result.bestChunk);
105
  rerankScores.set(result.docId, score);
106
  }
107
 
108
- // Build reranked view for "before/after" visualization
109
- const rerankedResults = rrfResults.map((r) => ({
110
- ...r,
111
- rerankScore: rerankScores.get(r.docId) ?? 0,
112
- blendedScore: 0, // computed in blend step
113
  }));
114
 
115
  yield {
116
  stage: "rerank",
117
  status: "done",
118
- data: { before: rrfResults, after: rerankedResults },
119
  };
120
 
121
- // ── Stage 5: Score Blending ──
122
- const finalResults = blendScores(rrfResults, rerankScores);
123
  yield { stage: "blend", status: "done", data: { finalResults } };
124
  }
 
1
  import type {
2
  EmbeddedChunk,
 
3
  ExpandedQuery,
4
  PipelineEvent,
5
+ RRFResult,
6
+ ScoredChunk,
7
  } from "../types";
8
+ import {
9
+ STRONG_SIGNAL_MIN_GAP,
10
+ STRONG_SIGNAL_MIN_SCORE,
11
+ } from "../constants";
12
  import { expandQuery } from "./expansion";
13
+ import { embedQueries } from "./embeddings";
14
  import { vectorSearch } from "./vectorSearch";
15
  import { BM25Index } from "./bm25";
16
  import { reciprocalRankFusion } from "./rrf";
 
24
  bm25Index: BM25Index;
25
  }
26
 
27
+ interface RankedList {
28
+ results: ScoredChunk[];
29
+ queryType: "original" | "lex" | "vec" | "hyde";
30
+ query: string;
31
+ }
32
+
33
+ interface VectorQuery {
34
+ text: string;
35
+ queryType: "original" | "vec" | "hyde";
36
+ }
37
+
38
+ const SEARCH_LIMIT = 20;
39
+
40
+ function normalizeBm25Score(score: number): number {
41
+ if (score <= 0) return 0;
42
+ return score / (1 + score);
43
+ }
44
+
45
+ function hasStrongBm25Signal(results: ScoredChunk[]): boolean {
46
+ const topScore = normalizeBm25Score(results[0]?.score ?? 0);
47
+ const secondScore = normalizeBm25Score(results[1]?.score ?? 0);
48
+ return (
49
+ results.length > 0 &&
50
+ topScore >= STRONG_SIGNAL_MIN_SCORE &&
51
+ topScore - secondScore >= STRONG_SIGNAL_MIN_GAP
52
+ );
53
+ }
54
+
55
+ function extractQueryTerms(query: string): string[] {
56
+ return [...new Set(
57
+ query
58
+ .toLowerCase()
59
+ .split(/\s+/)
60
+ .map((term) => term.replace(/^[^a-z0-9]+|[^a-z0-9]+$/g, ""))
61
+ .filter((term) => term.length > 2),
62
+ )];
63
+ }
64
+
65
+ function buildChunkLookup(
66
+ chunks: EmbeddedChunk[],
67
+ ): Map<string, EmbeddedChunk[]> {
68
+ const byDoc = new Map<string, EmbeddedChunk[]>();
69
+ for (const chunk of chunks) {
70
+ const existing = byDoc.get(chunk.docId);
71
+ if (existing) {
72
+ existing.push(chunk);
73
+ } else {
74
+ byDoc.set(chunk.docId, [chunk]);
75
+ }
76
+ }
77
+ for (const docChunks of byDoc.values()) {
78
+ docChunks.sort((a, b) => a.chunkIndex - b.chunkIndex);
79
+ }
80
+ return byDoc;
81
+ }
82
+
83
+ function selectBestChunkForRerank(
84
+ query: string,
85
+ docChunks: EmbeddedChunk[],
86
+ ): string {
87
+ if (docChunks.length === 0) return "";
88
+
89
+ const queryTerms = extractQueryTerms(query);
90
+ if (queryTerms.length === 0) return docChunks[0].text;
91
+
92
+ let bestChunk = docChunks[0];
93
+ let bestScore = -1;
94
+
95
+ for (const chunk of docChunks) {
96
+ const chunkLower = chunk.text.toLowerCase();
97
+ const overlap = queryTerms.reduce(
98
+ (score, term) => score + (chunkLower.includes(term) ? 1 : 0),
99
+ 0,
100
+ );
101
+
102
+ if (overlap > bestScore) {
103
+ bestChunk = chunk;
104
+ bestScore = overlap;
105
+ }
106
+ }
107
+
108
+ return bestChunk.text;
109
+ }
110
+
111
+ function assignRerankChunks(
112
+ query: string,
113
+ rrfResults: RRFResult[],
114
+ embeddedChunks: EmbeddedChunk[],
115
+ ): RRFResult[] {
116
+ const chunksByDoc = buildChunkLookup(embeddedChunks);
117
+ return rrfResults.map((result) => {
118
+ const bestChunk = selectBestChunkForRerank(
119
+ query,
120
+ chunksByDoc.get(result.docId) ?? [],
121
+ );
122
+
123
+ return {
124
+ ...result,
125
+ bestChunk: bestChunk || result.bestChunk,
126
+ };
127
+ });
128
+ }
129
+
130
+ async function resolveExpansion(query: string, initialFts: ScoredChunk[]): Promise<ExpandedQuery> {
131
+ if (hasStrongBm25Signal(initialFts)) {
132
+ return {
133
+ lex: "",
134
+ vec: [],
135
+ hyde: "",
136
+ source: "strong-signal",
137
+ note: "Strong BM25 match detected, so expansion was skipped.",
138
+ };
139
+ }
140
+
141
+ if (!isExpansionReady()) {
142
+ return {
143
+ lex: "",
144
+ vec: [],
145
+ hyde: "",
146
+ source: "fallback",
147
+ note: "Expansion model unavailable, so the pipeline used the original query only.",
148
+ };
149
+ }
150
+
151
+ try {
152
+ return {
153
+ ...(await expandQuery(query)),
154
+ source: "model",
155
+ };
156
+ } catch (err) {
157
+ return {
158
+ lex: "",
159
+ vec: [],
160
+ hyde: "",
161
+ source: "fallback",
162
+ note: `Expansion failed, so the pipeline used the original query only. ${String(err)}`,
163
+ };
164
+ }
165
+ }
166
+
167
/**
 * Runs the full retrieval pipeline for one query, streaming progress to the
 * UI as PipelineEvents. Stage order is fixed: expansion → search → rrf →
 * rerank → blend; "done" events carry the stage's data payload.
 *
 * @param input Query text plus the pre-built embedded chunks and BM25 index.
 * @yields One PipelineEvent per stage transition.
 */
export async function* runPipeline(
  input: PipelineInput,
): AsyncGenerator<PipelineEvent> {
  const { query, embeddedChunks, bm25Index } = input;

  // Step 1: initial lexical probe for strong exact-match signals.
  const initialFts = bm25Index.search(query, SEARCH_LIMIT);
  const strongSignal = hasStrongBm25Signal(initialFts);

  // Step 2: expansion (or intentional skip/fallback).
  // resolveExpansion never throws; failures become a "fallback" result.
  yield { stage: "expansion", status: "running" };
  const expanded = await resolveExpansion(query, initialFts);
  yield { stage: "expansion", status: "done", data: expanded };

  // Step 3: route lexical and vector searches by query type.
  yield { stage: "search", status: "running" };

  const rankedLists: RankedList[] = [];
  // bm25Hits / vectorHits accumulate raw hits across all sub-queries
  // (duplicates across lists are possible) for the search-stage payload.
  const bm25Hits: ScoredChunk[] = [...initialFts];
  const vectorHits: ScoredChunk[] = [];

  if (initialFts.length > 0) {
    rankedLists.push({
      results: initialFts,
      queryType: "original",
      query,
    });
  }

  // Lexical expansion search only when no strong signal and the model
  // produced a non-blank lex string.
  if (!strongSignal && expanded.lex.trim()) {
    const lexicalExpansionHits = bm25Index.search(expanded.lex, SEARCH_LIMIT);
    if (lexicalExpansionHits.length > 0) {
      bm25Hits.push(...lexicalExpansionHits);
      rankedLists.push({
        results: lexicalExpansionHits,
        queryType: "lex",
        query: expanded.lex,
      });
    }
  }

  // The original query is always embedded, even on a strong BM25 signal;
  // NOTE(review): assumed intentional so dense evidence still feeds fusion
  // — confirm. Expansion-derived vector/HyDE queries are gated off on a
  // strong signal.
  const vectorQueries: VectorQuery[] = [{ text: query, queryType: "original" }];
  if (!strongSignal) {
    for (const vecQuery of expanded.vec) {
      if (vecQuery.trim()) {
        vectorQueries.push({ text: vecQuery, queryType: "vec" });
      }
    }
    if (expanded.hyde.trim()) {
      vectorQueries.push({ text: expanded.hyde, queryType: "hyde" });
    }
  }

  // Embed all vector queries in one batch, then search per query.
  // Index i pairs each query with its embedding; the guard skips any
  // position the embedder failed to fill.
  const queryEmbeddings = await embedQueries(vectorQueries.map((entry) => entry.text));
  for (let i = 0; i < vectorQueries.length; i++) {
    const queryEntry = vectorQueries[i];
    const embedding = queryEmbeddings[i];
    if (!queryEntry || !embedding) continue;

    const hits = vectorSearch(embedding, embeddedChunks, SEARCH_LIMIT);
    if (hits.length === 0) continue;

    vectorHits.push(...hits);
    rankedLists.push({
      results: hits,
      queryType: queryEntry.queryType,
      query: queryEntry.text,
    });
  }

  yield {
    stage: "search",
    status: "done",
    data: { bm25Hits, vectorHits },
  };

  // Step 4: RRF fusion over all retrieval lists, then attach each doc's
  // most query-relevant chunk for the reranker.
  const fusedResults = reciprocalRankFusion(rankedLists);
  const rerankCandidates = assignRerankChunks(query, fusedResults, embeddedChunks);
  yield { stage: "rrf", status: "done", data: { merged: rerankCandidates } };

  // No candidates: still emit "done" for the remaining stages so the UI's
  // stage progression completes, then stop.
  if (rerankCandidates.length === 0) {
    yield {
      stage: "rerank",
      status: "done",
      data: { before: [], after: [] },
    };
    yield {
      stage: "blend",
      status: "done",
      data: { finalResults: [] },
    };
    return;
  }

  // Step 5: rerank the best chunk per document.
  // NOTE(review): candidates are scored one at a time with sequential
  // awaits — presumably the in-browser reranker model is single-stream;
  // confirm before parallelizing.
  yield { stage: "rerank", status: "running" };
  const rerankScores = new Map<string, number>();
  for (const result of rerankCandidates) {
    const score = await scoreDocument(query, result.bestChunk);
    rerankScores.set(result.docId, score);
  }

  // Build the "after" view for the before/after visualization; blendedScore
  // is a placeholder here (the blend stage computes the real value).
  const rerankedResults = rerankCandidates.map((result) => ({
    ...result,
    rerankScore: rerankScores.get(result.docId) ?? 0,
    blendedScore: 0,
  }));

  yield {
    stage: "rerank",
    status: "done",
    data: { before: rerankCandidates, after: rerankedResults },
  };

  // Step 6: blend retrieval position with reranker score for final ordering.
  const finalResults = blendScores(rerankCandidates, rerankScores);
  yield { stage: "blend", status: "done", data: { finalResults } };
}
src/types.ts CHANGED
@@ -66,6 +66,8 @@ export interface ExpandedQuery {
66
  hyde: string; // hypothetical document snippet
67
  vec: string[]; // dense retrieval sentences
68
  lex: string; // BM25 keywords
 
 
69
  }
70
 
71
  // Pipeline events for React UI
 
66
  hyde: string; // hypothetical document snippet
67
  vec: string[]; // dense retrieval sentences
68
  lex: string; // BM25 keywords
69
+ source?: "model" | "fallback" | "strong-signal";
70
+ note?: string;
71
  }
72
 
73
  // Pipeline events for React UI