shreyask committed on
Commit
eb89325
·
verified ·
1 Parent(s): bf69e75

feat: align browser demo with qmd pipeline

Browse files
src/App.tsx CHANGED
@@ -1,8 +1,8 @@
1
- import { useState, useEffect, useCallback } from 'react';
2
  import type { Document, Chunk, EmbeddedChunk, ModelState } from './types';
3
  import { loadAllModels, isAllModelsReady } from './pipeline/models';
4
  import { chunkDocument, extractTitle } from './pipeline/chunking';
5
- import { embedDocChunk } from './pipeline/embeddings';
6
  import { BM25Index } from './pipeline/bm25';
7
  import { runPipeline } from './pipeline/orchestrator';
8
  import type { PipelineState } from './components/PipelineView';
@@ -11,7 +11,6 @@ import ModelStatus from './components/ModelStatus';
11
  import PipelineView from './components/PipelineView';
12
  import DocumentManager from './components/DocumentManager';
13
 
14
- // Sample doc filenames to load from public/eval-docs/
15
  const SAMPLE_DOCS = [
16
  'api-design-principles.md',
17
  'distributed-systems-overview.md',
@@ -19,6 +18,19 @@ const SAMPLE_DOCS = [
19
  'history-of-coffee.md',
20
  ];
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  const INITIAL_PIPELINE: PipelineState = {
23
  expansion: { status: 'idle' },
24
  search: { status: 'idle' },
@@ -27,6 +39,51 @@ const INITIAL_PIPELINE: PipelineState = {
27
  blend: { status: 'idle' },
28
  };
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  function App() {
31
  const [models, setModels] = useState<ModelState[]>([
32
  { name: 'embedding', status: 'pending', progress: 0 },
@@ -39,87 +96,150 @@ function App() {
39
  const [bm25Index, setBm25Index] = useState<BM25Index | null>(null);
40
  const [pipeline, setPipeline] = useState<PipelineState>(INITIAL_PIPELINE);
41
  const [indexing, setIndexing] = useState(false);
 
42
  const [query, setQuery] = useState('');
 
 
 
 
 
 
43
 
44
- // Load models on mount
45
  useEffect(() => {
46
  loadAllModels((state) => {
47
- setModels(prev => prev.map(m => m.name === state.name ? state : m));
 
 
48
  }).catch(console.error);
49
  }, []);
50
 
51
- // Load sample documents
52
  useEffect(() => {
53
  async function loadSampleDocs() {
54
- const docs: Document[] = [];
55
- for (const filename of SAMPLE_DOCS) {
56
- const resp = await fetch(`/eval-docs/${filename}`);
57
- const body = await resp.text();
58
- const title = extractTitle(body, filename);
59
- docs.push({ id: filename, title, body, filepath: filename });
 
 
 
 
 
 
60
  }
61
- setDocuments(docs);
62
  }
 
63
  loadSampleDocs();
64
  }, []);
65
 
66
- // When documents change, chunk them and build BM25 index
67
- // When embedding model becomes ready, embed the chunks
68
  useEffect(() => {
69
- if (documents.length === 0) return;
 
 
 
 
 
 
 
70
 
71
- const allChunks = documents.flatMap(doc => chunkDocument(doc));
72
- setChunks(allChunks);
73
- setBm25Index(new BM25Index(allChunks));
 
 
 
 
74
 
75
- // Check if embedding model is ready for embedding
76
- const embeddingReady = models.find(m => m.name === 'embedding')?.status === 'ready';
77
- if (embeddingReady && allChunks.length > 0) {
 
 
 
 
 
 
 
78
  setIndexing(true);
79
- (async () => {
80
- const embedded: EmbeddedChunk[] = [];
81
- for (const chunk of allChunks) {
82
- const embedding = await embedDocChunk(chunk.title, chunk.text);
 
 
 
 
 
 
 
 
 
 
 
83
  embedded.push({ ...chunk, embedding });
84
  }
85
- setEmbeddedChunks(embedded);
86
- setIndexing(false);
87
- })();
 
 
 
 
 
 
 
 
88
  }
89
- }, [documents, models]);
90
 
91
- // Handle user upload
 
 
 
 
 
 
 
 
 
 
 
92
  const handleUpload = useCallback(async (files: FileList) => {
93
- const newDocs: Document[] = [];
94
- for (const file of Array.from(files)) {
95
- const body = await file.text();
96
- const title = extractTitle(body, file.name);
97
- newDocs.push({ id: file.name, title, body, filepath: file.name });
98
- }
99
- setDocuments(prev => [...prev, ...newDocs]);
 
 
100
  }, []);
101
 
102
- // Handle paste
103
  const handlePaste = useCallback((text: string, filename: string) => {
104
  const title = extractTitle(text, filename);
105
- setDocuments(prev => [...prev, { id: filename, title, body: text, filepath: filename }]);
 
 
106
  }, []);
107
 
108
- // Run search pipeline
109
  const handleSearch = useCallback(async (searchQuery: string) => {
110
  if (!bm25Index || embeddedChunks.length === 0) return;
111
 
 
112
  setQuery(searchQuery);
113
  setPipeline(INITIAL_PIPELINE);
114
 
115
- const gen = runPipeline({
116
  query: searchQuery,
117
  embeddedChunks,
118
  bm25Index,
119
  });
120
 
121
- for await (const event of gen) {
122
- setPipeline(prev => ({
 
 
123
  ...prev,
124
  [event.stage]: {
125
  status: event.status,
@@ -128,16 +248,12 @@ function App() {
128
  },
129
  }));
130
  }
131
- }, [bm25Index, embeddedChunks, chunks]);
132
 
133
  const allReady = isAllModelsReady() && embeddedChunks.length > 0 && !indexing;
134
 
135
- const [dark, setDark] = useState(() =>
136
- document.documentElement.getAttribute('data-theme') === 'dark'
137
- );
138
-
139
  const toggleTheme = useCallback(() => {
140
- setDark(prev => {
141
  const next = !prev;
142
  document.documentElement.setAttribute('data-theme', next ? 'dark' : 'light');
143
  localStorage.setItem('qmd-theme', next ? 'dark' : 'light');
@@ -146,11 +262,78 @@ function App() {
146
  }, []);
147
 
148
  return (
149
- <div style={{ fontFamily: 'system-ui, -apple-system, sans-serif', maxWidth: 1400, margin: '0 auto', padding: '1rem' }}>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  <header style={{ marginBottom: '1.5rem' }}>
151
- <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
152
- <h1 style={{ margin: 0, fontSize: '1.5rem', color: 'var(--text)' }}>QMD Web Demo</h1>
153
- <div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem' }}>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  <a
155
  href="https://github.com/tobi/qmd"
156
  target="_blank"
@@ -159,24 +342,25 @@ function App() {
159
  fontSize: '0.78rem',
160
  color: 'var(--text-secondary)',
161
  textDecoration: 'none',
162
- padding: '0.3rem 0.6rem',
163
  border: '1px solid var(--border)',
164
- borderRadius: '5px',
165
  fontFamily: 'system-ui, -apple-system, sans-serif',
 
166
  }}
167
- onMouseEnter={e => { (e.currentTarget as HTMLAnchorElement).style.color = '#4285F4'; }}
168
- onMouseLeave={e => { (e.currentTarget as HTMLAnchorElement).style.color = 'var(--text-secondary)'; }}
169
  >
170
- GitHub
171
  </a>
172
  <button
173
  onClick={toggleTheme}
174
  title={dark ? 'Switch to light mode' : 'Switch to dark mode'}
175
  style={{
176
- background: 'none',
177
  border: '1px solid var(--border)',
178
- borderRadius: '5px',
179
- padding: '0.3rem 0.55rem',
180
  cursor: 'pointer',
181
  fontSize: '1rem',
182
  lineHeight: 1,
@@ -187,22 +371,29 @@ function App() {
187
  </button>
188
  </div>
189
  </div>
190
- <p style={{ margin: '0.25rem 0 0', color: 'var(--text-secondary)', fontSize: '0.85rem', lineHeight: 1.5 }}>
191
- In-browser hybrid search pipeline running entirely on WebGPU.
192
- Three ONNX models (embedding, reranker, query expansion) power a full
193
- search stack: query expansion, BM25 + vector search, RRF fusion, and cross-encoder reranking.
194
- Built with{' '}
195
- <a href="https://github.com/tobi/qmd" target="_blank" rel="noopener noreferrer" style={{ color: '#4285F4', textDecoration: 'none' }}>QMD</a>
196
- {' '}and{' '}
197
- <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener noreferrer" style={{ color: '#4285F4', textDecoration: 'none' }}>Transformers.js</a>.
198
- </p>
199
  </header>
200
 
201
  <ModelStatus models={models} />
202
 
203
  {indexing && (
204
- <div style={{ padding: '0.5rem 1rem', background: 'var(--indexing-bg)', borderRadius: 6, marginBottom: '1rem', fontSize: '0.85rem', color: 'var(--text)' }}>
205
- Indexing documents (embedding chunks)...
 
 
 
 
 
 
 
 
 
 
206
  </div>
207
  )}
208
 
@@ -211,7 +402,7 @@ function App() {
211
  {query && <PipelineView state={pipeline} query={query} />}
212
 
213
  <DocumentManager
214
- documents={documents.map(d => ({ id: d.id, title: d.title, filepath: d.filepath }))}
215
  onUpload={handleUpload}
216
  onPaste={handlePaste}
217
  />
 
1
+ import { useState, useEffect, useCallback, useRef } from 'react';
2
  import type { Document, Chunk, EmbeddedChunk, ModelState } from './types';
3
  import { loadAllModels, isAllModelsReady } from './pipeline/models';
4
  import { chunkDocument, extractTitle } from './pipeline/chunking';
5
+ import { embedDocChunksBatch } from './pipeline/embeddings';
6
  import { BM25Index } from './pipeline/bm25';
7
  import { runPipeline } from './pipeline/orchestrator';
8
  import type { PipelineState } from './components/PipelineView';
 
11
  import PipelineView from './components/PipelineView';
12
  import DocumentManager from './components/DocumentManager';
13
 
 
14
  const SAMPLE_DOCS = [
15
  'api-design-principles.md',
16
  'distributed-systems-overview.md',
 
18
  'history-of-coffee.md',
19
  ];
20
 
21
+ const SHOWCASE_CARDS = [
22
+ {
23
+ title: 'Faithful to qmd',
24
+ body: 'BM25, vector search, query expansion, RRF fusion, and reranking follow the upstream retrieval recipe instead of flattening everything into one model call.',
25
+ },
26
+ {
27
+ title: 'Browser-native bits',
28
+ body: 'Transformers.js and WebGPU run the pipeline locally, cache model weights in the browser, and expose each stage so the search system stays inspectable.',
29
+ },
30
+ ];
31
+
32
+ const INDEX_BATCH_SIZE = 8;
33
+
34
  const INITIAL_PIPELINE: PipelineState = {
35
  expansion: { status: 'idle' },
36
  search: { status: 'idle' },
 
39
  blend: { status: 'idle' },
40
  };
41
 
42
+ function upsertDocuments(current: Document[], incoming: Document[]): Document[] {
43
+ const merged = new Map(current.map((doc) => [doc.id, doc]));
44
+ for (const doc of incoming) {
45
+ merged.set(doc.id, doc);
46
+ }
47
+ return [...merged.values()];
48
+ }
49
+
50
+ function ShowcaseCard({ title, body }: { title: string; body: string }) {
51
+ return (
52
+ <div
53
+ style={{
54
+ padding: '0.9rem 1rem',
55
+ background: 'var(--bg-card)',
56
+ border: '1px solid var(--border)',
57
+ borderRadius: '10px',
58
+ boxShadow: '0 2px 12px var(--shadow)',
59
+ }}
60
+ >
61
+ <div
62
+ style={{
63
+ marginBottom: '0.35rem',
64
+ fontSize: '0.74rem',
65
+ fontWeight: 700,
66
+ letterSpacing: '0.08em',
67
+ textTransform: 'uppercase',
68
+ color: '#4285F4',
69
+ }}
70
+ >
71
+ {title}
72
+ </div>
73
+ <p
74
+ style={{
75
+ margin: 0,
76
+ fontSize: '0.84rem',
77
+ lineHeight: 1.6,
78
+ color: 'var(--text-secondary)',
79
+ }}
80
+ >
81
+ {body}
82
+ </p>
83
+ </div>
84
+ );
85
+ }
86
+
87
  function App() {
88
  const [models, setModels] = useState<ModelState[]>([
89
  { name: 'embedding', status: 'pending', progress: 0 },
 
96
  const [bm25Index, setBm25Index] = useState<BM25Index | null>(null);
97
  const [pipeline, setPipeline] = useState<PipelineState>(INITIAL_PIPELINE);
98
  const [indexing, setIndexing] = useState(false);
99
+ const [indexingProgress, setIndexingProgress] = useState({ completed: 0, total: 0 });
100
  const [query, setQuery] = useState('');
101
+ const [dark, setDark] = useState(() =>
102
+ document.documentElement.getAttribute('data-theme') === 'dark',
103
+ );
104
+ const searchRunIdRef = useRef(0);
105
+
106
+ const embeddingReady = models.find((model) => model.name === 'embedding')?.status === 'ready';
107
 
 
108
  useEffect(() => {
109
  loadAllModels((state) => {
110
+ setModels((prev) => prev.map((model) => (
111
+ model.name === state.name ? state : model
112
+ )));
113
  }).catch(console.error);
114
  }, []);
115
 
 
116
  useEffect(() => {
117
  async function loadSampleDocs() {
118
+ try {
119
+ const loadedDocs = await Promise.all(
120
+ SAMPLE_DOCS.map(async (filename) => {
121
+ const response = await fetch(`/eval-docs/${filename}`);
122
+ const body = await response.text();
123
+ const title = extractTitle(body, filename);
124
+ return { id: filename, title, body, filepath: filename };
125
+ }),
126
+ );
127
+ setDocuments((prev) => upsertDocuments(prev, loadedDocs));
128
+ } catch (error) {
129
+ console.error(error);
130
  }
 
131
  }
132
+
133
  loadSampleDocs();
134
  }, []);
135
 
 
 
136
  useEffect(() => {
137
+ if (documents.length === 0) {
138
+ setChunks([]);
139
+ setEmbeddedChunks([]);
140
+ setBm25Index(null);
141
+ setIndexing(false);
142
+ setIndexingProgress({ completed: 0, total: 0 });
143
+ return;
144
+ }
145
 
146
+ const nextChunks = documents.flatMap((doc) => chunkDocument(doc));
147
+ setChunks(nextChunks);
148
+ setBm25Index(new BM25Index(nextChunks));
149
+ }, [documents]);
150
+
151
+ useEffect(() => {
152
+ let cancelled = false;
153
 
154
+ if (!embeddingReady || chunks.length === 0) {
155
+ setEmbeddedChunks([]);
156
+ setIndexing(false);
157
+ setIndexingProgress({ completed: 0, total: chunks.length });
158
+ return () => {
159
+ cancelled = true;
160
+ };
161
+ }
162
+
163
+ async function embedChunks() {
164
  setIndexing(true);
165
+ setIndexingProgress({ completed: 0, total: chunks.length });
166
+
167
+ const embedded: EmbeddedChunk[] = [];
168
+ for (let i = 0; i < chunks.length; i += INDEX_BATCH_SIZE) {
169
+ const batch = chunks.slice(i, i + INDEX_BATCH_SIZE);
170
+ const embeddings = await embedDocChunksBatch(
171
+ batch.map((chunk) => ({ title: chunk.title, text: chunk.text })),
172
+ );
173
+
174
+ if (cancelled) return;
175
+
176
+ for (let j = 0; j < batch.length; j++) {
177
+ const chunk = batch[j];
178
+ const embedding = embeddings[j];
179
+ if (!chunk || !embedding) continue;
180
  embedded.push({ ...chunk, embedding });
181
  }
182
+
183
+ setIndexingProgress({
184
+ completed: Math.min(i + batch.length, chunks.length),
185
+ total: chunks.length,
186
+ });
187
+ }
188
+
189
+ if (cancelled) return;
190
+
191
+ setEmbeddedChunks(embedded);
192
+ setIndexing(false);
193
  }
 
194
 
195
+ embedChunks().catch((error) => {
196
+ if (cancelled) return;
197
+ console.error(error);
198
+ setEmbeddedChunks([]);
199
+ setIndexing(false);
200
+ });
201
+
202
+ return () => {
203
+ cancelled = true;
204
+ };
205
+ }, [chunks, embeddingReady]);
206
+
207
  const handleUpload = useCallback(async (files: FileList) => {
208
+ const uploadedDocs = await Promise.all(
209
+ Array.from(files).map(async (file) => {
210
+ const body = await file.text();
211
+ const title = extractTitle(body, file.name);
212
+ return { id: file.name, title, body, filepath: file.name };
213
+ }),
214
+ );
215
+
216
+ setDocuments((prev) => upsertDocuments(prev, uploadedDocs));
217
  }, []);
218
 
 
219
  const handlePaste = useCallback((text: string, filename: string) => {
220
  const title = extractTitle(text, filename);
221
+ setDocuments((prev) => upsertDocuments(prev, [
222
+ { id: filename, title, body: text, filepath: filename },
223
+ ]));
224
  }, []);
225
 
 
226
  const handleSearch = useCallback(async (searchQuery: string) => {
227
  if (!bm25Index || embeddedChunks.length === 0) return;
228
 
229
+ const runId = ++searchRunIdRef.current;
230
  setQuery(searchQuery);
231
  setPipeline(INITIAL_PIPELINE);
232
 
233
+ const generator = runPipeline({
234
  query: searchQuery,
235
  embeddedChunks,
236
  bm25Index,
237
  });
238
 
239
+ for await (const event of generator) {
240
+ if (searchRunIdRef.current !== runId) return;
241
+
242
+ setPipeline((prev) => ({
243
  ...prev,
244
  [event.stage]: {
245
  status: event.status,
 
248
  },
249
  }));
250
  }
251
+ }, [bm25Index, embeddedChunks]);
252
 
253
  const allReady = isAllModelsReady() && embeddedChunks.length > 0 && !indexing;
254
 
 
 
 
 
255
  const toggleTheme = useCallback(() => {
256
+ setDark((prev) => {
257
  const next = !prev;
258
  document.documentElement.setAttribute('data-theme', next ? 'dark' : 'light');
259
  localStorage.setItem('qmd-theme', next ? 'dark' : 'light');
 
262
  }, []);
263
 
264
  return (
265
+ <div
266
+ style={{
267
+ fontFamily: 'system-ui, -apple-system, sans-serif',
268
+ maxWidth: 1400,
269
+ margin: '0 auto',
270
+ padding: '1.25rem 1rem 2rem',
271
+ }}
272
+ >
273
+ <style>{`
274
+ .showcase-grid {
275
+ display: grid;
276
+ grid-template-columns: repeat(2, minmax(0, 1fr));
277
+ gap: 0.85rem;
278
+ margin-top: 1rem;
279
+ }
280
+
281
+ @media (max-width: 900px) {
282
+ .showcase-grid {
283
+ grid-template-columns: 1fr;
284
+ }
285
+ }
286
+ `}</style>
287
+
288
  <header style={{ marginBottom: '1.5rem' }}>
289
+ <div style={{ display: 'flex', alignItems: 'flex-start', justifyContent: 'space-between', gap: '1rem' }}>
290
+ <div style={{ flex: 1 }}>
291
+ <div
292
+ style={{
293
+ marginBottom: '0.4rem',
294
+ fontSize: '0.74rem',
295
+ fontWeight: 700,
296
+ letterSpacing: '0.08em',
297
+ textTransform: 'uppercase',
298
+ color: '#4285F4',
299
+ }}
300
+ >
301
+ QMD in the browser
302
+ </div>
303
+ <h1 style={{ margin: 0, fontSize: '1.7rem', color: 'var(--text)' }}>
304
+ QMD Web Sandbox
305
+ </h1>
306
+ <p style={{ margin: '0.45rem 0 0', color: 'var(--text-secondary)', fontSize: '0.9rem', lineHeight: 1.65, maxWidth: 860 }}>
307
+ A browser-native sandbox that recreates the core{' '}
308
+ <a href="https://github.com/tobi/qmd" target="_blank" rel="noopener noreferrer" style={{ color: '#4285F4', textDecoration: 'none' }}>qmd</a>
309
+ {' '}retrieval pipeline with Transformers.js, while making the local WebGPU execution path visible.
310
+ Documents are chunked, embedded, searched, fused, reranked, and inspected entirely in the browser.
311
+ </p>
312
+ <div
313
+ style={{
314
+ marginTop: '0.7rem',
315
+ display: 'inline-flex',
316
+ alignItems: 'center',
317
+ gap: '0.45rem',
318
+ padding: '0.4rem 0.7rem',
319
+ borderRadius: '999px',
320
+ border: '1px solid var(--border)',
321
+ background: 'var(--bg-card)',
322
+ color: 'var(--text-secondary)',
323
+ fontSize: '0.78rem',
324
+ boxShadow: '0 2px 10px var(--shadow)',
325
+ }}
326
+ >
327
+ <span style={{ color: '#34a853', fontWeight: 700 }}>Browser bits:</span>
328
+ <span>WebGPU inference</span>
329
+ <span style={{ color: 'var(--text-muted)' }}>•</span>
330
+ <span>local model cache</span>
331
+ <span style={{ color: 'var(--text-muted)' }}>•</span>
332
+ <span>transparent pipeline</span>
333
+ </div>
334
+ </div>
335
+
336
+ <div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem', flexShrink: 0 }}>
337
  <a
338
  href="https://github.com/tobi/qmd"
339
  target="_blank"
 
342
  fontSize: '0.78rem',
343
  color: 'var(--text-secondary)',
344
  textDecoration: 'none',
345
+ padding: '0.35rem 0.7rem',
346
  border: '1px solid var(--border)',
347
+ borderRadius: '999px',
348
  fontFamily: 'system-ui, -apple-system, sans-serif',
349
+ background: 'var(--bg-card)',
350
  }}
351
+ onMouseEnter={(event) => { event.currentTarget.style.color = '#4285F4'; }}
352
+ onMouseLeave={(event) => { event.currentTarget.style.color = 'var(--text-secondary)'; }}
353
  >
354
+ Original qmd
355
  </a>
356
  <button
357
  onClick={toggleTheme}
358
  title={dark ? 'Switch to light mode' : 'Switch to dark mode'}
359
  style={{
360
+ background: 'var(--bg-card)',
361
  border: '1px solid var(--border)',
362
+ borderRadius: '999px',
363
+ padding: '0.35rem 0.6rem',
364
  cursor: 'pointer',
365
  fontSize: '1rem',
366
  lineHeight: 1,
 
371
  </button>
372
  </div>
373
  </div>
374
+
375
+ <div className="showcase-grid">
376
+ {SHOWCASE_CARDS.map((card) => (
377
+ <ShowcaseCard key={card.title} title={card.title} body={card.body} />
378
+ ))}
379
+ </div>
 
 
 
380
  </header>
381
 
382
  <ModelStatus models={models} />
383
 
384
  {indexing && (
385
+ <div
386
+ style={{
387
+ padding: '0.6rem 1rem',
388
+ background: 'var(--indexing-bg)',
389
+ borderRadius: 8,
390
+ marginBottom: '1rem',
391
+ fontSize: '0.84rem',
392
+ color: 'var(--text)',
393
+ border: '1px solid var(--border)',
394
+ }}
395
+ >
396
+ Indexing local chunks in the browser ({indexingProgress.completed}/{indexingProgress.total})...
397
  </div>
398
  )}
399
 
 
402
  {query && <PipelineView state={pipeline} query={query} />}
403
 
404
  <DocumentManager
405
+ documents={documents.map((doc) => ({ id: doc.id, title: doc.title, filepath: doc.filepath }))}
406
  onUpload={handleUpload}
407
  onPaste={handlePaste}
408
  />
src/components/DocumentManager.tsx CHANGED
@@ -221,7 +221,7 @@ export default function DocumentManager({ documents, onUpload, onPaste }: Docume
221
 
222
  {documents.length === 0 ? (
223
  <p style={{ fontSize: '0.82rem', color: 'var(--text-muted)', margin: 0 }}>
224
- No documents loaded. Upload .md or .txt files, or paste text.
225
  </p>
226
  ) : (
227
  <div style={{ maxHeight: '180px', overflowY: 'auto' }}>
 
221
 
222
  {documents.length === 0 ? (
223
  <p style={{ fontSize: '0.82rem', color: 'var(--text-muted)', margin: 0 }}>
224
+ No documents loaded. Upload .md or .txt files, or paste text. They stay local to this browser session.
225
  </p>
226
  ) : (
227
  <div style={{ maxHeight: '180px', overflowY: 'auto' }}>
src/components/ExpansionColumn.tsx CHANGED
@@ -61,6 +61,24 @@ function ExpansionCard({ label, content }: { label: string; content: string | st
61
  );
62
  }
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  export default function ExpansionColumn({ state, info }: ExpansionColumnProps) {
65
  const isIdle = state.status === 'idle';
66
  const isRunning = state.status === 'running';
@@ -129,9 +147,19 @@ export default function ExpansionColumn({ state, info }: ExpansionColumnProps) {
129
 
130
  {isDone && state.data && (
131
  <>
132
- <ExpansionCard label="HyDE (Hypothetical Document)" content={state.data.hyde} />
133
- <ExpansionCard label="Vec Sentences" content={state.data.vec} />
134
- <ExpansionCard label="Lex Keywords" content={state.data.lex} />
 
 
 
 
 
 
 
 
 
 
135
  </>
136
  )}
137
  </div>
 
61
  );
62
  }
63
 
64
+ function ExpansionNote({ text }: { text: string }) {
65
+ return (
66
+ <div style={{
67
+ padding: '0.65rem 0.8rem',
68
+ marginBottom: '0.6rem',
69
+ background: 'var(--bg-card)',
70
+ border: '1px solid var(--border)',
71
+ borderRadius: '6px',
72
+ fontFamily: 'system-ui, -apple-system, sans-serif',
73
+ fontSize: '0.8rem',
74
+ color: 'var(--text-secondary)',
75
+ lineHeight: 1.55,
76
+ }}>
77
+ {text}
78
+ </div>
79
+ );
80
+ }
81
+
82
  export default function ExpansionColumn({ state, info }: ExpansionColumnProps) {
83
  const isIdle = state.status === 'idle';
84
  const isRunning = state.status === 'running';
 
147
 
148
  {isDone && state.data && (
149
  <>
150
+ {state.data.note && <ExpansionNote text={state.data.note} />}
151
+ {state.data.hyde.trim() && (
152
+ <ExpansionCard label="HyDE (Hypothetical Document)" content={state.data.hyde} />
153
+ )}
154
+ {state.data.vec.length > 0 && (
155
+ <ExpansionCard label="Vec Sentences" content={state.data.vec} />
156
+ )}
157
+ {state.data.lex.trim() && (
158
+ <ExpansionCard label="Lex Keywords" content={state.data.lex} />
159
+ )}
160
+ {!state.data.note && !state.data.hyde.trim() && state.data.vec.length === 0 && !state.data.lex.trim() && (
161
+ <ExpansionNote text="No additional query variants were generated." />
162
+ )}
163
  </>
164
  )}
165
  </div>
src/components/FusionColumn.tsx CHANGED
@@ -96,7 +96,7 @@ function RRFRow({ result, rank }: { result: RRFResult; rank: number }) {
96
 
97
  function BeforeAfterComparison({ before, after }: { before: RRFResult[]; after: RerankedResult[] }) {
98
  const top5before = before.slice(0, 5);
99
- const top5after = [...after].sort((a, b) => b.blendedScore - a.blendedScore).slice(0, 5);
100
 
101
  return (
102
  <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: '0.5rem' }}>
 
96
 
97
  function BeforeAfterComparison({ before, after }: { before: RRFResult[]; after: RerankedResult[] }) {
98
  const top5before = before.slice(0, 5);
99
+ const top5after = [...after].sort((a, b) => b.rerankScore - a.rerankScore).slice(0, 5);
100
 
101
  return (
102
  <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: '0.5rem' }}>
src/components/ModelStatus.tsx CHANGED
@@ -99,7 +99,11 @@ function ModelRow({ model }: { model: ModelState }) {
99
  }
100
 
101
  export default function ModelStatus({ models }: ModelStatusProps) {
102
- const allReady = models.length > 0 && models.every(m => m.status === 'ready');
 
 
 
 
103
 
104
  return (
105
  <div style={{
@@ -126,18 +130,18 @@ export default function ModelStatus({ models }: ModelStatusProps) {
126
  }}>
127
  Models
128
  </h3>
129
- {allReady && (
130
  <span style={{
131
  fontSize: '0.75rem',
132
  fontFamily: 'system-ui, -apple-system, sans-serif',
133
  color: '#388e3c',
134
  fontWeight: 600,
135
  }}>
136
- All ready
137
  </span>
138
  )}
139
  </div>
140
- {!allReady && (
141
  <p style={{
142
  margin: '0 0 0.5rem',
143
  fontSize: '0.75rem',
@@ -145,7 +149,29 @@ export default function ModelStatus({ models }: ModelStatusProps) {
145
  color: 'var(--text-secondary)',
146
  lineHeight: 1.4,
147
  }}>
148
- First load downloads ~4 GB of model weights. Subsequent visits use the browser cache.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  </p>
150
  )}
151
  {models.map(m => (
 
99
  }
100
 
101
  export default function ModelStatus({ models }: ModelStatusProps) {
102
+ const coreModels = models.filter((model) => model.name !== 'expansion');
103
+ const expansionModel = models.find((model) => model.name === 'expansion');
104
+ const coreReady = coreModels.length > 0 && coreModels.every((model) => model.status === 'ready');
105
+ const expansionReady = expansionModel?.status === 'ready';
106
+ const expansionUnavailable = expansionModel?.status === 'error';
107
 
108
  return (
109
  <div style={{
 
130
  }}>
131
  Models
132
  </h3>
133
+ {coreReady && (
134
  <span style={{
135
  fontSize: '0.75rem',
136
  fontFamily: 'system-ui, -apple-system, sans-serif',
137
  color: '#388e3c',
138
  fontWeight: 600,
139
  }}>
140
+ Search ready
141
  </span>
142
  )}
143
  </div>
144
+ {!coreReady && (
145
  <p style={{
146
  margin: '0 0 0.5rem',
147
  fontSize: '0.75rem',
 
149
  color: 'var(--text-secondary)',
150
  lineHeight: 1.4,
151
  }}>
152
+ First load downloads several GB of model weights. Subsequent visits use the browser cache.
153
+ </p>
154
+ )}
155
+ {coreReady && !expansionReady && !expansionUnavailable && (
156
+ <p style={{
157
+ margin: '0 0 0.5rem',
158
+ fontSize: '0.75rem',
159
+ fontFamily: 'system-ui, -apple-system, sans-serif',
160
+ color: 'var(--text-secondary)',
161
+ lineHeight: 1.4,
162
+ }}>
163
+ Embedding and reranker are ready. Expansion is optional and will join when it finishes loading.
164
+ </p>
165
+ )}
166
+ {coreReady && expansionUnavailable && (
167
+ <p style={{
168
+ margin: '0 0 0.5rem',
169
+ fontSize: '0.75rem',
170
+ fontFamily: 'system-ui, -apple-system, sans-serif',
171
+ color: '#c62828',
172
+ lineHeight: 1.4,
173
+ }}>
174
+ Expansion is optional. Search still works with the original query when the expansion model is unavailable.
175
  </p>
176
  )}
177
  {models.map(m => (
src/components/PipelineView.tsx CHANGED
@@ -28,19 +28,19 @@ const COLUMNS = [
28
  label: 'Query Expansion',
29
  bg: 'var(--col-expansion)',
30
  headerColor: '#f57f17',
31
- info: 'A fine-tuned 1.7B LLM generates three query variants: lexical keywords (lex) for BM25, semantic sentences (vec) for vector search, and a hypothetical document (HyDE) to improve recall.',
32
  },
33
  {
34
  label: 'Parallel Search',
35
  bg: 'var(--col-search)',
36
  headerColor: '#00897b',
37
- info: 'Two search strategies run simultaneously: BM25 keyword search (exact term matching) and vector similarity search (semantic meaning via embeddings). Each finds relevant document chunks independently.',
38
  },
39
  {
40
  label: 'Fusion & Reranking',
41
  bg: 'var(--col-fusion)',
42
  headerColor: '#388e3c',
43
- info: 'Results are merged via Reciprocal Rank Fusion (RRF), then a cross-encoder reranker (Qwen3-Reranker-0.6B) re-scores the top candidates for precision. Final scores blend RRF and reranker signals.',
44
  },
45
  ];
46
 
@@ -161,7 +161,9 @@ export default function PipelineView({ state, query }: PipelineViewProps) {
161
  }
162
  `}</style>
163
 
164
- <div style={{
 
 
165
  display: 'grid',
166
  gridTemplateColumns: 'minmax(100px, 0.6fr) minmax(120px, 0.8fr) minmax(200px, 1.5fr) minmax(200px, 2fr)',
167
  gap: '0',
@@ -169,10 +171,12 @@ export default function PipelineView({ state, query }: PipelineViewProps) {
169
  overflow: 'hidden',
170
  border: '1px solid var(--border)',
171
  boxShadow: '0 2px 12px var(--shadow)',
172
- }}>
 
173
  {COLUMNS.map((col, i) => (
174
  <div
175
  key={col.label}
 
176
  style={{
177
  background: col.bg,
178
  padding: '1rem',
@@ -200,6 +204,15 @@ export default function PipelineView({ state, query }: PipelineViewProps) {
200
  .pipeline-grid {
201
  grid-template-columns: 1fr !important;
202
  }
 
 
 
 
 
 
 
 
 
203
  }
204
  `}</style>
205
  </>
 
28
  label: 'Query Expansion',
29
  bg: 'var(--col-expansion)',
30
  headerColor: '#f57f17',
31
+ info: 'A fine-tuned 1.7B LLM can generate lexical keywords (lex), semantic sentences (vec), and a hypothetical document (HyDE). When BM25 already has a strong exact match, expansion is skipped to stay closer to qmd.',
32
  },
33
  {
34
  label: 'Parallel Search',
35
  bg: 'var(--col-search)',
36
  headerColor: '#00897b',
37
+ info: 'The original query always runs through BM25 and vector search. Lex variants route only to BM25, while vec and HyDE variants route to vector search, mirroring qmd’s typed retrieval flow.',
38
  },
39
  {
40
  label: 'Fusion & Reranking',
41
  bg: 'var(--col-fusion)',
42
  headerColor: '#388e3c',
43
+ info: 'Results are merged via Reciprocal Rank Fusion (RRF), then a cross-encoder reranker (Qwen3-Reranker-0.6B) re-scores the top candidates. Final ranking blends reranker confidence with RRF position, not raw retrieval scores.',
44
  },
45
  ];
46
 
 
161
  }
162
  `}</style>
163
 
164
+ <div
165
+ className="pipeline-grid"
166
+ style={{
167
  display: 'grid',
168
  gridTemplateColumns: 'minmax(100px, 0.6fr) minmax(120px, 0.8fr) minmax(200px, 1.5fr) minmax(200px, 2fr)',
169
  gap: '0',
 
171
  overflow: 'hidden',
172
  border: '1px solid var(--border)',
173
  boxShadow: '0 2px 12px var(--shadow)',
174
+ }}
175
+ >
176
  {COLUMNS.map((col, i) => (
177
  <div
178
  key={col.label}
179
+ className="pipeline-cell"
180
  style={{
181
  background: col.bg,
182
  padding: '1rem',
 
204
  .pipeline-grid {
205
  grid-template-columns: 1fr !important;
206
  }
207
+
208
+ .pipeline-cell {
209
+ border-right: none !important;
210
+ border-bottom: 1px solid var(--border);
211
+ }
212
+
213
+ .pipeline-cell:last-child {
214
+ border-bottom: none;
215
+ }
216
  }
217
  `}</style>
218
  </>
src/components/QueryInput.tsx CHANGED
@@ -28,7 +28,7 @@ export default function QueryInput({ onSearch, disabled }: QueryInputProps) {
28
  value={query}
29
  onChange={e => setQuery(e.target.value)}
30
  disabled={disabled}
31
- placeholder={disabled ? 'Loading models\u2026' : 'Enter a search query\u2026'}
32
  style={{
33
  flex: 1,
34
  padding: '0.6rem 0.9rem',
@@ -66,7 +66,7 @@ export default function QueryInput({ onSearch, disabled }: QueryInputProps) {
66
 
67
  <div style={{ marginTop: '0.6rem', display: 'flex', gap: '0.4rem', flexWrap: 'wrap', alignItems: 'center' }}>
68
  <span style={{ fontSize: '0.8rem', color: 'var(--text-secondary)', fontFamily: 'system-ui, -apple-system, sans-serif' }}>
69
- Examples:
70
  </span>
71
  {EXAMPLE_QUERIES.map(q => (
72
  <button
 
28
  value={query}
29
  onChange={e => setQuery(e.target.value)}
30
  disabled={disabled}
31
+ placeholder={disabled ? 'Loading browser models\u2026' : 'Enter a search query\u2026'}
32
  style={{
33
  flex: 1,
34
  padding: '0.6rem 0.9rem',
 
66
 
67
  <div style={{ marginTop: '0.6rem', display: 'flex', gap: '0.4rem', flexWrap: 'wrap', alignItems: 'center' }}>
68
  <span style={{ fontSize: '0.8rem', color: 'var(--text-secondary)', fontFamily: 'system-ui, -apple-system, sans-serif' }}>
69
+ Demo queries:
70
  </span>
71
  {EXAMPLE_QUERIES.map(q => (
72
  <button
src/constants.ts CHANGED
@@ -11,6 +11,15 @@ export const RRF_SECONDARY_WEIGHT = 1.0;
11
  export const RRF_RANK1_BONUS = 0.05;
12
  export const RRF_RANK2_BONUS = 0.02;
13
 
 
 
 
 
 
 
 
 
 
14
 
15
  // BM25
16
  export const BM25_K1 = 1.2;
 
11
  export const RRF_RANK1_BONUS = 0.05;
12
  export const RRF_RANK2_BONUS = 0.02;
13
 
14
+ // Strong lexical match detection
15
+ export const STRONG_SIGNAL_MIN_SCORE = 0.85;
16
+ export const STRONG_SIGNAL_MIN_GAP = 0.15;
17
+
18
+ // Position-aware blending
19
+ export const BLEND_TOP3_RRF_WEIGHT = 0.75;
20
+ export const BLEND_TOP10_RRF_WEIGHT = 0.6;
21
+ export const BLEND_TAIL_RRF_WEIGHT = 0.4;
22
+
23
 
24
  // BM25
25
  export const BM25_K1 = 1.2;
src/pipeline/blend.test.ts CHANGED
@@ -1,12 +1,12 @@
1
  import { describe, it, expect } from "vitest";
2
  import { blendScores } from "./blend";
3
  import type { RRFResult } from "../types";
 
 
 
 
 
4
 
5
- const RRF_W = 0.8; // must match BLEND_RRF_WEIGHT in blend.ts
6
-
7
- // ---------------------------------------------------------------------------
8
- // Helpers
9
- // ---------------------------------------------------------------------------
10
  function makeRRFResult(
11
  docId: string,
12
  score: number,
@@ -22,110 +22,111 @@ function makeRRFResult(
22
  };
23
  }
24
 
25
- // ---------------------------------------------------------------------------
26
- // blendScores
27
- // ---------------------------------------------------------------------------
28
  describe("blendScores", () => {
29
  it("returns empty array for empty input", () => {
30
  expect(blendScores([], new Map())).toEqual([]);
31
  });
32
 
33
- it("applies uniform 70/30 weight to all ranks", () => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  const rrfResults = [
35
  makeRRFResult("doc1", 0.5),
36
  makeRRFResult("doc2", 0.4),
37
  makeRRFResult("doc3", 0.3),
 
38
  ];
39
- const rerankScores = new Map([
40
- ["doc1", 0.9],
41
- ["doc2", 0.8],
42
- ["doc3", 0.7],
43
- ]);
44
- const results = blendScores(rrfResults, rerankScores);
45
- // doc1: normalized RRF = 1.0, rerank = 0.9 -> 0.7*1.0 + 0.3*0.9 = 0.97
46
- const doc1 = results.find((r) => r.docId === "doc1")!;
47
- expect(doc1.score).toBeCloseTo(RRF_W * 1.0 + (1 - RRF_W) * 0.9, 10);
48
  });
49
 
50
- it("defaults missing rerank scores to 0", () => {
51
- const rrfResults = [makeRRFResult("doc1", 0.5)];
52
- const rerankScores = new Map<string, number>();
53
- const results = blendScores(rrfResults, rerankScores);
54
- // score = 0.7 * 1.0 + 0.3 * 0 = 0.7 (normalized RRF = 1.0 for single result)
55
- expect(results[0].score).toBeCloseTo(RRF_W, 10);
 
 
 
56
  });
57
 
58
- it("sorts by blended score descending", () => {
59
  const rrfResults = [
60
  makeRRFResult("doc1", 0.5),
61
  makeRRFResult("doc2", 0.4),
62
  makeRRFResult("doc3", 0.3),
 
63
  ];
 
64
  const rerankScores = new Map([
65
- ["doc1", 0.1],
66
- ["doc2", 0.2],
67
- ["doc3", 0.99],
 
68
  ]);
 
69
  const results = blendScores(rrfResults, rerankScores);
70
- for (let i = 1; i < results.length; i++) {
71
- expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
72
- }
73
  });
74
 
75
- it("can reorder results when reranker disagrees with RRF", () => {
76
  const rrfResults = [
77
  makeRRFResult("doc1", 0.5),
78
- makeRRFResult("doc2", 0.49),
 
79
  ];
80
  const rerankScores = new Map([
81
- ["doc1", 0.0],
82
- ["doc2", 1.0],
 
83
  ]);
 
84
  const results = blendScores(rrfResults, rerankScores);
85
- // doc1: 0.7*1.0 + 0.3*0.0 = 0.700
86
- // doc2: 0.7*(0.49/0.5) + 0.3*1.0 = 0.686 + 0.3 = 0.986
87
- expect(results[0].docId).toBe("doc2");
88
  });
89
 
90
- it("preserves filepath, title, bestChunk in final results", () => {
91
- const rrfResults = [makeRRFResult("doc1", 0.5, "My Title")];
92
- const results = blendScores(rrfResults, new Map());
93
  expect(results[0].filepath).toBe("doc1");
94
  expect(results[0].title).toBe("My Title");
95
  expect(results[0].bestChunk).toBe("chunk from doc1");
96
  expect(results[0].docId).toBe("doc1");
97
  });
98
 
99
- it("deduplicates by docId, keeping highest blended score", () => {
100
- const rrfResults = [
101
- makeRRFResult("doc1", 0.5),
102
- makeRRFResult("doc1", 0.3),
103
- ];
104
- const rerankScores = new Map([["doc1", 0.8]]);
105
- const results = blendScores(rrfResults, rerankScores);
106
- expect(results).toHaveLength(1);
107
- expect(results[0].docId).toBe("doc1");
108
- });
109
 
110
- it("uniform weight does not cause rank leapfrogging from reranker noise", () => {
111
- // The bug: with position-aware weights, rank 4 got 40% reranker weight
112
- // while rank 3 got only 25%, causing irrelevant docs to jump up.
113
- // With uniform weights, a low-RRF doc needs a very high reranker score to leapfrog.
114
- const rrfResults = [
115
- makeRRFResult("doc1", 0.12), // rank 1 — relevant
116
- makeRRFResult("doc2", 0.07), // rank 2 — relevant
117
- makeRRFResult("doc3", 0.05), // rank 3 — relevant
118
- makeRRFResult("doc4", 0.047), // rank 4 — irrelevant (Taj Mahal)
119
- ];
120
- const rerankScores = new Map([
121
- ["doc1", 0.0],
122
- ["doc2", 0.0],
123
- ["doc3", 0.0],
124
- ["doc4", 0.66], // noisy reranker gives moderate score to irrelevant doc
125
- ]);
126
- const results = blendScores(rrfResults, rerankScores);
127
- // doc4 should NOT be at position 2
128
  expect(results[0].docId).toBe("doc1");
129
- expect(results[1].docId).not.toBe("doc4");
130
  });
131
  });
 
1
  import { describe, it, expect } from "vitest";
2
  import { blendScores } from "./blend";
3
  import type { RRFResult } from "../types";
4
+ import {
5
+ BLEND_TAIL_RRF_WEIGHT,
6
+ BLEND_TOP10_RRF_WEIGHT,
7
+ BLEND_TOP3_RRF_WEIGHT,
8
+ } from "../constants";
9
 
 
 
 
 
 
10
  function makeRRFResult(
11
  docId: string,
12
  score: number,
 
22
  };
23
  }
24
 
 
 
 
25
  describe("blendScores", () => {
26
  it("returns empty array for empty input", () => {
27
  expect(blendScores([], new Map())).toEqual([]);
28
  });
29
 
30
+ it("uses the top-3 RRF weight for the highest-ranked documents", () => {
31
+ const results = blendScores(
32
+ [makeRRFResult("doc1", 0.5)],
33
+ new Map([["doc1", 0.9]]),
34
+ );
35
+
36
+ expect(results[0].score).toBeCloseTo(
37
+ BLEND_TOP3_RRF_WEIGHT * 1 + (1 - BLEND_TOP3_RRF_WEIGHT) * 0.9,
38
+ 10,
39
+ );
40
+ });
41
+
42
+ it("falls back to rank-only RRF when rerank scores are missing", () => {
43
+ const results = blendScores(
44
+ [makeRRFResult("doc1", 0.5), makeRRFResult("doc2", 0.4)],
45
+ new Map<string, number>(),
46
+ );
47
+
48
+ expect(results[0].score).toBeCloseTo(BLEND_TOP3_RRF_WEIGHT * 1, 10);
49
+ expect(results[1].score).toBeCloseTo(BLEND_TOP3_RRF_WEIGHT * 0.5, 10);
50
+ });
51
+
52
+ it("switches to the top-10 weight after rank 3", () => {
53
  const rrfResults = [
54
  makeRRFResult("doc1", 0.5),
55
  makeRRFResult("doc2", 0.4),
56
  makeRRFResult("doc3", 0.3),
57
+ makeRRFResult("doc4", 0.2),
58
  ];
59
+
60
+ const results = blendScores(rrfResults, new Map());
61
+ const doc4 = results.find((result) => result.docId === "doc4")!;
62
+
63
+ expect(doc4.score).toBeCloseTo(BLEND_TOP10_RRF_WEIGHT * 0.25, 10);
 
 
 
 
64
  });
65
 
66
+ it("uses the tail weight after rank 10", () => {
67
+ const rrfResults = Array.from({ length: 11 }, (_, index) =>
68
+ makeRRFResult(`doc${index + 1}`, 1 - index * 0.01),
69
+ );
70
+
71
+ const results = blendScores(rrfResults, new Map());
72
+ const tailDoc = results.find((result) => result.docId === "doc11")!;
73
+
74
+ expect(tailDoc.score).toBeCloseTo(BLEND_TAIL_RRF_WEIGHT * (1 / 11), 10);
75
  });
76
 
77
+ it("can reorder results when reranker strongly disagrees", () => {
78
  const rrfResults = [
79
  makeRRFResult("doc1", 0.5),
80
  makeRRFResult("doc2", 0.4),
81
  makeRRFResult("doc3", 0.3),
82
+ makeRRFResult("doc4", 0.2),
83
  ];
84
+
85
  const rerankScores = new Map([
86
+ ["doc1", 0],
87
+ ["doc2", 0],
88
+ ["doc3", 0],
89
+ ["doc4", 1],
90
  ]);
91
+
92
  const results = blendScores(rrfResults, rerankScores);
93
+ expect(results[0].docId).toBe("doc1");
94
+ expect(results[1].docId).toBe("doc4");
 
95
  });
96
 
97
+ it("sorts final results by blended score descending", () => {
98
  const rrfResults = [
99
  makeRRFResult("doc1", 0.5),
100
+ makeRRFResult("doc2", 0.4),
101
+ makeRRFResult("doc3", 0.3),
102
  ];
103
  const rerankScores = new Map([
104
+ ["doc1", 0.2],
105
+ ["doc2", 0.8],
106
+ ["doc3", 0.1],
107
  ]);
108
+
109
  const results = blendScores(rrfResults, rerankScores);
110
+ for (let i = 1; i < results.length; i++) {
111
+ expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
112
+ }
113
  });
114
 
115
+ it("preserves filepath, title, bestChunk, and docId", () => {
116
+ const results = blendScores([makeRRFResult("doc1", 0.5, "My Title")], new Map());
 
117
  expect(results[0].filepath).toBe("doc1");
118
  expect(results[0].title).toBe("My Title");
119
  expect(results[0].bestChunk).toBe("chunk from doc1");
120
  expect(results[0].docId).toBe("doc1");
121
  });
122
 
123
+ it("deduplicates by docId, keeping the highest blended score", () => {
124
+ const results = blendScores(
125
+ [makeRRFResult("doc1", 0.5), makeRRFResult("doc1", 0.3)],
126
+ new Map([["doc1", 0.8]]),
127
+ );
 
 
 
 
 
128
 
129
+ expect(results).toHaveLength(1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  expect(results[0].docId).toBe("doc1");
 
131
  });
132
  });
src/pipeline/blend.ts CHANGED
@@ -1,24 +1,28 @@
1
  import type { RRFResult, RerankedResult, FinalResult } from "../types";
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- const BLEND_RRF_WEIGHT = 0.8; // uniform 80% RRF / 20% reranker
4
-
5
- // Blend RRF score with reranker score using uniform weights.
6
- // Browser-sized reranker models are noisy — position-aware weights
7
- // (which gave tail ranks MORE reranker influence) caused irrelevant
8
- // docs to leapfrog relevant ones.
9
  export function blendScores(
10
  rrfResults: RRFResult[],
11
  rerankScores: Map<string, number>, // docId -> rerank score
12
  ): FinalResult[] {
13
- // Normalize RRF scores to [0,1] range so they're comparable with reranker scores (0-1)
14
- const maxRRF = Math.max(...rrfResults.map(r => r.score), 1e-9);
15
-
16
- const blended: RerankedResult[] = rrfResults.map((result) => {
17
  const rerankScore = rerankScores.get(result.docId) ?? 0;
18
- const normalizedRRF = result.score / maxRRF;
19
-
20
  const blendedScore =
21
- BLEND_RRF_WEIGHT * normalizedRRF + (1 - BLEND_RRF_WEIGHT) * rerankScore;
22
 
23
  return {
24
  ...result,
 
1
  import type { RRFResult, RerankedResult, FinalResult } from "../types";
2
+ import {
3
+ BLEND_TAIL_RRF_WEIGHT,
4
+ BLEND_TOP10_RRF_WEIGHT,
5
+ BLEND_TOP3_RRF_WEIGHT,
6
+ } from "../constants";
7
+
8
+ function getRrfWeight(rank: number): number {
9
+ if (rank <= 3) return BLEND_TOP3_RRF_WEIGHT;
10
+ if (rank <= 10) return BLEND_TOP10_RRF_WEIGHT;
11
+ return BLEND_TAIL_RRF_WEIGHT;
12
+ }
13
 
14
+ // Blend RRF rank position with reranker score using qmd's position-aware weights.
 
 
 
 
 
15
  export function blendScores(
16
  rrfResults: RRFResult[],
17
  rerankScores: Map<string, number>, // docId -> rerank score
18
  ): FinalResult[] {
19
+ const blended: RerankedResult[] = rrfResults.map((result, index) => {
20
+ const rank = index + 1;
21
+ const rrfWeight = getRrfWeight(rank);
22
+ const positionScore = 1 / rank;
23
  const rerankScore = rerankScores.get(result.docId) ?? 0;
 
 
24
  const blendedScore =
25
+ rrfWeight * positionScore + (1 - rrfWeight) * rerankScore;
26
 
27
  return {
28
  ...result,
src/pipeline/embeddings.ts CHANGED
@@ -25,6 +25,15 @@ export async function embedBatch(
25
  return output.tolist().map((arr: number[]) => new Float32Array(arr));
26
  }
27
 
 
 
 
 
 
 
 
 
 
28
  /**
29
  * Embed a document chunk using the doc template.
30
  * Uses the doc template: "title: {title} | text: {body}"
@@ -39,3 +48,12 @@ export async function embedDocChunk(
39
  const output = await pipe(text, { pooling: "mean", normalize: true });
40
  return new Float32Array(output.tolist()[0]);
41
  }
 
 
 
 
 
 
 
 
 
 
25
  return output.tolist().map((arr: number[]) => new Float32Array(arr));
26
  }
27
 
28
+ /**
29
+ * Embed multiple query strings using the query template.
30
+ */
31
+ export async function embedQueries(
32
+ queries: string[],
33
+ ): Promise<Float32Array[]> {
34
+ return embedBatch(queries.map(EMBED_QUERY_TEMPLATE));
35
+ }
36
+
37
  /**
38
  * Embed a document chunk using the doc template.
39
  * Uses the doc template: "title: {title} | text: {body}"
 
48
  const output = await pipe(text, { pooling: "mean", normalize: true });
49
  return new Float32Array(output.tolist()[0]);
50
  }
51
+
52
+ /**
53
+ * Embed multiple document chunks using the document template.
54
+ */
55
+ export async function embedDocChunksBatch(
56
+ chunks: Array<{ title: string; text: string }>,
57
+ ): Promise<Float32Array[]> {
58
+ return embedBatch(chunks.map((chunk) => EMBED_DOC_TEMPLATE(chunk.title, chunk.text)));
59
+ }
src/pipeline/expansion.test.ts CHANGED
@@ -9,47 +9,45 @@ const QUERY = "test query";
9
  describe("parseExpansionOutput", () => {
10
  it("parses all three variants from well-formed output", () => {
11
  const text = [
12
- "lex: sqlite, fts5, full-text search",
13
- "vec: How to perform full-text search using SQLite FTS5",
14
- "hyde: This document explains the SQLite FTS5 extension for full-text search indexing.",
15
  ].join("\n");
16
 
17
  const result = parseExpansionOutput(text, QUERY);
18
- expect(result.lex).toBe("sqlite, fts5, full-text search");
19
  expect(result.vec).toEqual([
20
- "How to perform full-text search using SQLite FTS5",
21
  ]);
22
- expect(result.hyde).toBe(
23
- "This document explains the SQLite FTS5 extension for full-text search indexing.",
24
- );
25
  });
26
 
27
  it("handles multiple vec lines", () => {
28
  const text = [
29
- "lex: embeddings, vector search",
30
- "vec: Generating vector embeddings for semantic search",
31
- "vec: Using cosine similarity to find related documents",
32
- "hyde: This guide covers the creation of vector embeddings.",
33
  ].join("\n");
34
 
35
  const result = parseExpansionOutput(text, QUERY);
36
  expect(result.vec).toEqual([
37
- "Generating vector embeddings for semantic search",
38
- "Using cosine similarity to find related documents",
39
  ]);
40
  });
41
 
42
  it("is case-insensitive for prefixes", () => {
43
  const text = [
44
- "LEX: uppercase keywords",
45
- "Vec: Mixed case sentence",
46
- "HYDE: All caps hypothetical document",
47
  ].join("\n");
48
 
49
  const result = parseExpansionOutput(text, QUERY);
50
- expect(result.lex).toBe("uppercase keywords");
51
- expect(result.vec).toEqual(["Mixed case sentence"]);
52
- expect(result.hyde).toBe("All caps hypothetical document");
53
  });
54
 
55
  it("falls back to query when no prefixes are found", () => {
@@ -61,9 +59,9 @@ describe("parseExpansionOutput", () => {
61
  });
62
 
63
  it("uses query for missing variants", () => {
64
- const text = "lex: only keywords here";
65
  const result = parseExpansionOutput(text, QUERY);
66
- expect(result.lex).toBe("only keywords here");
67
  expect(result.vec).toEqual([QUERY]);
68
  expect(result.hyde).toBe(`Information about ${QUERY}`);
69
  });
@@ -71,24 +69,37 @@ describe("parseExpansionOutput", () => {
71
  it("handles extra whitespace and blank lines", () => {
72
  const text = [
73
  "",
74
- " lex: spaced keywords ",
75
  "",
76
- " vec: spaced sentence ",
77
- " hyde: spaced hypothetical ",
78
  "",
79
  ].join("\n");
80
 
81
  const result = parseExpansionOutput(text, QUERY);
82
- expect(result.lex).toBe("spaced keywords");
83
- expect(result.vec).toEqual(["spaced sentence"]);
84
- expect(result.hyde).toBe("spaced hypothetical");
85
  });
86
 
87
  it("handles only vec present", () => {
88
- const text = "vec: just a vector sentence";
89
  const result = parseExpansionOutput(text, QUERY);
90
- expect(result.vec).toEqual(["just a vector sentence"]);
91
  expect(result.lex).toBe(QUERY);
92
  expect(result.hyde).toBe(`Information about ${QUERY}`);
93
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  });
 
9
  describe("parseExpansionOutput", () => {
10
  it("parses all three variants from well-formed output", () => {
11
  const text = [
12
+ "lex: test query keywords",
13
+ "vec: How to improve results for a test query",
14
+ "hyde: This document explains how to answer a test query well.",
15
  ].join("\n");
16
 
17
  const result = parseExpansionOutput(text, QUERY);
18
+ expect(result.lex).toBe("test query keywords");
19
  expect(result.vec).toEqual([
20
+ "How to improve results for a test query",
21
  ]);
22
+ expect(result.hyde).toBe("This document explains how to answer a test query well.");
 
 
23
  });
24
 
25
  it("handles multiple vec lines", () => {
26
  const text = [
27
+ "lex: test query embeddings",
28
+ "vec: Generating vector embeddings for a test query",
29
+ "vec: Using cosine similarity to answer the query better",
30
+ "hyde: This guide covers the creation of test query embeddings.",
31
  ].join("\n");
32
 
33
  const result = parseExpansionOutput(text, QUERY);
34
  expect(result.vec).toEqual([
35
+ "Generating vector embeddings for a test query",
36
+ "Using cosine similarity to answer the query better",
37
  ]);
38
  });
39
 
40
  it("is case-insensitive for prefixes", () => {
41
  const text = [
42
+ "LEX: uppercase test keywords",
43
+ "Vec: Mixed case query sentence",
44
+ "HYDE: All caps test hypothetical document",
45
  ].join("\n");
46
 
47
  const result = parseExpansionOutput(text, QUERY);
48
+ expect(result.lex).toBe("uppercase test keywords");
49
+ expect(result.vec).toEqual(["Mixed case query sentence"]);
50
+ expect(result.hyde).toBe("All caps test hypothetical document");
51
  });
52
 
53
  it("falls back to query when no prefixes are found", () => {
 
59
  });
60
 
61
  it("uses query for missing variants", () => {
62
+ const text = "lex: only test query keywords here";
63
  const result = parseExpansionOutput(text, QUERY);
64
+ expect(result.lex).toBe("only test query keywords here");
65
  expect(result.vec).toEqual([QUERY]);
66
  expect(result.hyde).toBe(`Information about ${QUERY}`);
67
  });
 
69
  it("handles extra whitespace and blank lines", () => {
70
  const text = [
71
  "",
72
+ " lex: spaced test keywords ",
73
  "",
74
+ " vec: spaced query sentence ",
75
+ " hyde: spaced test hypothetical ",
76
  "",
77
  ].join("\n");
78
 
79
  const result = parseExpansionOutput(text, QUERY);
80
+ expect(result.lex).toBe("spaced test keywords");
81
+ expect(result.vec).toEqual(["spaced query sentence"]);
82
+ expect(result.hyde).toBe("spaced test hypothetical");
83
  });
84
 
85
  it("handles only vec present", () => {
86
+ const text = "vec: just a test query vector sentence";
87
  const result = parseExpansionOutput(text, QUERY);
88
+ expect(result.vec).toEqual(["just a test query vector sentence"]);
89
  expect(result.lex).toBe(QUERY);
90
  expect(result.hyde).toBe(`Information about ${QUERY}`);
91
  });
92
+
93
+ it("filters expansion lines that drift away from the original query", () => {
94
+ const text = [
95
+ "lex: unrelated cooking keywords",
96
+ "vec: another unrelated sentence",
97
+ "hyde: test query overview with useful detail",
98
+ ].join("\n");
99
+
100
+ const result = parseExpansionOutput(text, QUERY);
101
+ expect(result.lex).toBe(QUERY);
102
+ expect(result.vec).toEqual([QUERY]);
103
+ expect(result.hyde).toBe("test query overview with useful detail");
104
+ });
105
  });
src/pipeline/expansion.ts CHANGED
@@ -1,4 +1,8 @@
1
- import { Tensor } from "@huggingface/transformers";
 
 
 
 
2
  import { getExpansionModel, getExpansionTokenizer } from "./models";
3
  import type { ExpandedQuery } from "../types";
4
 
@@ -17,6 +21,14 @@ const TOP_P = 0.8;
17
  // Parse the model's output text into structured ExpandedQuery
18
  function parseExpansionOutput(text: string, query: string): ExpandedQuery {
19
  const lines = text.trim().split("\n");
 
 
 
 
 
 
 
 
20
  let lex = "";
21
  const vec: string[] = [];
22
  let hyde = "";
@@ -24,11 +36,14 @@ function parseExpansionOutput(text: string, query: string): ExpandedQuery {
24
  for (const line of lines) {
25
  const trimmed = line.trim();
26
  if (trimmed.toLowerCase().startsWith("lex:")) {
27
- lex = trimmed.slice(4).trim();
 
28
  } else if (trimmed.toLowerCase().startsWith("vec:")) {
29
- vec.push(trimmed.slice(4).trim());
 
30
  } else if (trimmed.toLowerCase().startsWith("hyde:")) {
31
- hyde = trimmed.slice(5).trim();
 
32
  }
33
  }
34
 
@@ -56,7 +71,7 @@ function sampleToken(logitsData: Float32Array | Float64Array, vocabSize: number)
56
  const topKIndices = indices.slice(0, TOP_K);
57
 
58
  // Softmax over top-k
59
- let maxLogit = scaled[topKIndices[0]];
60
  const exps = topKIndices.map(i => Math.exp(scaled[i] - maxLogit));
61
  const sumExp = exps.reduce((a, b) => a + b, 0);
62
  const probs = exps.map(e => e / sumExp);
@@ -89,10 +104,10 @@ function sampleToken(logitsData: Float32Array | Float64Array, vocabSize: number)
89
  // Manual autoregressive generation — bypasses Transformers.js generate()
90
  // which breaks when the ONNX model was exported without KV cache tensors.
91
  async function manualGenerate(
92
- model: any,
93
  inputIds: bigint[],
94
  eosTokenId: number,
95
- tokenizer: any,
96
  ): Promise<bigint[]> {
97
  const generated: bigint[] = [...inputIds];
98
  const newTokens: bigint[] = [];
@@ -101,7 +116,15 @@ async function manualGenerate(
101
  const idsTensor = new Tensor("int64", BigInt64Array.from(generated), [1, generated.length]);
102
  const maskTensor = new Tensor("int64", new BigInt64Array(generated.length).fill(1n), [1, generated.length]);
103
 
104
- const output = await model({ input_ids: idsTensor, attention_mask: maskTensor });
 
 
 
 
 
 
 
 
105
 
106
  const logits = output.logits;
107
  const vocabSize = logits.dims[2];
@@ -154,12 +177,20 @@ export async function expandQuery(query: string): Promise<ExpandedQuery> {
154
  [{ role: "user", content: prompt }],
155
  { add_generation_prompt: true, tokenize: false },
156
  ) as string;
157
- const inputs = tokenizer(chatPrompt, { return_tensor: true });
 
 
158
  const inputIds = Array.from(inputs.input_ids.data as BigInt64Array);
159
 
160
- const eosTokenId = (tokenizer as any).model?.config?.eos_token_id
161
- ?? (tokenizer as any).eos_token_id
162
- ?? 151643; // Qwen default
 
 
 
 
 
 
163
 
164
  const allIds = await manualGenerate(model, inputIds, eosTokenId, tokenizer);
165
 
 
1
+ import {
2
+ Tensor,
3
+ type PreTrainedModel,
4
+ type PreTrainedTokenizer,
5
+ } from "@huggingface/transformers";
6
  import { getExpansionModel, getExpansionTokenizer } from "./models";
7
  import type { ExpandedQuery } from "../types";
8
 
 
21
  // Parse the model's output text into structured ExpandedQuery
22
  function parseExpansionOutput(text: string, query: string): ExpandedQuery {
23
  const lines = text.trim().split("\n");
24
+ const queryTerms = query
25
+ .toLowerCase()
26
+ .replace(/[^a-z0-9\s]/g, " ")
27
+ .split(/\s+/)
28
+ .filter(Boolean);
29
+ const hasQueryTerm = (value: string) =>
30
+ queryTerms.length === 0 ||
31
+ queryTerms.some((term) => value.toLowerCase().includes(term));
32
  let lex = "";
33
  const vec: string[] = [];
34
  let hyde = "";
 
36
  for (const line of lines) {
37
  const trimmed = line.trim();
38
  if (trimmed.toLowerCase().startsWith("lex:")) {
39
+ const value = trimmed.slice(4).trim();
40
+ if (value && hasQueryTerm(value)) lex = value;
41
  } else if (trimmed.toLowerCase().startsWith("vec:")) {
42
+ const value = trimmed.slice(4).trim();
43
+ if (value && hasQueryTerm(value)) vec.push(value);
44
  } else if (trimmed.toLowerCase().startsWith("hyde:")) {
45
+ const value = trimmed.slice(5).trim();
46
+ if (value && hasQueryTerm(value)) hyde = value;
47
  }
48
  }
49
 
 
71
  const topKIndices = indices.slice(0, TOP_K);
72
 
73
  // Softmax over top-k
74
+ const maxLogit = scaled[topKIndices[0]];
75
  const exps = topKIndices.map(i => Math.exp(scaled[i] - maxLogit));
76
  const sumExp = exps.reduce((a, b) => a + b, 0);
77
  const probs = exps.map(e => e / sumExp);
 
104
  // Manual autoregressive generation — bypasses Transformers.js generate()
105
  // which breaks when the ONNX model was exported without KV cache tensors.
106
  async function manualGenerate(
107
+ model: PreTrainedModel,
108
  inputIds: bigint[],
109
  eosTokenId: number,
110
+ tokenizer: PreTrainedTokenizer,
111
  ): Promise<bigint[]> {
112
  const generated: bigint[] = [...inputIds];
113
  const newTokens: bigint[] = [];
 
116
  const idsTensor = new Tensor("int64", BigInt64Array.from(generated), [1, generated.length]);
117
  const maskTensor = new Tensor("int64", new BigInt64Array(generated.length).fill(1n), [1, generated.length]);
118
 
119
+ const output = await model({
120
+ input_ids: idsTensor,
121
+ attention_mask: maskTensor,
122
+ }) as {
123
+ logits: {
124
+ dims: number[];
125
+ data: Float32Array | Float64Array;
126
+ };
127
+ };
128
 
129
  const logits = output.logits;
130
  const vocabSize = logits.dims[2];
 
177
  [{ role: "user", content: prompt }],
178
  { add_generation_prompt: true, tokenize: false },
179
  ) as string;
180
+ const inputs = tokenizer(chatPrompt, {
181
+ return_tensor: true,
182
+ }) as { input_ids: Tensor };
183
  const inputIds = Array.from(inputs.input_ids.data as BigInt64Array);
184
 
185
+ const tokenizerWithConfig = tokenizer as PreTrainedTokenizer & {
186
+ model?: { config?: { eos_token_id?: number | number[] } };
187
+ eos_token_id?: number | number[];
188
+ };
189
+ const eosTokenValue =
190
+ tokenizerWithConfig.model?.config?.eos_token_id ??
191
+ tokenizerWithConfig.eos_token_id;
192
+ const eosTokenId =
193
+ Array.isArray(eosTokenValue) ? eosTokenValue[0] ?? 151643 : eosTokenValue ?? 151643;
194
 
195
  const allIds = await manualGenerate(model, inputIds, eosTokenId, tokenizer);
196
 
src/pipeline/orchestrator.ts CHANGED
@@ -1,11 +1,16 @@
1
  import type {
2
  EmbeddedChunk,
3
- ScoredChunk,
4
  ExpandedQuery,
5
  PipelineEvent,
 
 
6
  } from "../types";
 
 
 
 
7
  import { expandQuery } from "./expansion";
8
- import { embedQuery } from "./embeddings";
9
  import { vectorSearch } from "./vectorSearch";
10
  import { BM25Index } from "./bm25";
11
  import { reciprocalRankFusion } from "./rrf";
@@ -19,106 +24,262 @@ export interface PipelineInput {
19
  bm25Index: BM25Index;
20
  }
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  export async function* runPipeline(
23
  input: PipelineInput,
24
  ): AsyncGenerator<PipelineEvent> {
25
  const { query, embeddedChunks, bm25Index } = input;
26
 
27
- // ── Stage 1: Query Expansion ──
28
- let expanded: ExpandedQuery;
29
- if (isExpansionReady()) {
30
- yield { stage: "expansion", status: "running" };
31
- try {
32
- expanded = await expandQuery(query);
33
- yield { stage: "expansion", status: "done", data: expanded };
34
- } catch (err) {
35
- // Fallback: use original query as all variants
36
- expanded = { lex: query, vec: [query], hyde: query };
37
- yield { stage: "expansion", status: "error", error: String(err) };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
- } else {
40
- // Expansion model not loaded — use original query as all variants
41
- expanded = { lex: query, vec: [query], hyde: query };
42
- yield { stage: "expansion", status: "error", error: "Expansion model not loaded — using original query" };
43
  }
44
 
45
- // ── Stage 2: Parallel Search ──
46
- yield { stage: "search", status: "running" };
 
 
 
 
 
 
 
 
 
47
 
48
- // BM25 searches:
49
- // 1. Original query -> BM25 (dashed line in architecture diagram)
50
- // 2. Lex keywords -> BM25
51
- // 3. Vec sentences -> BM25
52
- const bm25Original = bm25Index.search(query, 20);
53
- const bm25Lex = bm25Index.search(expanded.lex, 20);
54
- const bm25Vec = expanded.vec.flatMap((v) => bm25Index.search(v, 20));
55
-
56
- // Vector searches:
57
- // 1. HyDE -> Vector Search
58
- // 2. Vec sentences -> Vector Search
59
- const hydeEmbedding = await embedQuery(expanded.hyde);
60
- const vecEmbeddings = await Promise.all(
61
- expanded.vec.map((v) => embedQuery(v)),
62
- );
63
 
64
- const vecHyde = vectorSearch(hydeEmbedding, embeddedChunks, 20);
65
- const vecVec = vecEmbeddings.flatMap((emb) =>
66
- vectorSearch(emb, embeddedChunks, 20),
67
- );
68
 
69
- // Combine all hits for UI display
70
- const allBm25: ScoredChunk[] = [...bm25Original, ...bm25Lex, ...bm25Vec];
71
- const allVector: ScoredChunk[] = [...vecHyde, ...vecVec];
 
 
 
 
72
 
73
  yield {
74
  stage: "search",
75
  status: "done",
76
- data: { bm25Hits: allBm25, vectorHits: allVector },
77
  };
78
 
79
- // ── Stage 3: RRF Fusion ──
80
- // Build ranked lists for RRF (order matters for weights: first 2 get 2x)
81
- const rrfLists = [
82
- { results: bm25Original, queryType: "original" as const, query },
83
- { results: vecHyde, queryType: "hyde" as const, query: expanded.hyde },
84
- { results: bm25Lex, queryType: "lex" as const, query: expanded.lex },
85
- ...expanded.vec.map((v, i) => ({
86
- results: vecVec.slice(i * 20, (i + 1) * 20),
87
- queryType: "vec" as const,
88
- query: v,
89
- })),
90
- ...expanded.vec.map((v, i) => ({
91
- results: bm25Vec.slice(i * 20, (i + 1) * 20),
92
- queryType: "vec" as const,
93
- query: v,
94
- })),
95
- ];
96
-
97
- const rrfResults = reciprocalRankFusion(rrfLists);
98
- yield { stage: "rrf", status: "done", data: { merged: rrfResults } };
99
-
100
- // ── Stage 4: Reranking ──
101
  yield { stage: "rerank", status: "running" };
102
  const rerankScores = new Map<string, number>();
103
- for (const result of rrfResults) {
104
  const score = await scoreDocument(query, result.bestChunk);
105
  rerankScores.set(result.docId, score);
106
  }
107
 
108
- // Build reranked view for "before/after" visualization
109
- const rerankedResults = rrfResults.map((r) => ({
110
- ...r,
111
- rerankScore: rerankScores.get(r.docId) ?? 0,
112
- blendedScore: 0, // computed in blend step
113
  }));
114
 
115
  yield {
116
  stage: "rerank",
117
  status: "done",
118
- data: { before: rrfResults, after: rerankedResults },
119
  };
120
 
121
- // ── Stage 5: Score Blending ──
122
- const finalResults = blendScores(rrfResults, rerankScores);
123
  yield { stage: "blend", status: "done", data: { finalResults } };
124
  }
 
1
  import type {
2
  EmbeddedChunk,
 
3
  ExpandedQuery,
4
  PipelineEvent,
5
+ RRFResult,
6
+ ScoredChunk,
7
  } from "../types";
8
+ import {
9
+ STRONG_SIGNAL_MIN_GAP,
10
+ STRONG_SIGNAL_MIN_SCORE,
11
+ } from "../constants";
12
  import { expandQuery } from "./expansion";
13
+ import { embedQueries } from "./embeddings";
14
  import { vectorSearch } from "./vectorSearch";
15
  import { BM25Index } from "./bm25";
16
  import { reciprocalRankFusion } from "./rrf";
 
24
  bm25Index: BM25Index;
25
  }
26
 
27
+ interface RankedList {
28
+ results: ScoredChunk[];
29
+ queryType: "original" | "lex" | "vec" | "hyde";
30
+ query: string;
31
+ }
32
+
33
+ interface VectorQuery {
34
+ text: string;
35
+ queryType: "original" | "vec" | "hyde";
36
+ }
37
+
38
+ const SEARCH_LIMIT = 20;
39
+
40
+ function normalizeBm25Score(score: number): number {
41
+ if (score <= 0) return 0;
42
+ return score / (1 + score);
43
+ }
44
+
45
+ function hasStrongBm25Signal(results: ScoredChunk[]): boolean {
46
+ const topScore = normalizeBm25Score(results[0]?.score ?? 0);
47
+ const secondScore = normalizeBm25Score(results[1]?.score ?? 0);
48
+ return (
49
+ results.length > 0 &&
50
+ topScore >= STRONG_SIGNAL_MIN_SCORE &&
51
+ topScore - secondScore >= STRONG_SIGNAL_MIN_GAP
52
+ );
53
+ }
54
+
55
+ function extractQueryTerms(query: string): string[] {
56
+ return [...new Set(
57
+ query
58
+ .toLowerCase()
59
+ .split(/\s+/)
60
+ .map((term) => term.replace(/^[^a-z0-9]+|[^a-z0-9]+$/g, ""))
61
+ .filter((term) => term.length > 2),
62
+ )];
63
+ }
64
+
65
+ function buildChunkLookup(
66
+ chunks: EmbeddedChunk[],
67
+ ): Map<string, EmbeddedChunk[]> {
68
+ const byDoc = new Map<string, EmbeddedChunk[]>();
69
+ for (const chunk of chunks) {
70
+ const existing = byDoc.get(chunk.docId);
71
+ if (existing) {
72
+ existing.push(chunk);
73
+ } else {
74
+ byDoc.set(chunk.docId, [chunk]);
75
+ }
76
+ }
77
+ for (const docChunks of byDoc.values()) {
78
+ docChunks.sort((a, b) => a.chunkIndex - b.chunkIndex);
79
+ }
80
+ return byDoc;
81
+ }
82
+
83
+ function selectBestChunkForRerank(
84
+ query: string,
85
+ docChunks: EmbeddedChunk[],
86
+ ): string {
87
+ if (docChunks.length === 0) return "";
88
+
89
+ const queryTerms = extractQueryTerms(query);
90
+ if (queryTerms.length === 0) return docChunks[0].text;
91
+
92
+ let bestChunk = docChunks[0];
93
+ let bestScore = -1;
94
+
95
+ for (const chunk of docChunks) {
96
+ const chunkLower = chunk.text.toLowerCase();
97
+ const overlap = queryTerms.reduce(
98
+ (score, term) => score + (chunkLower.includes(term) ? 1 : 0),
99
+ 0,
100
+ );
101
+
102
+ if (overlap > bestScore) {
103
+ bestChunk = chunk;
104
+ bestScore = overlap;
105
+ }
106
+ }
107
+
108
+ return bestChunk.text;
109
+ }
110
+
111
+ function assignRerankChunks(
112
+ query: string,
113
+ rrfResults: RRFResult[],
114
+ embeddedChunks: EmbeddedChunk[],
115
+ ): RRFResult[] {
116
+ const chunksByDoc = buildChunkLookup(embeddedChunks);
117
+ return rrfResults.map((result) => {
118
+ const bestChunk = selectBestChunkForRerank(
119
+ query,
120
+ chunksByDoc.get(result.docId) ?? [],
121
+ );
122
+
123
+ return {
124
+ ...result,
125
+ bestChunk: bestChunk || result.bestChunk,
126
+ };
127
+ });
128
+ }
129
+
130
+ async function resolveExpansion(query: string, initialFts: ScoredChunk[]): Promise<ExpandedQuery> {
131
+ if (hasStrongBm25Signal(initialFts)) {
132
+ return {
133
+ lex: "",
134
+ vec: [],
135
+ hyde: "",
136
+ source: "strong-signal",
137
+ note: "Strong BM25 match detected, so expansion was skipped.",
138
+ };
139
+ }
140
+
141
+ if (!isExpansionReady()) {
142
+ return {
143
+ lex: "",
144
+ vec: [],
145
+ hyde: "",
146
+ source: "fallback",
147
+ note: "Expansion model unavailable, so the pipeline used the original query only.",
148
+ };
149
+ }
150
+
151
+ try {
152
+ return {
153
+ ...(await expandQuery(query)),
154
+ source: "model",
155
+ };
156
+ } catch (err) {
157
+ return {
158
+ lex: "",
159
+ vec: [],
160
+ hyde: "",
161
+ source: "fallback",
162
+ note: `Expansion failed, so the pipeline used the original query only. ${String(err)}`,
163
+ };
164
+ }
165
+ }
166
+
167
/**
 * Runs the full retrieval pipeline for one query, streaming progress to the
 * UI as PipelineEvents. Stage order is fixed: expansion → search → rrf →
 * rerank → blend; "done" events carry the stage's data payload.
 *
 * @param input Query text plus the pre-built embedded chunks and BM25 index.
 * @yields One PipelineEvent per stage transition.
 */
export async function* runPipeline(
  input: PipelineInput,
): AsyncGenerator<PipelineEvent> {
  const { query, embeddedChunks, bm25Index } = input;

  // Step 1: initial lexical probe for strong exact-match signals.
  const initialFts = bm25Index.search(query, SEARCH_LIMIT);
  const strongSignal = hasStrongBm25Signal(initialFts);

  // Step 2: expansion (or intentional skip/fallback).
  // resolveExpansion never throws; failures become a "fallback" result.
  yield { stage: "expansion", status: "running" };
  const expanded = await resolveExpansion(query, initialFts);
  yield { stage: "expansion", status: "done", data: expanded };

  // Step 3: route lexical and vector searches by query type.
  yield { stage: "search", status: "running" };

  const rankedLists: RankedList[] = [];
  // bm25Hits / vectorHits accumulate raw hits across all sub-queries
  // (duplicates across lists are possible) for the search-stage payload.
  const bm25Hits: ScoredChunk[] = [...initialFts];
  const vectorHits: ScoredChunk[] = [];

  if (initialFts.length > 0) {
    rankedLists.push({
      results: initialFts,
      queryType: "original",
      query,
    });
  }

  // Lexical expansion search only when no strong signal and the model
  // produced a non-blank lex string.
  if (!strongSignal && expanded.lex.trim()) {
    const lexicalExpansionHits = bm25Index.search(expanded.lex, SEARCH_LIMIT);
    if (lexicalExpansionHits.length > 0) {
      bm25Hits.push(...lexicalExpansionHits);
      rankedLists.push({
        results: lexicalExpansionHits,
        queryType: "lex",
        query: expanded.lex,
      });
    }
  }

  // The original query is always embedded, even on a strong BM25 signal;
  // NOTE(review): assumed intentional so dense evidence still feeds fusion
  // — confirm. Expansion-derived vector/HyDE queries are gated off on a
  // strong signal.
  const vectorQueries: VectorQuery[] = [{ text: query, queryType: "original" }];
  if (!strongSignal) {
    for (const vecQuery of expanded.vec) {
      if (vecQuery.trim()) {
        vectorQueries.push({ text: vecQuery, queryType: "vec" });
      }
    }
    if (expanded.hyde.trim()) {
      vectorQueries.push({ text: expanded.hyde, queryType: "hyde" });
    }
  }

  // Embed all vector queries in one batch, then search per query.
  // Index i pairs each query with its embedding; the guard skips any
  // position the embedder failed to fill.
  const queryEmbeddings = await embedQueries(vectorQueries.map((entry) => entry.text));
  for (let i = 0; i < vectorQueries.length; i++) {
    const queryEntry = vectorQueries[i];
    const embedding = queryEmbeddings[i];
    if (!queryEntry || !embedding) continue;

    const hits = vectorSearch(embedding, embeddedChunks, SEARCH_LIMIT);
    if (hits.length === 0) continue;

    vectorHits.push(...hits);
    rankedLists.push({
      results: hits,
      queryType: queryEntry.queryType,
      query: queryEntry.text,
    });
  }

  yield {
    stage: "search",
    status: "done",
    data: { bm25Hits, vectorHits },
  };

  // Step 4: RRF fusion over all retrieval lists, then attach each doc's
  // most query-relevant chunk for the reranker.
  const fusedResults = reciprocalRankFusion(rankedLists);
  const rerankCandidates = assignRerankChunks(query, fusedResults, embeddedChunks);
  yield { stage: "rrf", status: "done", data: { merged: rerankCandidates } };

  // No candidates: still emit "done" for the remaining stages so the UI's
  // stage progression completes, then stop.
  if (rerankCandidates.length === 0) {
    yield {
      stage: "rerank",
      status: "done",
      data: { before: [], after: [] },
    };
    yield {
      stage: "blend",
      status: "done",
      data: { finalResults: [] },
    };
    return;
  }

  // Step 5: rerank the best chunk per document.
  // NOTE(review): candidates are scored one at a time with sequential
  // awaits — presumably the in-browser reranker model is single-stream;
  // confirm before parallelizing.
  yield { stage: "rerank", status: "running" };
  const rerankScores = new Map<string, number>();
  for (const result of rerankCandidates) {
    const score = await scoreDocument(query, result.bestChunk);
    rerankScores.set(result.docId, score);
  }

  // Build the "after" view for the before/after visualization; blendedScore
  // is a placeholder here (the blend stage computes the real value).
  const rerankedResults = rerankCandidates.map((result) => ({
    ...result,
    rerankScore: rerankScores.get(result.docId) ?? 0,
    blendedScore: 0,
  }));

  yield {
    stage: "rerank",
    status: "done",
    data: { before: rerankCandidates, after: rerankedResults },
  };

  // Step 6: blend retrieval position with reranker score for final ordering.
  const finalResults = blendScores(rerankCandidates, rerankScores);
  yield { stage: "blend", status: "done", data: { finalResults } };
}
src/types.ts CHANGED
@@ -66,6 +66,8 @@ export interface ExpandedQuery {
66
  hyde: string; // hypothetical document snippet
67
  vec: string[]; // dense retrieval sentences
68
  lex: string; // BM25 keywords
 
 
69
  }
70
 
71
  // Pipeline events for React UI
 
66
  hyde: string; // hypothetical document snippet
67
  vec: string[]; // dense retrieval sentences
68
  lex: string; // BM25 keywords
69
+ source?: "model" | "fallback" | "strong-signal";
70
+ note?: string;
71
  }
72
 
73
  // Pipeline events for React UI