Spaces:

shreyask
/

qmd-web

Running

App Files Files Community

shreyask Claude Opus 4.6 committed on Mar 12

Commit

0e526ea

verified ·

1 Parent(s): 5e17e07

wire App.tsx integration, update title, add HF Space README

Browse files

Connect all pipeline modules and UI components in App.tsx:
- Load models on mount, sample docs from public/eval-docs/
- Chunk and embed documents when embedding model is ready
- Run full pipeline (expansion, search, RRF, rerank, blend) on search
- Support file upload and paste for adding documents

Update index.html title and replace Vite boilerplate README with
HuggingFace Space metadata and project description.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show

README.md +25 -65
index.html +1 -1
src/App.tsx +160 -5

README.md CHANGED Viewed

@@ -1,73 +1,33 @@
-# React + TypeScript + Vite
-This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
-Currently, two official plugins are available:
-- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh
-- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
-## React Compiler
-The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
-## Expanding the ESLint configuration
-If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
-```js
-export default defineConfig([
-  globalIgnores(['dist']),
-  {
-    files: ['**/*.{ts,tsx}'],
-    extends: [
-      // Other configs...
-      // Remove tseslint.configs.recommended and replace with this
-      tseslint.configs.recommendedTypeChecked,
-      // Alternatively, use this for stricter rules
-      tseslint.configs.strictTypeChecked,
-      // Optionally, add this for stylistic rules
-      tseslint.configs.stylisticTypeChecked,
-      // Other configs...
-    ],
-    languageOptions: {
-      parserOptions: {
-        project: ['./tsconfig.node.json', './tsconfig.app.json'],
-        tsconfigRootDir: import.meta.dirname,
-      },
-      // other options...
-    },
-  },
-])
-```
-You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
-```js
-// eslint.config.js
-import reactX from 'eslint-plugin-react-x'
-import reactDom from 'eslint-plugin-react-dom'
-export default defineConfig([
-  globalIgnores(['dist']),
-  {
-    files: ['**/*.{ts,tsx}'],
-    extends: [
-      // Other configs...
-      // Enable lint rules for React
-      reactX.configs['recommended-typescript'],
-      // Enable lint rules for React DOM
-      reactDom.configs.recommended,
-    ],
-    languageOptions: {
-      parserOptions: {
-        project: ['./tsconfig.node.json', './tsconfig.app.json'],
-        tsconfigRootDir: import.meta.dirname,
-      },
-      // other options...
-    },
-  },
-])
-```

+---
+title: QMD Web Demo
+emoji: 🔍
+colorFrom: blue
+colorTo: green
+sdk: static
+pinned: false
+license: mit
+---
+# QMD Web Demo
+In-browser hybrid search pipeline using WebGPU + Transformers.js v4.
+Demonstrates the full QMD search pipeline running entirely in your browser:
+1. **Query Expansion** — Qwen3 1.7B generates HyDE, semantic, and keyword variants
+2. **Parallel Search** — BM25 keyword search + vector similarity search
+3. **Reciprocal Rank Fusion** — Merges results from multiple search backends
+4. **LLM Reranking** — Qwen3 Reranker 0.6B scores document relevance
+5. **Score Blending** — Position-aware combination of RRF and reranker scores
+## Requirements
+- Chrome 113+ or Edge 113+ (WebGPU required)
+- ~2.5GB model download on first visit (cached for subsequent visits)
+## Models
+- [embeddinggemma-300M](https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX) — Embeddings
+- [Qwen3-Reranker-0.6B](https://huggingface.co/onnx-community/Qwen3-Reranker-0.6B-ONNX) — Reranking
+- [qmd-query-expansion-1.7B](https://huggingface.co/shreyask/qmd-query-expansion-1.7B-ONNX) — Query expansion
+Based on [QMD](https://github.com/tobi/qmd) by Tobi Lütke.

index.html CHANGED Viewed

@@ -4,7 +4,7 @@
     <meta charset="UTF-8" />
     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>qmd-web</title>
   </head>
   <body>
     <div id="root"></div>

     <meta charset="UTF-8" />
     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>QMD Web Demo — In-Browser Hybrid Search</title>
   </head>
   <body>
     <div id="root"></div>

src/App.tsx CHANGED Viewed

@@ -1,10 +1,165 @@
 function App() {
   return (
-    <div style={{ fontFamily: 'system-ui, sans-serif', padding: '2rem' }}>
-      <h1>QMD Web Demo</h1>
-      <p>In-Browser Hybrid Search Pipeline</p>
     </div>
-  )
 }
-export default App

+import { useState, useEffect, useCallback } from 'react';
+import type { Document, Chunk, EmbeddedChunk, ModelState } from './types';
+import { loadAllModels, isAllModelsReady } from './pipeline/models';
+import { chunkDocument, extractTitle } from './pipeline/chunking';
+import { embedDocChunk } from './pipeline/embeddings';
+import { BM25Index } from './pipeline/bm25';
+import { runPipeline } from './pipeline/orchestrator';
+import type { PipelineState } from './components/PipelineView';
+import QueryInput from './components/QueryInput';
+import ModelStatus from './components/ModelStatus';
+import PipelineView from './components/PipelineView';
+import DocumentManager from './components/DocumentManager';
+// Sample doc filenames to load from public/eval-docs/; each is fetched from `/eval-docs/<name>` on mount and the filename doubles as the document id and filepath.
+const SAMPLE_DOCS = [
+  'api-design-principles.md',
+  'distributed-systems-overview.md',
+  'machine-learning-primer.md',
+];
+const INITIAL_PIPELINE: PipelineState = { // every stage starts 'idle'; also used to reset the view before each new search
+  expansion: { status: 'idle' },
+  search: { status: 'idle' },
+  rrf: { status: 'idle' },
+  rerank: { status: 'idle' },
+  blend: { status: 'idle' },
+};
 function App() {
+  // Root component: loads the models and sample documents, keeps the chunk /
+  // BM25 / embedding indexes in sync with the document list, and streams
+  // search-pipeline events into the PipelineView.
+  const [models, setModels] = useState<ModelState[]>([
+    { name: 'embedding', status: 'pending', progress: 0 },
+    { name: 'reranker', status: 'pending', progress: 0 },
+    { name: 'expansion', status: 'pending', progress: 0 },
+  ]);
+  const [documents, setDocuments] = useState<Document[]>([]);
+  const [chunks, setChunks] = useState<Chunk[]>([]);
+  const [embeddedChunks, setEmbeddedChunks] = useState<EmbeddedChunk[]>([]);
+  const [bm25Index, setBm25Index] = useState<BM25Index | null>(null);
+  const [pipeline, setPipeline] = useState<PipelineState>(INITIAL_PIPELINE);
+  const [indexing, setIndexing] = useState(false);
+  const [query, setQuery] = useState('');
+  // Derived boolean so effects can depend on "embedding model is ready" rather
+  // than on the whole `models` array, which changes on every progress update.
+  const embeddingReady = models.some(m => m.name === 'embedding' && m.status === 'ready');
+  // Load models on mount
+  useEffect(() => {
+    loadAllModels((state) => {
+      setModels(prev => prev.map(m => m.name === state.name ? state : m));
+    }).catch(console.error);
+  }, []);
+  // Load sample documents
+  useEffect(() => {
+    let cancelled = false;
+    async function loadSampleDocs() {
+      const loaded = await Promise.all(
+        SAMPLE_DOCS.map(async (filename): Promise<Document | null> => {
+          try {
+            const resp = await fetch(`/eval-docs/${filename}`);
+            // Without this check a 404 / fallback page body would be indexed as a document.
+            if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
+            const body = await resp.text();
+            const title = extractTitle(body, filename);
+            return { id: filename, title, body, filepath: filename };
+          } catch (err) {
+            console.error(`Failed to load sample doc ${filename}`, err);
+            return null;
+          }
+        }),
+      );
+      if (cancelled) return;
+      const docs = loaded.filter((doc): doc is Document => doc !== null);
+      // Functional update keeps any documents the user added while the fetches were in flight.
+      setDocuments(prev => [...docs, ...prev]);
+    }
+    void loadSampleDocs();
+    return () => {
+      cancelled = true;
+    };
+  }, []);
+  // When documents change, chunk them and build BM25 index
+  useEffect(() => {
+    if (documents.length === 0) return;
+    const allChunks = documents.flatMap(doc => chunkDocument(doc));
+    setChunks(allChunks);
+    setBm25Index(new BM25Index(allChunks));
+  }, [documents]);
+  // When the chunks change or the embedding model becomes ready, embed the chunks
+  useEffect(() => {
+    if (!embeddingReady || chunks.length === 0) return;
+    // Set by the cleanup below so a superseded pass never publishes stale
+    // embeddings or clears the indexing flag of the pass that replaced it.
+    let cancelled = false;
+    setIndexing(true);
+    void (async () => {
+      try {
+        const embedded: EmbeddedChunk[] = [];
+        for (const chunk of chunks) {
+          if (cancelled) return;
+          const embedding = await embedDocChunk(chunk.title, chunk.text);
+          embedded.push({ ...chunk, embedding });
+        }
+        if (!cancelled) setEmbeddedChunks(embedded);
+      } catch (err) {
+        if (!cancelled) console.error('Failed to embed document chunks', err);
+      } finally {
+        // Always release the flag for the active pass, even on failure.
+        if (!cancelled) setIndexing(false);
+      }
+    })();
+    return () => {
+      cancelled = true;
+    };
+  }, [chunks, embeddingReady]);
+  // Handle user upload
+  const handleUpload = useCallback(async (files: FileList) => {
+    const newDocs: Document[] = [];
+    for (const file of Array.from(files)) {
+      const body = await file.text();
+      const title = extractTitle(body, file.name);
+      newDocs.push({ id: file.name, title, body, filepath: file.name });
+    }
+    setDocuments(prev => [...prev, ...newDocs]);
+  }, []);
+  // Handle paste
+  const handlePaste = useCallback((text: string, filename: string) => {
+    const title = extractTitle(text, filename);
+    setDocuments(prev => [...prev, { id: filename, title, body: text, filepath: filename }]);
+  }, []);
+  // Run search pipeline
+  const handleSearch = useCallback(async (searchQuery: string) => {
+    if (!bm25Index || embeddedChunks.length === 0) return;
+    setQuery(searchQuery);
+    setPipeline(INITIAL_PIPELINE);
+    const gen = runPipeline({
+      query: searchQuery,
+      chunks,
+      embeddedChunks,
+      bm25Index,
+    });
+    try {
+      for await (const event of gen) {
+        // Merge each stage event into the pipeline state as it arrives.
+        setPipeline(prev => ({
+          ...prev,
+          [event.stage]: {
+            status: event.status,
+            ...('data' in event ? { data: event.data } : {}),
+            ...('error' in event ? { error: event.error } : {}),
+          },
+        }));
+      }
+    } catch (err) {
+      // A throw from the generator would otherwise be an unhandled rejection.
+      console.error('Search pipeline failed', err);
+    }
+  }, [bm25Index, embeddedChunks, chunks]);
+  const allReady = isAllModelsReady() && embeddedChunks.length > 0 && !indexing;
   return (
+    <div style={{ fontFamily: 'system-ui, -apple-system, sans-serif', maxWidth: 1400, margin: '0 auto', padding: '1rem' }}>
+      <header style={{ marginBottom: '1.5rem' }}>
+        <h1 style={{ margin: 0, fontSize: '1.5rem' }}>QMD Web Demo</h1>
+        <p style={{ margin: '0.25rem 0 0', color: '#666', fontSize: '0.9rem' }}>
+          In-Browser Hybrid Search Pipeline — WebGPU + Transformers.js
+        </p>
+      </header>
+      <ModelStatus models={models} />
+      {indexing && (
+        <div style={{ padding: '0.5rem 1rem', background: '#FFF3E0', borderRadius: 6, marginBottom: '1rem', fontSize: '0.85rem' }}>
+          Indexing documents (embedding chunks)...
+        </div>
+      )}
+      <QueryInput onSearch={handleSearch} disabled={!allReady} />
+      {query && <PipelineView state={pipeline} query={query} />}
+      <DocumentManager
+        documents={documents.map(d => ({ id: d.id, title: d.title, filepath: d.filepath }))}
+        onUpload={handleUpload}
+        onPaste={handlePaste}
+      />
     </div>
+  );
 }
+export default App;