Spaces:
Running
Running
// OCR Document Viewer

// Location of the pdf.js worker script (CDN build 3.11.174).
pdfjsLib.GlobalWorkerOptions.workerSrc =
  'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';

// ── State ──────────────────────────────────────────────────────────────────
// block id -> { block, rectEl, listEl }
const blockMap = new Map();
// The OpenSeadragon viewer currently on screen, or null when there is none.
let viewer = null;
// Id of the highlighted block, or null when nothing is selected.
let selectedId = null;
// Page width in OCR JSON units; set from JSON data at load time.
let jsonW = 1;
// ── OSD coordinate helpers ─────────────────────────────────────────────────
/**
 * Convert an OCR bounding box into the rectangle used by the OSD SVG overlay.
 *
 * The overlay uses image-width-normalized coordinates, so every component —
 * including y and the height — is divided by the page width `jsonW`.
 *
 * @param {number[]} bbox - [x1, y1, x2, y2] in OCR JSON units.
 * @returns {{x: number, y: number, w: number, h: number}} Normalized rectangle.
 */
function toOSD(bbox) {
  const [left, top, right, bottom] = bbox;
  const x = left / jsonW;
  const y = top / jsonW;
  const w = (right - left) / jsonW;
  const h = (bottom - top) / jsonW;
  return { x, y, w, h };
}
/**
 * Return the trimmed plain text of an HTML fragment.
 *
 * The markup comes from the uploaded OCR JSON, so it is parsed with DOMParser
 * into a separate, inert document instead of being assigned to `innerHTML` of
 * an element owned by the live page (which can still trigger resource loads
 * and inline error handlers such as `<img onerror>`).
 *
 * @param {*} html - HTML fragment; null/undefined are treated as empty.
 * @returns {string} Text content with surrounding whitespace removed.
 */
function stripHtml(html) {
  // `?? ''` keeps a missing `html` field from turning into the text "undefined".
  const markup = String(html ?? '');
  const parsed = new DOMParser().parseFromString(markup, 'text/html');
  return (parsed.body.textContent ?? '').trim();
}
// ── Upload screen wiring ───────────────────────────────────────────────────
// Files currently chosen on the upload screen (null until the user picks one).
let docFile = null;
let jsonFile = null;

const docInput = document.getElementById('doc-input');
const jsonInput = document.getElementById('json-input');
const docDrop = document.getElementById('doc-drop');
const jsonDrop = document.getElementById('json-drop');
const docName = document.getElementById('doc-name');
const jsonName = document.getElementById('json-name');
const loadBtn = document.getElementById('load-btn');
const errorBox = document.getElementById('upload-error');

// When the document input changes: remember the file, show its name (or the
// placeholder), mark the drop zone, and refresh the Load button.
// The ellipsis is written as \u2026 so the label survives re-encoding.
docInput.addEventListener('change', () => {
  docFile = docInput.files[0] || null;
  docName.textContent = docFile ? docFile.name : 'Choose file\u2026';
  docDrop.classList.toggle('has-file', !!docFile);
  updateLoadBtn();
});

// Same handling for the OCR JSON input.
jsonInput.addEventListener('change', () => {
  jsonFile = jsonInput.files[0] || null;
  jsonName.textContent = jsonFile ? jsonFile.name : 'Choose file\u2026';
  jsonDrop.classList.toggle('has-file', !!jsonFile);
  updateLoadBtn();
});
/** Keep the Load button disabled until both a document and a JSON file are chosen. */
function updateLoadBtn() {
  const isMissingFile = !docFile || !jsonFile;
  loadBtn.disabled = isMissingFile;
}
// Load button: clear any previous error, then load the two selected files.
loadBtn.addEventListener('click', function handleLoadClick() {
  errorBox.textContent = '';
  loadFiles(docFile, jsonFile);
});

// "New file" button: tear the viewer down and return to the upload screen.
document
  .getElementById('new-file-btn')
  .addEventListener('click', resetToUpload);
// ── File loading ───────────────────────────────────────────────────────────
/**
 * Load a document image (or PDF) together with its OCR JSON and show them.
 *
 * Reads both files in parallel, takes the page width from the root page
 * block's bbox, rebuilds the block list and (re)creates the viewer. Any
 * failure is logged, the upload screen is shown again, and the message is
 * written to the error box; the returned promise never rejects.
 *
 * @param {File} imageFile - Image or PDF to display.
 * @param {File} ocrJsonFile - OCR result; expected shape is
 *   `{ children: [ { bbox: [x1, y1, x2, y2], children: [...] } ] }`.
 * @returns {Promise<void>}
 */
async function loadFiles(imageFile, ocrJsonFile) {
  showLoading();
  let imageUrl = null;
  try {
    // allSettled (not all) so that an image URL created while the JSON read
    // fails is still known here and can be released in the catch block.
    const [jsonResult, imageResult] = await Promise.allSettled([
      readJson(ocrJsonFile),
      fileToImageUrl(imageFile),
    ]);
    if (imageResult.status === 'fulfilled') imageUrl = imageResult.value;
    if (jsonResult.status === 'rejected') throw jsonResult.reason;
    if (imageResult.status === 'rejected') throw imageResult.reason;
    const ocrData = jsonResult.value;

    // Validate the structure up front so the user sees a meaningful message
    // instead of "Cannot read properties of undefined".
    const page = ocrData?.children?.[0];
    if (!Array.isArray(page?.bbox) || !Array.isArray(page?.children)) {
      throw new Error('OCR JSON must contain a page block with "bbox" and "children"');
    }
    // bbox[2] is used as the width of the original OCR'd image. It is the
    // divisor for every overlay coordinate, so it must be a positive number.
    const pageWidth = Number(page.bbox[2]);
    if (!Number.isFinite(pageWidth) || pageWidth <= 0) {
      throw new Error('OCR JSON page bbox does not have a positive width');
    }
    jsonW = pageWidth;

    const blocks = page.children.filter(b => b.block_type !== 'Page');
    blockMap.clear();
    selectedId = null;
    document.getElementById('block-list').innerHTML = '';
    buildBlockList(blocks);
    initViewer(imageUrl, blocks);
  } catch (err) {
    console.error(err);
    // The viewer never took ownership of the image, so free its object URL.
    if (imageUrl) URL.revokeObjectURL(imageUrl);
    showUpload();
    errorBox.textContent = `Error: ${err.message}`;
  }
}
/**
 * Read a file's text and parse it as JSON.
 *
 * @param {{text: function(): Promise<string>}} file - Object exposing `text()`.
 * @returns {Promise<*>} The parsed value; rejects if reading or parsing fails.
 */
async function readJson(file) {
  return JSON.parse(await file.text());
}
/**
 * Produce an image URL for the uploaded document.
 *
 * A file counts as a PDF when its MIME type is `application/pdf` or its name
 * ends in `.pdf` (case-insensitive); PDFs go through `renderPdfToImage`.
 * Anything else is treated as a regular image and handed to OSD directly
 * through an object URL.
 *
 * @param {File} file - Uploaded document.
 * @returns {Promise<string>} URL of an image to display.
 */
async function fileToImageUrl(file) {
  const looksLikePdf =
    file.type === 'application/pdf' || file.name.toLowerCase().endsWith('.pdf');
  return looksLikePdf ? renderPdfToImage(file) : URL.createObjectURL(file);
}
/**
 * Render page 1 of a PDF at 2x scale to a JPEG and return an object URL for it.
 *
 * @param {Blob} file - The PDF file.
 * @returns {Promise<string>} Object URL of the rendered JPEG (quality 0.95).
 * @throws {Error} If the PDF cannot be loaded or rendered, or the canvas
 *   cannot be encoded to an image.
 */
async function renderPdfToImage(file) {
  const sourceUrl = URL.createObjectURL(file);
  let pdf = null;
  try {
    pdf = await pdfjsLib.getDocument(sourceUrl).promise;
    const page = await pdf.getPage(1);
    const viewport = page.getViewport({ scale: 2 });

    const canvas = document.createElement('canvas');
    canvas.width = viewport.width;
    canvas.height = viewport.height;
    await page.render({ canvasContext: canvas.getContext('2d'), viewport }).promise;

    const blob = await new Promise(resolve =>
      canvas.toBlob(resolve, 'image/jpeg', 0.95)
    );
    // toBlob reports failure by passing null; surface it as a rejection
    // instead of throwing inside the callback and never settling.
    if (!blob) {
      throw new Error('Failed to encode the rendered PDF page as an image');
    }
    return URL.createObjectURL(blob);
  } finally {
    // Release the source URL only once pdf.js is done with it, and on every
    // path (including load/render failures), then free the document.
    URL.revokeObjectURL(sourceUrl);
    if (pdf) await pdf.destroy();
  }
}
// ── Viewer ─────────────────────────────────────────────────────────────────
/**
 * Fill the sidebar list with one clickable item per block that has text.
 *
 * Each item is registered in `blockMap` as `{ block, rectEl: null, listEl }`.
 * Blocks whose HTML strips down to an empty string are skipped.
 *
 * The block type and text come from the uploaded JSON, so they are inserted
 * with `textContent`. The text in particular has already been entity-decoded
 * by `stripHtml`; feeding it back through `innerHTML` would let content such
 * as `&lt;img onerror=…&gt;` turn into live markup.
 *
 * @param {Array<{id: *, block_type: *, html: *}>} blocks - OCR blocks to list.
 */
function buildBlockList(blocks) {
  const list = document.getElementById('block-list');
  blocks.forEach(block => {
    const text = stripHtml(block.html);
    if (!text) return;

    const el = document.createElement('div');
    el.className = 'block-item';
    el.dataset.blockId = block.id;

    const typeEl = document.createElement('div');
    typeEl.className = 'block-type';
    typeEl.textContent = block.block_type ?? '';

    const textEl = document.createElement('div');
    textEl.className = 'block-text';
    textEl.textContent = text;

    el.appendChild(typeEl);
    el.appendChild(textEl);
    el.addEventListener('click', () => selectBlock(block.id, true));
    list.appendChild(el);
    blockMap.set(block.id, { block, rectEl: null, listEl: el });
  });
}
/**
 * (Re)create the OpenSeadragon viewer for an image and overlay its blocks.
 *
 * Any existing viewer is destroyed first. Once the image opens, the bounding
 * box overlay is added and the loading overlay is hidden. If the image fails
 * to open, the loading overlay is hidden as well and the user is sent back to
 * the upload screen with an error message, instead of being left on an
 * endless loading screen.
 *
 * @param {string} imageUrl - URL of the image to display.
 * @param {Array} blocks - OCR blocks to draw once the image is open.
 */
function initViewer(imageUrl, blocks) {
  if (viewer) {
    viewer.destroy();
    viewer = null;
  }
  viewer = OpenSeadragon({
    id: 'osd-viewer',
    tileSources: { type: 'image', url: imageUrl },
    showNavigationControl: false,
    animationTime: 0.4,
    blendTime: 0.1,
    constrainDuringPan: true,
    maxZoomPixelRatio: 4,
    defaultZoomLevel: 0,
    visibilityRatio: 0.8,
    gestureSettingsMouse: { scrollToZoom: true },
  });
  viewer.addHandler('open', () => {
    addBboxOverlay(blocks);
    hideLoading();
  });
  // Without this handler a corrupt or unsupported image never fires 'open'
  // and the loading overlay would stay up forever.
  viewer.addHandler('open-failed', event => {
    console.error('Failed to open image in viewer', event);
    hideLoading();
    showUpload();
    errorBox.textContent = `Error: ${event?.message || 'Unable to open the image'}`;
  });
}
/**
 * Draw one SVG rectangle per listed block on the viewer's SVG overlay.
 *
 * A block is drawn only if its HTML has non-empty text and it already has an
 * entry in `blockMap`. The rectangle is stored on that entry as `rectEl`, and
 * clicking it selects the block (as a viewer-side selection).
 *
 * @param {Array<{id: *, html: *, bbox: number[]}>} blocks - OCR blocks.
 */
function addBboxOverlay(blocks) {
  const SVG_NS = 'http://www.w3.org/2000/svg';
  const overlay = viewer.svgOverlay();

  for (const block of blocks) {
    if (!stripHtml(block.html)) continue;

    const entry = blockMap.get(block.id);
    if (!entry) continue;

    const box = toOSD(block.bbox);
    const rect = document.createElementNS(SVG_NS, 'rect');
    const attributes = [
      ['x', box.x],
      ['y', box.y],
      ['width', box.w],
      ['height', box.h],
      ['class', 'bbox-rect'],
    ];
    for (const [name, value] of attributes) {
      rect.setAttribute(name, value);
    }
    rect.dataset.blockId = block.id;

    overlay.node().appendChild(rect);
    overlay.onClick(rect, () => selectBlock(block.id, false));
    entry.rectEl = rect;
  }
}
// ── Selection ──────────────────────────────────────────────────────────────
/**
 * Make `id` the selected block and highlight its list item and rectangle.
 *
 * A previously selected, different block loses its highlight first. If `id`
 * has no entry in `blockMap`, `selectedId` is still updated and nothing else
 * happens.
 *
 * @param {*} id - Block id to select.
 * @param {boolean} fromList - True when the click came from the sidebar list
 *   (the viewer then zooms to the block with some padding); false when it came
 *   from the viewer (the list item is then scrolled into view).
 */
function selectBlock(id, fromList) {
  const isSwitching = selectedId && selectedId !== id;
  if (isSwitching) {
    const previous = blockMap.get(selectedId);
    if (previous) {
      previous.listEl?.classList.remove('selected');
      previous.rectEl?.classList.remove('selected');
    }
  }

  selectedId = id;
  const current = blockMap.get(id);
  if (!current) return;

  current.listEl?.classList.add('selected');
  current.rectEl?.classList.add('selected');

  if (!fromList) {
    current.listEl?.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
    return;
  }

  // Fit the viewport to the block's box, grown by `pad` on every side.
  const pad = 0.05;
  const { x, y, w, h } = toOSD(current.block.bbox);
  const bounds = new OpenSeadragon.Rect(x - pad, y - pad, w + pad * 2, h + pad * 2);
  viewer.viewport.fitBoundsWithConstraints(bounds, false);
}
/**
 * Clear the current selection, if any: remove the highlight from the selected
 * block's list item and rectangle, then reset `selectedId` to null.
 */
function deselectAll() {
  if (!selectedId) return;

  const current = blockMap.get(selectedId);
  if (current) {
    current.listEl?.classList.remove('selected');
    current.rectEl?.classList.remove('selected');
  }
  selectedId = null;
}
// ── Screen transitions ─────────────────────────────────────────────────────
/** Switch from the upload screen to the viewer layout with the loading overlay visible. */
function showLoading() {
  const byId = (id) => document.getElementById(id);
  byId('upload-screen').style.display = 'none';
  byId('viewer-layout').style.display = 'flex';
  byId('loading-overlay').classList.remove('hidden');
}
/** Hide the loading overlay by adding its `hidden` class. */
function hideLoading() {
  const overlay = document.getElementById('loading-overlay');
  overlay.classList.add('hidden');
}
/** Hide the viewer layout and bring the upload screen back. */
function showUpload() {
  const displayById = [
    ['viewer-layout', 'none'],
    ['upload-screen', 'flex'],
  ];
  for (const [id, display] of displayById) {
    document.getElementById(id).style.display = display;
  }
}
/**
 * Tear down the current document and return to a pristine upload screen.
 *
 * Clears the selection, destroys the viewer, empties the block map and list,
 * forgets both chosen files, restores the file-picker placeholders, disables
 * the Load button, clears the error box, and shows the upload screen.
 */
function resetToUpload() {
  deselectAll();
  if (viewer) {
    viewer.destroy();
    viewer = null;
  }
  blockMap.clear();
  document.getElementById('block-list').innerHTML = '';

  // Reset file inputs. The placeholder's ellipsis is written as \u2026 so the
  // label cannot be corrupted by a wrong file encoding.
  docFile = null;
  jsonFile = null;
  docInput.value = '';
  jsonInput.value = '';
  docName.textContent = 'Choose file\u2026';
  jsonName.textContent = 'Choose file\u2026';
  docDrop.classList.remove('has-file');
  jsonDrop.classList.remove('has-file');
  loadBtn.disabled = true;
  errorBox.textContent = '';
  showUpload();
}
// ── Keyboard shortcuts ─────────────────────────────────────────────────────
// +/= zoom in, -/_ zoom out, arrow keys pan, h/H go home, Escape clears the
// selection. Key presses are ignored while no viewer exists.
document.addEventListener('keydown', (event) => {
  if (!viewer) return;

  const viewport = viewer.viewport;
  const zoom = (factor) => viewport.zoomBy(factor, viewport.getCenter());
  const pan = (dx, dy) => viewport.panBy(new OpenSeadragon.Point(dx, dy));
  const goHome = () => viewport.goHome();

  // A Map (not an object literal) so only the keys listed here can match.
  const shortcuts = new Map([
    ['+', () => zoom(1.3)],
    ['=', () => zoom(1.3)],
    ['-', () => zoom(1 / 1.3)],
    ['_', () => zoom(1 / 1.3)],
    ['ArrowLeft', () => pan(-0.05, 0)],
    ['ArrowRight', () => pan(0.05, 0)],
    ['ArrowUp', () => pan(0, -0.05)],
    ['ArrowDown', () => pan(0, 0.05)],
    ['h', goHome],
    ['H', goHome],
    ['Escape', () => deselectAll()],
  ]);

  shortcuts.get(event.key)?.();
});