davanstrien HF Staff
Default to bibliographic-only view; hyperlink the model + Jobs
e302f7f verified | <html lang="en"> | |
| <head> | |
| <meta charset="utf-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"> | |
| <title>BPL Shelf-List Card Extraction — NuExtract3</title> | |
<style>
  /* ---- Palette ---------------------------------------------------------- */
  :root {
    --ink: #111110;
    --muted: #8a8a82;
    --rule: #d8d8cf;
    --accent: #7b1f1f;
    --bg: #fffff8;
  }

  /* ---- Base ------------------------------------------------------------- */
  * {
    box-sizing: border-box;
  }
  body {
    margin: 0;
    background: var(--bg);
    color: var(--ink);
    font-family: "Palatino Linotype", "Palatino", Georgia, serif;
    line-height: 1.5;
  }

  /* ---- Page header ------------------------------------------------------ */
  header {
    max-width: 980px;
    margin: 0 auto;
    padding: 44px 28px 0;
  }
  h1 {
    font-size: 30px;
    font-weight: 400;
    margin: 0 0 6px;
  }
  h1 i {
    color: var(--accent);
  }
  .sub {
    color: #555;
    font-size: 16px;
    margin: 0 0 4px;
  }
  .sub a {
    color: var(--accent);
    text-decoration: none;
    border-bottom: 1px solid #d8c2c2;
  }
  .sub a:hover {
    border-bottom-color: var(--accent);
  }
  .meta {
    color: var(--muted);
    font-size: 14px;
    font-style: italic;
  }

  /* ---- Sticky filter bar ------------------------------------------------ */
  .bar {
    max-width: 980px;
    margin: 18px auto 0;
    padding: 0 28px 14px;
    display: flex;
    align-items: center;
    gap: 16px;
    border-bottom: 1px solid var(--rule);
    position: sticky;
    top: 0;
    background: var(--bg);
    z-index: 5;
  }
  .bar .count {
    font-size: 14px;
    color: var(--muted);
  }
  .bar label {
    font-size: 14px;
    color: #333;
    cursor: pointer;
    user-select: none;
  }

  /* ---- Card list: scan on the left, extracted fields on the right ------- */
  main {
    max-width: 980px;
    margin: 0 auto;
    padding: 8px 28px 80px;
  }
  .card {
    display: flex;
    gap: 26px;
    padding: 26px 0;
    border-bottom: 1px solid var(--rule);
    align-items: flex-start;
  }
  .card .imgwrap {
    flex: 0 0 46%;
  }
  .card img {
    width: 100%;
    border: 1px solid #c9c9bf;
    display: block;
    background: #eee;
  }
  .card .fields {
    flex: 1;
    padding-top: 2px;
  }

  /* ---- Card-type badge -------------------------------------------------- */
  .badge {
    display: inline-block;
    font-size: 11px;
    font-variant: small-caps;
    letter-spacing: 1.4px;
    color: var(--muted);
    border: 1px solid var(--rule);
    border-radius: 3px;
    padding: 2px 8px;
    margin-bottom: 12px;
  }
  .badge.divider {
    color: #9a7b1f;
    border-color: #e3d9b6;
  }

  /* ---- Field list (label / value grid) ---------------------------------- */
  dl {
    margin: 0;
    display: grid;
    grid-template-columns: auto 1fr;
    gap: 7px 16px;
  }
  dt {
    font-size: 13px;
    font-variant: small-caps;
    letter-spacing: 1px;
    color: var(--muted);
    white-space: nowrap;
  }
  dd {
    margin: 0;
    font-size: 16px;
  }
  dd.mono {
    font-family: ui-monospace, "SF Mono", Menlo, monospace;
    font-size: 15px;
    color: var(--accent);
  }
  dd.null {
    color: #bcbcb2;
  }
  .cid {
    margin-top: 14px;
    font-size: 12px;
    color: #bcbcb2;
    font-family: ui-monospace, Menlo, monospace;
  }

  /* ---- Footer ----------------------------------------------------------- */
  footer {
    max-width: 980px;
    margin: 0 auto;
    padding: 24px 28px 60px;
    color: var(--muted);
    font-size: 13px;
    font-style: italic;
    border-top: 1px solid var(--rule);
  }
  footer a {
    color: var(--accent);
  }

  /* ---- Narrow screens: stack the scan above its fields ------------------ */
  @media (max-width: 700px) {
    .card {
      flex-direction: column;
    }
    .card .imgwrap {
      flex: none;
      width: 100%;
    }
  }
</style>
| </head> | |
| <body> | |
<header>
  <h1>Boston Public Library shelf-list cards <i>→ structured records</i></h1>
  <p class="sub">Zero-shot extraction with <a href="https://huggingface.co/numind/NuExtract3">NuExtract3</a> (4B, Apache-2.0) — one command on <a href="https://huggingface.co/docs/hub/spaces-gpu-jobs">Hugging Face Jobs</a>.</p>
  <!-- Script replaces this text once data.json has loaded (or failed to load);
       role="status" lets assistive technology announce that change. -->
  <p class="meta" id="meta" role="status">loading…</p>
</header>
<div class="bar">
  <!-- Filled in by render() on every filter change; role="status" makes the
       updated count a polite live region for screen-reader users. -->
  <span class="count" id="count" role="status"></span>
  <label><input type="checkbox" id="bibOnly" checked> bibliographic cards only</label>
</div>
<!-- Card list container; render() rebuilds its children from data.json. -->
<main id="cards"></main>
<!-- Provenance line: model, extraction script and image source. -->
<footer>
  Unreviewed zero-shot demo
  · model <a href="https://huggingface.co/numind/NuExtract3">numind/NuExtract3</a>
  · script <a href="https://huggingface.co/datasets/uv-scripts/ocr">uv-scripts/ocr</a>
  · cards: Boston Public Library (public domain).
  Next step: curator review + iteration.
</footer>
| <script> | |
// Fields shown for every card, in display order.
// Each entry is [row key, visible label, mono]: render() reads row[key],
// prints the label in the <dt>, and gives the <dd> the "mono" class when
// the third element is true.
const FIELDS = [
  ["shelf_no",             "Shelf no.", true ],
  ["author",               "Author",    false],
  ["title",                "Title",     false],
  ["place_of_publication", "Place",     false],
  ["date",                 "Date",      true ],
  ["volumes",              "Vols.",     true ],
  ["accession_no",         "Accession", true ],
  ["additions",            "Additions", false],
];
/**
 * Normalise a raw field value for display.
 * Returns null for null/undefined, the empty string and empty arrays;
 * joins non-empty arrays with "; "; stringifies everything else.
 */
function val(v){
  if (Array.isArray(v)) {
    return v.length ? v.join("; ") : null;
  }
  if (v == null || v === "") {
    return null;
  }
  return String(v);
}
/**
 * Rebuild the card list in #cards from the extracted rows.
 *
 * Honours the #bibOnly checkbox: when it is checked, only rows whose
 * card_type (case-insensitive) is "bibliographic" are shown. Updates #count
 * with the number of cards rendered.
 *
 * Every value taken from a row (card_id, card_type, field values) is placed
 * into the page through textContent / DOM properties rather than string-built
 * innerHTML, so markup characters in data.json are displayed literally and
 * can never be parsed as HTML.
 *
 * @param {Array<Object>} rows  Row objects; reads card_type, card_id and the
 *                              keys listed in FIELDS.
 */
function render(rows){
  // Create an element with an optional class and optional text content.
  const el = (tag, cls, text) => {
    const node = document.createElement(tag);
    if (cls) node.className = cls;
    if (text != null) node.textContent = text;
    return node;
  };

  const bibOnly = document.getElementById("bibOnly").checked;
  const main = document.getElementById("cards");
  // Build off-document and attach once, so the list is swapped in one step.
  const frag = document.createDocumentFragment();
  let shown = 0;

  for (const r of rows) {
    const ct = (r.card_type || "").toLowerCase();
    if (bibOnly && ct !== "bibliographic") continue;
    shown++;

    // Stringify once so a missing id renders the same way everywhere.
    const id = String(r.card_id);

    // Left column: the card scan.
    const img = document.createElement("img");
    img.setAttribute("loading", "lazy"); // must be set before src to take effect
    img.src = "imgs/" + id + ".jpg";
    img.alt = "BPL card " + id;
    const imgwrap = el("div", "imgwrap");
    imgwrap.appendChild(img);

    // Right column: card-type badge, field list, card id.
    const isBib = ct === "bibliographic";
    const badgeText = isBib ? "bibliographic"
      : ct === "shelf_divider" ? "shelf divider"
      : (ct || "?");
    const badge = el("span", isBib ? "badge" : "badge divider", badgeText);

    const dl = document.createElement("dl");
    for (const [k, label, mono] of FIELDS) {
      const v = val(r[k]);
      dl.appendChild(el("dt", "", label));
      // Missing values get the greyed-out "null" style and an em dash.
      dl.appendChild(el("dd", v ? (mono ? "mono" : "") : "null", v ? v : "—"));
    }

    const fields = el("div", "fields");
    fields.appendChild(badge);
    fields.appendChild(dl);
    fields.appendChild(el("div", "cid", id));

    const card = el("div", "card");
    card.appendChild(imgwrap);
    card.appendChild(fields);
    frag.appendChild(card);
  }

  main.textContent = ""; // drop the previous render
  main.appendChild(frag);
  document.getElementById("count").textContent = shown + " cards shown";
}
/**
 * Escape text so it can be interpolated into HTML element content or a
 * quoted attribute value.
 *
 * Replaces & < > " ' with their character references. The input is coerced
 * with String() first, so numbers and other non-string values are accepted.
 *
 * @param {*} s  Value to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(s){
  const ENTITIES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, c => ENTITIES[c]);
}
// Bootstrap: load the extraction results, render them, and re-render whenever
// the "bibliographic cards only" checkbox changes. Any failure (network, HTTP
// status, malformed JSON, unexpected shape) is reported in #meta.
fetch("data.json")
  .then(r => {
    // fetch() resolves on 404/500 too; surface the status instead of letting
    // r.json() fail with an unrelated parse error.
    if (!r.ok) throw new Error("HTTP " + r.status + " " + r.statusText);
    return r.json();
  })
  .then(data => {
    // data.json is either a bare array of rows or an object holding them
    // under `rows`.
    const rows = data.rows || data;
    if (!Array.isArray(rows)) throw new Error("expected an array of rows");
    document.getElementById("meta").textContent = rows.length+" sampled cards · NuExtract3 zero-shot";
    render(rows);
    document.getElementById("bibOnly").addEventListener("change", () => render(rows));
  })
  .catch(e => {
    document.getElementById("meta").textContent = "failed to load data.json: "+e;
  });
| </script> | |
| </body> | |
| </html> | |