<!--
davanstrien's picture
davanstrien HF Staff
Default to bibliographic-only view; hyperlink the model + Jobs
e302f7f verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>BPL Shelf-List Card Extraction — NuExtract3</title>
<style>
:root { --ink:#111110; --muted:#8a8a82; --rule:#d8d8cf; --accent:#7b1f1f; --bg:#fffff8; }
* { box-sizing:border-box; }
body { margin:0; background:var(--bg); color:var(--ink);
font-family:"Palatino Linotype","Palatino",Georgia,serif; line-height:1.5; }
header { max-width:980px; margin:0 auto; padding:44px 28px 0; }
h1 { font-size:30px; font-weight:400; margin:0 0 6px; }
h1 i { color:var(--accent); }
.sub { color:#555; font-size:16px; margin:0 0 4px; }
.sub a { color:var(--accent); text-decoration:none; border-bottom:1px solid #d8c2c2; }
.sub a:hover { border-bottom-color:var(--accent); }
.meta { color:var(--muted); font-size:14px; font-style:italic; }
.bar { max-width:980px; margin:18px auto 0; padding:0 28px; display:flex;
align-items:center; gap:16px; border-bottom:1px solid var(--rule); padding-bottom:14px;
position:sticky; top:0; background:var(--bg); z-index:5; }
.bar .count { font-size:14px; color:var(--muted); }
.bar label { font-size:14px; color:#333; cursor:pointer; user-select:none; }
main { max-width:980px; margin:0 auto; padding:8px 28px 80px; }
.card { display:flex; gap:26px; padding:26px 0; border-bottom:1px solid var(--rule);
align-items:flex-start; }
.card .imgwrap { flex:0 0 46%; }
.card img { width:100%; border:1px solid #c9c9bf; display:block; background:#eee; }
.card .fields { flex:1; padding-top:2px; }
.badge { display:inline-block; font-size:11px; font-variant:small-caps; letter-spacing:1.4px;
color:var(--muted); border:1px solid var(--rule); border-radius:3px; padding:2px 8px; margin-bottom:12px; }
.badge.divider { color:#9a7b1f; border-color:#e3d9b6; }
dl { margin:0; display:grid; grid-template-columns:auto 1fr; gap:7px 16px; }
dt { font-size:13px; font-variant:small-caps; letter-spacing:1px; color:var(--muted); white-space:nowrap; }
dd { margin:0; font-size:16px; }
dd.mono { font-family:ui-monospace,"SF Mono",Menlo,monospace; font-size:15px; color:var(--accent); }
dd.null { color:#bcbcb2; }
.cid { margin-top:14px; font-size:12px; color:#bcbcb2; font-family:ui-monospace,Menlo,monospace; }
footer { max-width:980px; margin:0 auto; padding:24px 28px 60px; color:var(--muted);
font-size:13px; font-style:italic; border-top:1px solid var(--rule); }
footer a { color:var(--accent); }
@media (max-width:700px){ .card{flex-direction:column;} .card .imgwrap{flex:none;width:100%;} }
</style>
</head>
<body>
<header>
<h1>Boston Public Library shelf-list cards <i>→ structured records</i></h1>
<p class="sub">Zero-shot extraction with <a href="https://huggingface.co/numind/NuExtract3">NuExtract3</a> (4B, Apache-2.0) — one command on <a href="https://huggingface.co/docs/hub/spaces-gpu-jobs">Hugging Face Jobs</a>.</p>
<p class="meta" id="meta">loading…</p>
</header>
<div class="bar">
<span class="count" id="count"></span>
<label><input type="checkbox" id="bibOnly" checked> bibliographic cards only</label>
</div>
<main id="cards"></main>
<footer>
Unreviewed zero-shot demo · model <a href="https://huggingface.co/numind/NuExtract3">numind/NuExtract3</a>
· script <a href="https://huggingface.co/datasets/uv-scripts/ocr">uv-scripts/ocr</a>
· cards: Boston Public Library (public domain). Next step: curator review + iteration.
</footer>
<script>
// Display schema for one card, in render order.
// Each entry is a positional tuple [key, label, mono]:
//   key   – property name read from a row object
//   label – text shown in the <dt> for that field
//   mono  – when true, a non-empty value's <dd> gets the "mono" class
const FIELDS = [
["shelf_no","Shelf no.",true],
["author","Author",false],
["title","Title",false],
["place_of_publication","Place",false],
["date","Date",true],
["volumes","Vols.",true],
["accession_no","Accession",true],
["additions","Additions",false],
];
/**
 * Normalise a raw field value into display text.
 *
 * @param {*} v raw value taken from a row
 * @returns {?string} null for null/undefined, the empty string, or an empty
 *   array; array items joined with "; " for a non-empty array; otherwise
 *   String(v).
 */
function val(v){
  if(v===null || v===undefined || v==="") return null;
  if(Array.isArray(v)){
    if(v.length===0) return null;
    return v.join("; ");
  }
  return String(v);
}
/**
 * Rebuild the card list inside #cards from the extraction rows.
 *
 * Reads the #bibOnly checkbox: when it is checked, only rows whose card_type
 * is "bibliographic" (case-insensitive) are shown. Each visible row becomes a
 * .card holding its scan (imgs/<card_id>.jpg), a card-type badge, one
 * <dt>/<dd> pair per FIELDS entry, and the card id. Finally updates #count
 * with the number of cards shown.
 *
 * @param {Iterable<Object>} rows extraction records; assumed to carry card_id,
 *   card_type and the keys listed in FIELDS — TODO confirm against data.json.
 */
function render(rows){
  const bibOnly = document.getElementById("bibOnly").checked;
  const main = document.getElementById("cards");
  main.innerHTML="";
  // Build the cards off-document and attach them in one go at the end.
  const frag = document.createDocumentFragment();
  let shown=0;
  for(const r of rows){
    // String() so a non-string card_type (e.g. a number) cannot make toLowerCase throw.
    const ct = String(r.card_type||"").toLowerCase();
    if(bibOnly && ct!=="bibliographic") continue;
    shown++;
    // card_id and card_type come from data.json and are spliced into markup
    // below, so they get the same escaping as the field values.
    const id = escapeHtml(String(r.card_id));
    const badge = ct==="bibliographic" ? '<span class="badge">bibliographic</span>'
      : ct==="shelf_divider" ? '<span class="badge divider">shelf divider</span>'
      : '<span class="badge divider">'+escapeHtml(ct||"?")+'</span>';
    let dl='<dl>';
    for(const [k,label,mono] of FIELDS){
      const v=val(r[k]);
      // Empty values show an em dash with class "null"; mono fields get class "mono".
      dl+='<dt>'+label+'</dt><dd class="'+(v?(mono?'mono':''):'null')+'">'+(v?escapeHtml(v):'—')+'</dd>';
    }
    dl+='</dl>';
    const div=document.createElement("div");
    div.className="card";
    div.innerHTML='<div class="imgwrap"><img loading="lazy" src="imgs/'+id+'.jpg" alt="BPL card '+id+'"></div>'
      +'<div class="fields">'+badge+dl+'<div class="cid">'+id+'</div></div>';
    frag.appendChild(div);
  }
  main.appendChild(frag);
  // Singular/plural so a single match reads "1 card shown".
  document.getElementById("count").textContent = shown+(shown===1?" card":" cards")+" shown";
}
/**
 * Escape text for insertion into HTML element content or a quoted attribute.
 *
 * Replaces & < > " and ' with entities; the single quote is included so the
 * result is also safe inside single-quoted attributes. Non-string input is
 * coerced with String() instead of throwing.
 *
 * @param {*} s value to escape
 * @returns {string} the escaped text
 */
function escapeHtml(s){
  const entities={'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  return String(s).replace(/[&<>"']/g,c=>entities[c]);
}
// Bootstrap: load the extraction results, show a summary line, draw the cards,
// and re-draw whenever the "bibliographic cards only" checkbox changes.
fetch("data.json")
  .then(r=>{
    // fetch() resolves on HTTP errors too; without this check a 404 page would
    // only surface later as a confusing JSON parse error.
    if(!r.ok) throw new Error("HTTP "+r.status);
    return r.json();
  })
  .then(data=>{
    // data.json may be a bare array of rows or an object wrapping them in `rows`.
    const rows=data.rows||data;
    document.getElementById("meta").textContent = rows.length+" sampled cards · NuExtract3 zero-shot";
    render(rows);
    document.getElementById("bibOnly").addEventListener("change",()=>render(rows));
  })
  .catch(e=>{document.getElementById("meta").textContent="failed to load data.json: "+e;});
</script>
</body>
</html>