File size: 5,668 Bytes
bba8312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e302f7f
 
bba8312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e302f7f
bba8312
 
 
 
e302f7f
bba8312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c6c09b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>BPL Shelf-List Card Extraction — NuExtract3</title>
<style>
  :root { --ink:#111110; --muted:#8a8a82; --rule:#d8d8cf; --accent:#7b1f1f; --bg:#fffff8; }
  * { box-sizing:border-box; }
  body { margin:0; background:var(--bg); color:var(--ink);
    font-family:"Palatino Linotype","Palatino",Georgia,serif; line-height:1.5; }
  header { max-width:980px; margin:0 auto; padding:44px 28px 0; }
  h1 { font-size:30px; font-weight:400; margin:0 0 6px; }
  h1 i { color:var(--accent); }
  .sub { color:#555; font-size:16px; margin:0 0 4px; }
  .sub a { color:var(--accent); text-decoration:none; border-bottom:1px solid #d8c2c2; }
  .sub a:hover { border-bottom-color:var(--accent); }
  .meta { color:var(--muted); font-size:14px; font-style:italic; }
  .bar { max-width:980px; margin:18px auto 0; padding:0 28px; display:flex;
    align-items:center; gap:16px; border-bottom:1px solid var(--rule); padding-bottom:14px;
    position:sticky; top:0; background:var(--bg); z-index:5; }
  .bar .count { font-size:14px; color:var(--muted); }
  .bar label { font-size:14px; color:#333; cursor:pointer; user-select:none; }
  main { max-width:980px; margin:0 auto; padding:8px 28px 80px; }
  .card { display:flex; gap:26px; padding:26px 0; border-bottom:1px solid var(--rule);
    align-items:flex-start; }
  .card .imgwrap { flex:0 0 46%; }
  .card img { width:100%; border:1px solid #c9c9bf; display:block; background:#eee; }
  .card .fields { flex:1; padding-top:2px; }
  .badge { display:inline-block; font-size:11px; font-variant:small-caps; letter-spacing:1.4px;
    color:var(--muted); border:1px solid var(--rule); border-radius:3px; padding:2px 8px; margin-bottom:12px; }
  .badge.divider { color:#9a7b1f; border-color:#e3d9b6; }
  dl { margin:0; display:grid; grid-template-columns:auto 1fr; gap:7px 16px; }
  dt { font-size:13px; font-variant:small-caps; letter-spacing:1px; color:var(--muted); white-space:nowrap; }
  dd { margin:0; font-size:16px; }
  dd.mono { font-family:ui-monospace,"SF Mono",Menlo,monospace; font-size:15px; color:var(--accent); }
  dd.null { color:#bcbcb2; }
  .cid { margin-top:14px; font-size:12px; color:#bcbcb2; font-family:ui-monospace,Menlo,monospace; }
  footer { max-width:980px; margin:0 auto; padding:24px 28px 60px; color:var(--muted);
    font-size:13px; font-style:italic; border-top:1px solid var(--rule); }
  footer a { color:var(--accent); }
  @media (max-width:700px){ .card{flex-direction:column;} .card .imgwrap{flex:none;width:100%;} }
</style>
</head>
<body>
<header>
  <h1>Boston Public Library shelf-list cards <i>→ structured records</i></h1>
  <p class="sub">Zero-shot extraction with <a href="https://huggingface.co/numind/NuExtract3">NuExtract3</a> (4B, Apache-2.0) — one command on <a href="https://huggingface.co/docs/hub/spaces-gpu-jobs">Hugging Face Jobs</a>.</p>
  <p class="meta" id="meta">loading…</p>
</header>
<div class="bar">
  <span class="count" id="count"></span>
  <label><input type="checkbox" id="bibOnly" checked> bibliographic cards only</label>
</div>
<main id="cards"></main>
<footer>
  Unreviewed zero-shot demo · model <a href="https://huggingface.co/numind/NuExtract3">numind/NuExtract3</a>
  · script <a href="https://huggingface.co/datasets/uv-scripts/ocr">uv-scripts/ocr</a>
  · cards: Boston Public Library (public domain). Next step: curator review + iteration.
</footer>
<script>
// Display schema for a card record. Each descriptor is flattened into a
// [record key, visible label, monospace flag] triple, in display order.
const FIELDS = [
  { key: "shelf_no", label: "Shelf no.", mono: true },
  { key: "author", label: "Author", mono: false },
  { key: "title", label: "Title", mono: false },
  { key: "place_of_publication", label: "Place", mono: false },
  { key: "date", label: "Date", mono: true },
  { key: "volumes", label: "Vols.", mono: true },
  { key: "accession_no", label: "Accession", mono: true },
  { key: "additions", label: "Additions", mono: false },
].map(({ key, label, mono }) => [key, label, mono]);
/**
 * Normalises a raw field value into display text.
 *
 * @param {*} v - value read from a card record
 * @returns {string|null} null for null/undefined, the empty string and empty
 *   arrays; array elements joined with "; "; otherwise String(v).
 */
function val(v) {
  if (v == null) {
    return null;
  }
  if (Array.isArray(v)) {
    return v.length === 0 ? null : v.join("; ");
  }
  if (v === "") {
    return null;
  }
  return String(v);
}
/**
 * Rebuilds the card list inside #cards from the given records and updates the
 * #count label.
 *
 * Every value taken from a record (card id, card type, field values) is
 * HTML-escaped before it is concatenated into markup: the records are loaded
 * at runtime and must not be able to inject tags or break out of attributes.
 *
 * @param {Array<Object>} rows - card records; each is read for `card_id`,
 *   `card_type` and the keys listed in FIELDS.
 * @returns {void}
 */
function render(rows){
  const bibOnly = document.getElementById("bibOnly").checked;
  const main = document.getElementById("cards");
  main.innerHTML="";
  let shown=0;
  for(const r of rows){
    // String() keeps a truthy non-string card_type (e.g. a number) from throwing on toLowerCase().
    const ct = String(r.card_type||"").toLowerCase();
    if(bibOnly && ct!=="bibliographic") continue;
    shown++;
    // Escaped once and reused: the id lands in an attribute URL, the alt text and visible text.
    const id = escapeHtml(String(r.card_id));
    const div=document.createElement("div");
    div.className="card";
    let badge;
    if(ct==="bibliographic"){
      badge='<span class="badge">bibliographic</span>';
    } else if(ct==="shelf_divider"){
      badge='<span class="badge divider">shelf divider</span>';
    } else {
      // Unknown types are shown verbatim, so they must be escaped as well.
      badge='<span class="badge divider">'+(ct?escapeHtml(ct):"?")+'</span>';
    }
    let dl='<dl>';
    for(const [k,label,mono] of FIELDS){
      const v=val(r[k]);
      // Missing values get the muted "null" style and an em dash placeholder.
      dl+='<dt>'+label+'</dt><dd class="'+(v?(mono?'mono':''):'null')+'">'+(v?escapeHtml(v):'—')+'</dd>';
    }
    dl+='</dl>';
    div.innerHTML='<div class="imgwrap"><img loading="lazy" src="imgs/'+id+'.jpg" alt="BPL card '+id+'"></div>'
      +'<div class="fields">'+badge+dl+'<div class="cid">'+id+'</div></div>';
    main.appendChild(div);
  }
  document.getElementById("count").textContent = shown+" cards shown";
}
/**
 * Escapes a value for insertion into HTML text or a quoted attribute value.
 *
 * Replaces & < > " and ' with entities. The apostrophe is included so the
 * result is also safe inside single-quoted attributes. Non-string input is
 * converted with String(); null and undefined become the empty string
 * instead of throwing.
 *
 * @param {*} s - value to escape
 * @returns {string} the escaped text
 */
function escapeHtml(s){
  const entities = {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  const text = s==null ? "" : String(s);
  return text.replace(/[&<>"']/g, c=>entities[c]);
}
// Page bootstrap: load the extracted records, draw them, and re-draw whenever
// the "bibliographic cards only" checkbox changes.
// data.json may be either a bare array of records or an object carrying them
// under `rows`.
fetch("data.json").then(r=>{
  // fetch() also resolves for HTTP error statuses; report those directly
  // rather than letting them surface as a JSON parse failure.
  if(!r.ok) throw new Error("HTTP "+r.status);
  return r.json();
}).then(data=>{
  const rows = Array.isArray(data) ? data : (data && data.rows);
  // Fail with a clear message here instead of a TypeError inside render().
  if(!Array.isArray(rows)) throw new Error("expected an array of card records");
  document.getElementById("meta").textContent = rows.length+" sampled cards · NuExtract3 zero-shot";
  render(rows);
  document.getElementById("bibOnly").addEventListener("change",()=>render(rows));
}).catch(e=>{document.getElementById("meta").textContent="failed to load data.json: "+e;});
</script>
</body>
</html>