JaydeepR Claude Sonnet 4.6 committed on
Commit
b14fc84
·
1 Parent(s): 76e0cee

Add Interpretability tab, fix mandatory verdict logic, demo reset, UI polish

Browse files

- core/config.py: add BIDDER_NAMES dict (shared across all tabs)
- core/evaluator.py: richer criterion_evaluated audit payload (extracted_value,
llm_confidence, ocr_tier, reason, escalation_reason)
- ui/tab_review.py: store original_verdict + original_extracted_value in
human_review_action audit entries
- ui/tab_bidders.py: fix _overall_verdict to only count mandatory criteria;
add column headers; use friendly company names; show passed/total count
- ui/tab_overview.py: replace text architecture section with ASCII pipeline
diagram; use BIDDER_NAMES in demo loader
- ui/tab_audit.py: richer human-readable display with summary column,
category dots, metric cards, raw payload expander
- ui/tab_interpretability.py: new tab — plain-English per-criterion breakdown
with page-level source citations and inline PDF/image previews; LLM-powered
Q&A with rule-based fallback when API unavailable
- app.py: add Tab 6 (Interpretability); sidebar demo reset button that clears
audit DB + ChromaDB + OCR cache + session in one click; LLM probe cached
once per session

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import streamlit as st
2
 
3
  from ui.tab_overview import render as render_overview
@@ -5,6 +7,7 @@ from ui.tab_tender import render as render_tender
5
  from ui.tab_bidders import render as render_bidders
6
  from ui.tab_review import render as render_review
7
  from ui.tab_audit import render as render_audit
 
8
 
9
  st.set_page_config(
10
  page_title="TenderIQ",
@@ -14,7 +17,7 @@ st.set_page_config(
14
 
15
 
16
  def _probe_llm() -> str:
17
- """Returns 'green', 'amber', or 'red'."""
18
  if st.session_state.get("fallback_active"):
19
  return "amber"
20
  if "llm_status" in st.session_state:
@@ -32,6 +35,18 @@ def _probe_llm() -> str:
32
  return "red"
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  # ── Sidebar ──────────────────────────────────────────────────────────────────
36
  with st.sidebar:
37
  st.markdown("## ⚖️ TenderIQ")
@@ -49,18 +64,33 @@ with st.sidebar:
49
  st.caption("Using pre-computed fallback data.")
50
 
51
  st.divider()
 
52
  if st.button("Reset Session", use_container_width=True):
53
  for key in list(st.session_state.keys()):
54
  del st.session_state[key]
55
  st.rerun()
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # ── Tabs ─────────────────────────────────────────────────────────────────────
58
- tab1, tab2, tab3, tab4, tab5 = st.tabs([
59
  "Overview",
60
  "Tender Analysis",
61
  "Bidder Evaluation",
62
  "Human Review",
63
  "Audit Log",
 
64
  ])
65
 
66
  with tab1:
@@ -77,3 +107,6 @@ with tab4:
77
 
78
  with tab5:
79
  render_audit()
 
 
 
 
1
+ import shutil
2
+
3
  import streamlit as st
4
 
5
  from ui.tab_overview import render as render_overview
 
7
  from ui.tab_bidders import render as render_bidders
8
  from ui.tab_review import render as render_review
9
  from ui.tab_audit import render as render_audit
10
+ from ui.tab_interpretability import render as render_interpretability
11
 
12
  st.set_page_config(
13
  page_title="TenderIQ",
 
17
 
18
 
19
  def _probe_llm() -> str:
20
+ """Probe once per session; returns 'green', 'amber', or 'red'."""
21
  if st.session_state.get("fallback_active"):
22
  return "amber"
23
  if "llm_status" in st.session_state:
 
35
  return "red"
36
 
37
 
38
def _reset_demo() -> None:
    """Return the app to a pristine state for a fresh demo run.

    Empties the audit log, removes the ChromaDB and OCR-cache directories from
    disk, drops Streamlit's cached resources, and finally deletes every key in
    the session state.
    """
    from core import audit
    from core.config import CHROMA_DIR, OCR_CACHE_DIR

    audit.clear()

    # ignore_errors=True: a directory that does not exist yet is not a failure.
    for stale_dir in (CHROMA_DIR, str(OCR_CACHE_DIR)):
        shutil.rmtree(stale_dir, ignore_errors=True)

    st.cache_resource.clear()

    # Snapshot the keys first so the mapping is not mutated while iterating it.
    session_keys = list(st.session_state.keys())
    for name in session_keys:
        del st.session_state[name]
48
+
49
+
50
  # ── Sidebar ──────────────────────────────────────────────────────────────────
51
  with st.sidebar:
52
  st.markdown("## ⚖️ TenderIQ")
 
64
  st.caption("Using pre-computed fallback data.")
65
 
66
  st.divider()
67
+
68
  if st.button("Reset Session", use_container_width=True):
69
  for key in list(st.session_state.keys()):
70
  del st.session_state[key]
71
  st.rerun()
72
 
73
+ if st.button("🗑 Reset for Demo", use_container_width=True, type="secondary"):
74
+ st.session_state["confirm_demo_reset"] = True
75
+
76
+ if st.session_state.get("confirm_demo_reset"):
77
+ st.warning("Clears audit log, vector index, OCR cache, and session. Sure?")
78
+ col1, col2 = st.columns(2)
79
+ if col1.button("Yes, reset", type="primary", use_container_width=True):
80
+ _reset_demo()
81
+ st.rerun()
82
+ if col2.button("Cancel", use_container_width=True):
83
+ st.session_state.pop("confirm_demo_reset", None)
84
+ st.rerun()
85
+
86
  # ── Tabs ─────────────────────────────────────────────────────────────────────
87
+ tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
88
  "Overview",
89
  "Tender Analysis",
90
  "Bidder Evaluation",
91
  "Human Review",
92
  "Audit Log",
93
+ "Interpretability",
94
  ])
95
 
96
  with tab1:
 
107
 
108
  with tab5:
109
  render_audit()
110
+
111
+ with tab6:
112
+ render_interpretability()
core/audit.py CHANGED
@@ -47,6 +47,14 @@ def log(action: str, actor: str = "system", **fields) -> int:
47
  return row_id
48
 
49
 
 
 
 
 
 
 
 
 
50
  def query(filters: dict | None = None) -> list[dict]:
51
  conn = _conn()
52
  sql = "SELECT * FROM audit_log"
 
47
  return row_id
48
 
49
 
50
def clear() -> None:
    """Delete every row from ``audit_log`` and reset its stored id sequence.

    The connection is closed even when a statement fails, so a failed reset
    does not leave an open database handle behind.
    """
    conn = _conn()
    try:
        conn.execute("DELETE FROM audit_log")
        # Dropping the sqlite_sequence row makes new ids start over.
        # NOTE(review): assumes audit_log was created with AUTOINCREMENT, so
        # that sqlite_sequence exists — confirm against the schema.
        conn.execute("DELETE FROM sqlite_sequence WHERE name='audit_log'")
        conn.commit()
    finally:
        conn.close()
56
+
57
+
58
  def query(filters: dict | None = None) -> list[dict]:
59
  conn = _conn()
60
  sql = "SELECT * FROM audit_log"
core/config.py CHANGED
@@ -19,3 +19,9 @@ CHROMA_DIR = str(BASE_DIR / ".chroma")
19
  AUDIT_DB = str(BASE_DIR / "audit.db")
20
  PRECOMPUTED_DIR = DATA_DIR / "precomputed"
21
  OCR_CACHE_DIR = BASE_DIR / ".ocr_cache"
 
 
 
 
 
 
 
19
  AUDIT_DB = str(BASE_DIR / "audit.db")
20
  PRECOMPUTED_DIR = DATA_DIR / "precomputed"
21
  OCR_CACHE_DIR = BASE_DIR / ".ocr_cache"
22
+
23
+ BIDDER_NAMES = {
24
+ "bidder_a": "Apex Constructions Pvt. Ltd.",
25
+ "bidder_b": "BuildRight Enterprises",
26
+ "bidder_c": "Shree Constructions & Services",
27
+ }
core/evaluator.py CHANGED
@@ -60,7 +60,9 @@ def evaluate(bidder_id: str, criterion: Criterion) -> Verdict:
60
  )
61
  audit.log("criterion_evaluated", bidder_id=bidder_id,
62
  criterion_id=criterion.id, verdict="needs_review",
63
- combined_confidence=0.0)
 
 
64
  return v
65
 
66
  evidence_dicts = [
@@ -148,9 +150,26 @@ Rules:
148
  timestamp=_now_iso(),
149
  review_status="pending",
150
  )
151
- audit.log("criterion_evaluated", bidder_id=bidder_id,
152
- criterion_id=criterion.id, verdict=final_verdict,
153
- combined_confidence=round(combined, 4))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  return v
155
 
156
 
 
60
  )
61
  audit.log("criterion_evaluated", bidder_id=bidder_id,
62
  criterion_id=criterion.id, verdict="needs_review",
63
+ llm_verdict="needs_review", extracted_value="",
64
+ llm_confidence=0.0, combined_confidence=0.0,
65
+ ocr_tier="", escalation_reason="no evidence found", reason=v.reason)
66
  return v
67
 
68
  evidence_dicts = [
 
150
  timestamp=_now_iso(),
151
  review_status="pending",
152
  )
153
+ escalation_reason = None
154
+ if llm_verdict != final_verdict:
155
+ if combined < CONFIDENCE_REVIEW:
156
+ escalation_reason = f"auto-escalated: combined confidence {combined:.0%} below threshold"
157
+ elif combined < CONFIDENCE_HIGH and llm_verdict == "not_eligible":
158
+ escalation_reason = f"auto-escalated: borderline confidence {combined:.0%} on disqualification"
159
+
160
+ audit.log(
161
+ "criterion_evaluated",
162
+ bidder_id=bidder_id,
163
+ criterion_id=criterion.id,
164
+ verdict=final_verdict,
165
+ llm_verdict=llm_verdict,
166
+ extracted_value=extracted_value or "",
167
+ llm_confidence=round(llm_confidence, 4),
168
+ combined_confidence=round(combined, 4),
169
+ ocr_tier=source_type,
170
+ escalation_reason=escalation_reason or "",
171
+ reason=reason,
172
+ )
173
  return v
174
 
175
 
ui/tab_audit.py CHANGED
@@ -1,14 +1,99 @@
1
  import io
 
2
 
3
  import pandas as pd
4
  import streamlit as st
5
 
6
  from core import audit
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def render() -> None:
10
  st.header("Audit Log")
 
 
 
 
11
 
 
12
  col1, col2, col3 = st.columns(3)
13
  with col1:
14
  bidder_filter = st.selectbox(
@@ -18,11 +103,24 @@ def render() -> None:
18
  with col2:
19
  action_filter = st.selectbox(
20
  "Filter by action",
21
- options=["All", "criteria_extracted", "bidder_processed", "criterion_evaluated",
22
- "human_review_action", "precomputed_fallback_used", "vision_ocr_invoked"],
23
  )
24
  with col3:
25
- st.markdown("&nbsp;") # spacer
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  filters: dict = {}
28
  if bidder_filter != "All":
@@ -36,17 +134,61 @@ def render() -> None:
36
  st.info("No audit entries yet. Run an evaluation to generate entries.")
37
  return
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  df = pd.DataFrame(rows)
40
- display_cols = ["id", "ts", "action", "actor", "bidder_id", "criterion_id", "payload_json"]
41
- display_cols = [c for c in display_cols if c in df.columns]
42
- df_display = df[display_cols].copy()
43
- df_display["ts"] = df_display["ts"].str[:19].str.replace("T", " ")
44
 
45
- st.markdown(f"**{len(rows)} entries** (newest first)")
46
- st.dataframe(df_display, use_container_width=True, hide_index=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
 
 
 
48
  csv_buf = io.StringIO()
49
- df_display.to_csv(csv_buf, index=False)
50
  st.download_button(
51
  label="Export CSV",
52
  data=csv_buf.getvalue().encode("utf-8"),
 
1
  import io
2
+ import json
3
 
4
  import pandas as pd
5
  import streamlit as st
6
 
7
  from core import audit
8
 
9
+ _ACTION_LABELS = {
10
+ "criteria_extracted": "📋 Criteria Extracted",
11
+ "bidder_processed": "📥 Bidder Document Indexed",
12
+ "criterion_evaluated": "⚖️ Criterion Evaluated",
13
+ "human_review_action": "👤 Human Review Action",
14
+ "precomputed_fallback_used":"⚠️ Fallback Used",
15
+ "vision_ocr_invoked": "👁️ Vision OCR Invoked",
16
+ "smoke_test": "🧪 Smoke Test",
17
+ }
18
+
19
+ _ACTION_CATEGORIES = {
20
+ "criteria_extracted": "system",
21
+ "bidder_processed": "system",
22
+ "criterion_evaluated": "system",
23
+ "human_review_action": "human",
24
+ "precomputed_fallback_used": "warning",
25
+ "vision_ocr_invoked": "system",
26
+ }
27
+
28
+ _VERDICT_ICONS = {
29
+ "eligible": "✅ Eligible",
30
+ "not_eligible": "❌ Not Eligible",
31
+ "needs_review": "⚠️ Needs Review",
32
+ }
33
+
34
+
35
+ def _make_summary(row: dict) -> str:
36
+ action = row.get("action", "")
37
+ bidder = row.get("bidder_id") or ""
38
+ crit = row.get("criterion_id") or ""
39
+ try:
40
+ p = json.loads(row.get("payload_json") or "{}")
41
+ except Exception:
42
+ p = {}
43
+
44
+ if action == "criteria_extracted":
45
+ return f"Extracted {p.get('count', '?')} criteria from {p.get('source', 'tender PDF')}"
46
+
47
+ if action == "bidder_processed":
48
+ return f"{bidder} — {p.get('doc_name', '?')} indexed ({p.get('chunk_count', '?')} chunks)"
49
+
50
+ if action == "criterion_evaluated":
51
+ verdict = _VERDICT_ICONS.get(p.get("verdict", ""), p.get("verdict", "?"))
52
+ conf = p.get("combined_confidence", "?")
53
+ conf_str = f"{float(conf):.0%}" if conf != "?" else "?"
54
+ extracted = p.get("extracted_value", "")
55
+ esc = p.get("escalation_reason", "")
56
+ base = f"{bidder} / {crit} → {verdict} (confidence: {conf_str})"
57
+ if extracted:
58
+ base += f" | Extracted: {extracted}"
59
+ if esc:
60
+ base += f" | ⚠️ {esc}"
61
+ return base
62
+
63
+ if action == "human_review_action":
64
+ taken = p.get("action_taken", "?").capitalize()
65
+ orig = p.get("original_extracted_value", "")
66
+ edited = p.get("edited_value", "")
67
+ base = f"Officer {taken}: {bidder} / {crit}"
68
+ if orig:
69
+ base += f" | Original value: {orig}"
70
+ if edited:
71
+ base += f" → Edited to: {edited}"
72
+ return base
73
+
74
+ if action == "precomputed_fallback_used":
75
+ return f"API unavailable — pre-computed data used | {p.get('reason', '')}"
76
+
77
+ if action == "vision_ocr_invoked":
78
+ tc = p.get("tesseract_conf", "?")
79
+ tc_str = f"{float(tc):.0%}" if tc != "?" else "?"
80
+ return f"{bidder} page {p.get('page', '?')} — Tesseract confidence {tc_str}, escalated to Vision LLM"
81
+
82
+ return action
83
+
84
+
85
+ def _category_color(category: str) -> str:
86
+ return {"system": "🔵", "human": "🟢", "warning": "🟡"}.get(category, "⚪")
87
+
88
 
89
  def render() -> None:
90
  st.header("Audit Log")
91
+ st.caption(
92
+ "Every system action and human decision is recorded here. "
93
+ "This log is the compliance trail — it can be exported and submitted as part of the evaluation record."
94
+ )
95
 
96
+ # ── Filters ──────────────────────────────────────────────────────────────
97
  col1, col2, col3 = st.columns(3)
98
  with col1:
99
  bidder_filter = st.selectbox(
 
103
  with col2:
104
  action_filter = st.selectbox(
105
  "Filter by action",
106
+ options=["All"] + list(_ACTION_LABELS.keys()),
107
+ format_func=lambda x: "All" if x == "All" else _ACTION_LABELS.get(x, x),
108
  )
109
  with col3:
110
+ if st.button("🗑 Clear Log", type="secondary", use_container_width=True):
111
+ st.session_state["confirm_clear_audit"] = True
112
+
113
+ if st.session_state.get("confirm_clear_audit"):
114
+ st.warning("This will permanently delete all audit entries. Are you sure?")
115
+ c1, c2 = st.columns(2)
116
+ if c1.button("Yes, clear everything", type="primary", use_container_width=True):
117
+ audit.clear()
118
+ st.session_state.pop("confirm_clear_audit", None)
119
+ st.success("Audit log cleared.")
120
+ st.rerun()
121
+ if c2.button("Cancel", use_container_width=True):
122
+ st.session_state.pop("confirm_clear_audit", None)
123
+ st.rerun()
124
 
125
  filters: dict = {}
126
  if bidder_filter != "All":
 
134
  st.info("No audit entries yet. Run an evaluation to generate entries.")
135
  return
136
 
137
+ # ── Summary counts ────────────────────────────────────────────────────────
138
+ total = len(rows)
139
+ human_actions = sum(1 for r in rows if r["action"] == "human_review_action")
140
+ fallbacks = sum(1 for r in rows if r["action"] == "precomputed_fallback_used")
141
+ vision_ocr = sum(1 for r in rows if r["action"] == "vision_ocr_invoked")
142
+
143
+ m1, m2, m3, m4 = st.columns(4)
144
+ m1.metric("Total entries", total)
145
+ m2.metric("Human actions", human_actions)
146
+ m3.metric("Fallback events", fallbacks)
147
+ m4.metric("Vision OCR calls", vision_ocr)
148
+
149
+ st.divider()
150
+
151
+ # ── Human-readable table ──────────────────────────────────────────────────
152
  df = pd.DataFrame(rows)
 
 
 
 
153
 
154
+ df["Action"] = df["action"].map(lambda x: _ACTION_LABELS.get(x, x))
155
+ df["Category"] = df["action"].map(
156
+ lambda x: _category_color(_ACTION_CATEGORIES.get(x, "system"))
157
+ )
158
+ df["Summary"] = df.apply(_make_summary, axis=1)
159
+ df["Timestamp"] = df["ts"].str[:19].str.replace("T", " ")
160
+ df["Actor"] = df["actor"]
161
+ df["Bidder"] = df["bidder_id"].fillna("—")
162
+ df["Criterion"] = df["criterion_id"].fillna("—")
163
+
164
+ display = df[["Category", "Timestamp", "Action", "Bidder", "Criterion", "Summary", "Actor"]].copy()
165
+
166
+ st.dataframe(
167
+ display,
168
+ use_container_width=True,
169
+ hide_index=True,
170
+ column_config={
171
+ "Category": st.column_config.TextColumn("", width="small"),
172
+ "Timestamp": st.column_config.TextColumn("Timestamp", width="medium"),
173
+ "Action": st.column_config.TextColumn("Action", width="medium"),
174
+ "Bidder": st.column_config.TextColumn("Bidder", width="small"),
175
+ "Criterion": st.column_config.TextColumn("Criterion", width="small"),
176
+ "Summary": st.column_config.TextColumn("Summary", width="large"),
177
+ "Actor": st.column_config.TextColumn("Actor", width="small"),
178
+ },
179
+ )
180
+
181
+ # ── Raw detail expander ───────────────────────────────────────────────────
182
+ with st.expander("Raw payload data (for compliance / full detail)", expanded=False):
183
+ raw_df = df[["Timestamp", "action", "actor", "bidder_id", "criterion_id", "payload_json"]].copy()
184
+ raw_df.columns = ["Timestamp", "action", "actor", "bidder_id", "criterion_id", "payload_json"]
185
+ st.dataframe(raw_df, use_container_width=True, hide_index=True)
186
 
187
+ # ── Export ────────────────────────────────────────────────────────────────
188
+ export_df = df[["Timestamp", "Action", "Actor", "Bidder", "Criterion", "Summary"]].copy()
189
+ export_df["raw_payload"] = df["payload_json"]
190
  csv_buf = io.StringIO()
191
+ export_df.to_csv(csv_buf, index=False)
192
  st.download_button(
193
  label="Export CSV",
194
  data=csv_buf.getvalue().encode("utf-8"),
ui/tab_bidders.py CHANGED
@@ -1,17 +1,15 @@
1
- from pathlib import Path
2
-
3
  import streamlit as st
4
 
5
  from core import bidder_processor, evaluator
6
- from core.config import DATA_DIR
7
  from core.fallback import load_criteria
8
  from core.schemas import Criterion
9
  from ui.components import category_badge, confidence_bar, ocr_tier_badge, verdict_pill
10
 
11
  _BIDDER_LABELS = {
12
- "bidder_a": "Bidder A — Apex Constructions (Clearly Eligible)",
13
  "bidder_b": "Bidder B — BuildRight Enterprises (Ineligible: Low Turnover)",
14
- "bidder_c": "Bidder C — Shree Constructions (Scanned Cert: Needs Review)",
15
  }
16
 
17
 
@@ -22,8 +20,12 @@ def _get_criteria() -> list[Criterion]:
22
  return load_criteria()
23
 
24
 
25
- def _overall_verdict(verdicts: list[dict]) -> str:
26
- mandatory = [v for v in verdicts if True] # all criteria checked
 
 
 
 
27
  if any(v["verdict"] == "not_eligible" for v in mandatory):
28
  return "not_eligible"
29
  if any(v["verdict"] == "needs_review" for v in mandatory):
@@ -52,14 +54,15 @@ def render() -> None:
52
  f for f in (DATA_DIR / "bidders" / bidder_id).iterdir()
53
  if f.suffix.lower() in {".pdf", ".png", ".jpg"}
54
  )
55
- with st.spinner(f"Processing {bidder_id} documents…"):
56
  bidder_processor.process_bidder(bidder_id, files)
57
  verdicts_list = []
58
  for c in criteria:
59
  v = evaluator.evaluate(bidder_id, c)
60
  verdicts_list.append(v.model_dump())
61
  done += 1
62
- progress.progress(done / total, text=f"Evaluated {c.id} for {bidder_id}")
 
63
  verdicts_dict[bidder_id] = verdicts_list
64
  st.session_state["verdicts"] = verdicts_dict
65
  progress.empty()
@@ -77,13 +80,34 @@ def render() -> None:
77
  if bidder_id not in verdicts_data:
78
  continue
79
  verdicts = verdicts_data[bidder_id]
80
- overall = _overall_verdict(verdicts)
81
  overall_pill = verdict_pill(overall)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- with st.expander(
84
- f"**{_BIDDER_LABELS.get(bidder_id, bidder_id)}** — Overall: {overall_pill}",
85
- expanded=True,
86
- ):
87
  for v in verdicts:
88
  crit = crit_map.get(v["criterion_id"])
89
  crit_title = crit.title if crit else v["criterion_id"]
@@ -105,7 +129,7 @@ def render() -> None:
105
  conf = v.get("combined_confidence", 0.0)
106
  confidence_bar(conf)
107
 
108
- if v.get("reason") or v.get("source"):
109
  with st.expander("Details", expanded=False):
110
  if v.get("reason"):
111
  st.markdown(f"**Reason:** {v['reason']}")
 
 
 
1
  import streamlit as st
2
 
3
  from core import bidder_processor, evaluator
4
+ from core.config import BIDDER_NAMES, DATA_DIR
5
  from core.fallback import load_criteria
6
  from core.schemas import Criterion
7
  from ui.components import category_badge, confidence_bar, ocr_tier_badge, verdict_pill
8
 
9
  _BIDDER_LABELS = {
10
+ "bidder_a": "Bidder A — Apex Constructions Pvt. Ltd. (Clearly Eligible)",
11
  "bidder_b": "Bidder B — BuildRight Enterprises (Ineligible: Low Turnover)",
12
+ "bidder_c": "Bidder C — Shree Constructions & Services (Scanned Cert: Needs Review)",
13
  }
14
 
15
 
 
20
  return load_criteria()
21
 
22
 
23
+ def _overall_verdict(verdicts: list[dict], crit_map: dict) -> str:
24
+ """Only mandatory criteria determine overall eligibility."""
25
+ mandatory = [v for v in verdicts if crit_map.get(v["criterion_id"], None) and
26
+ crit_map[v["criterion_id"]].mandatory]
27
+ if not mandatory:
28
+ mandatory = verdicts # fallback if crit_map is missing
29
  if any(v["verdict"] == "not_eligible" for v in mandatory):
30
  return "not_eligible"
31
  if any(v["verdict"] == "needs_review" for v in mandatory):
 
54
  f for f in (DATA_DIR / "bidders" / bidder_id).iterdir()
55
  if f.suffix.lower() in {".pdf", ".png", ".jpg"}
56
  )
57
+ with st.spinner(f"Processing {BIDDER_NAMES.get(bidder_id, bidder_id)} documents…"):
58
  bidder_processor.process_bidder(bidder_id, files)
59
  verdicts_list = []
60
  for c in criteria:
61
  v = evaluator.evaluate(bidder_id, c)
62
  verdicts_list.append(v.model_dump())
63
  done += 1
64
+ progress.progress(done / total,
65
+ text=f"Evaluated {c.id} for {BIDDER_NAMES.get(bidder_id, bidder_id)}")
66
  verdicts_dict[bidder_id] = verdicts_list
67
  st.session_state["verdicts"] = verdicts_dict
68
  progress.empty()
 
80
  if bidder_id not in verdicts_data:
81
  continue
82
  verdicts = verdicts_data[bidder_id]
83
+ overall = _overall_verdict(verdicts, crit_map)
84
  overall_pill = verdict_pill(overall)
85
+ friendly = BIDDER_NAMES.get(bidder_id, bidder_id)
86
+ mandatory_count = sum(1 for v in verdicts
87
+ if crit_map.get(v["criterion_id"]) and
88
+ crit_map[v["criterion_id"]].mandatory)
89
+ passed = sum(1 for v in verdicts
90
+ if v["verdict"] == "eligible" and
91
+ crit_map.get(v["criterion_id"]) and
92
+ crit_map[v["criterion_id"]].mandatory)
93
+
94
+ with st.container(border=True):
95
+ st.markdown(
96
+ f"#### {friendly} — Overall: {overall_pill}"
97
+ f" <span style='font-size:0.85em; color:grey;'>"
98
+ f"({passed}/{mandatory_count} mandatory criteria met)</span>",
99
+ unsafe_allow_html=True,
100
+ )
101
+
102
+ # Column headers
103
+ hcols = st.columns([3, 2, 2, 2, 1])
104
+ hcols[0].caption("Criterion")
105
+ hcols[1].caption("Verdict")
106
+ hcols[2].caption("Extracted Value")
107
+ hcols[3].caption("Source / OCR Tier")
108
+ hcols[4].caption("Category")
109
+ st.divider()
110
 
 
 
 
 
111
  for v in verdicts:
112
  crit = crit_map.get(v["criterion_id"])
113
  crit_title = crit.title if crit else v["criterion_id"]
 
129
  conf = v.get("combined_confidence", 0.0)
130
  confidence_bar(conf)
131
 
132
+ if v.get("reason") or (v.get("source") and v["source"].get("snippet")):
133
  with st.expander("Details", expanded=False):
134
  if v.get("reason"):
135
  st.markdown(f"**Reason:** {v['reason']}")
ui/tab_interpretability.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import streamlit as st
4
+
5
+ from core.config import BIDDER_NAMES, DATA_DIR, MODEL_VERSION
6
+ from core.fallback import load_criteria
7
+ from core.llm_client import LLM, LLMUnavailable
8
+ from core.pdf_utils import render_page_to_image
9
+ from core.schemas import Criterion
10
+
11
+ _VERDICT_PLAIN = {
12
+ "eligible": ("✅", "PASSED", "green"),
13
+ "not_eligible": ("❌", "FAILED", "red"),
14
+ "needs_review": ("⚠️", "NEEDS REVIEW", "orange"),
15
+ }
16
+
17
+ _CRITERION_RULE_PLAIN = {
18
+ "numeric_threshold": lambda r: (
19
+ f"must be ≥ {r['value']:,} {r.get('unit') or ''}" if r["operator"] == ">="
20
+ else f"must be ≤ {r['value']:,} {r.get('unit') or ''}"
21
+ ),
22
+ "count_threshold": lambda r: f"must have completed at least {int(r['value'])}",
23
+ "certification_present": lambda _: "valid certificate must be present",
24
+ "document_present": lambda _: "supporting document must be present",
25
+ }
26
+
27
+
28
def _get_criteria() -> list[Criterion]:
    """Return the criteria held in the session, else what ``load_criteria()`` gives.

    Session entries are stored as plain dicts and are rebuilt into
    ``Criterion`` objects here.
    """
    stored = st.session_state.get("criteria")
    if not stored:
        return load_criteria()
    return [Criterion(**fields) for fields in stored]
33
+
34
+
35
def _plain_explanation(v: dict, crit: Criterion | None) -> str:
    """Compose the plain-English markdown sentence for one verdict.

    Args:
        v: Verdict dict; reads ``verdict``, ``extracted_value`` and ``reason``.
        crit: The criterion the verdict belongs to, or ``None`` if unknown.

    Returns:
        The bare ``reason`` when ``crit`` is missing, otherwise
        ``"<icon> **<title>** — <LABEL>.<value part> <reason>"``.
    """
    verdict = v.get("verdict", "")
    extracted = v.get("extracted_value") or ""
    reason = v.get("reason") or ""

    if not crit:
        return reason

    icon, label, _ = _VERDICT_PLAIN.get(verdict, ("❓", verdict, "grey"))

    if verdict == "eligible":
        val_part = f" Found: **{extracted}**." if extracted else ""
    elif verdict == "not_eligible":
        # The requirement text is only shown for failures, so it is only built
        # here; other verdicts no longer depend on the rule formatting cleanly.
        rule = crit.rule
        rule_desc = _CRITERION_RULE_PLAIN.get(rule.type, lambda _: "")(rule.model_dump())
        if extracted:
            val_part = (
                f" Found: **{extracted}** — this does not meet the requirement ({rule_desc})."
            )
        else:
            val_part = f" Required: {rule_desc}."
    else:  # needs_review, or any verdict string not recognised above
        val_part = f" Extracted value: **{extracted}**." if extracted else ""

    return f"{icon} **{crit.title}** — {label}.{val_part} {reason}"
60
+
61
+
62
+ def _source_citation(v: dict) -> str | None:
63
+ src = v.get("source")
64
+ if not src:
65
+ return None
66
+ doc = src.get("doc_name", "")
67
+ page = src.get("page", "")
68
+ tier = src.get("source_type", "")
69
+ tier_labels = {"text_pdf": "typed PDF", "tesseract": "Tesseract OCR",
70
+ "vision_llm": "Vision LLM OCR"}
71
+ return f"📄 **{doc}**, page {page} · read by _{tier_labels.get(tier, tier)}_"
72
+
73
+
74
def _build_qa_context(bidder_id: str, verdicts: list[dict],
                      criteria: list[Criterion]) -> str:
    """Flatten one bidder's verdicts into the text block used for Q&A.

    For each verdict the block lists the criterion, its verdict in upper case,
    any extracted value, the evidence source and snippet (first 200
    characters), the confidence and reason, and the requirement in words when
    the criterion is known.
    """
    by_id = {c.id: c for c in criteria}
    out = [
        f"BIDDER: {BIDDER_NAMES.get(bidder_id, bidder_id)} ({bidder_id})",
        "",
        "EVALUATION RESULTS:",
    ]

    for v in verdicts:
        crit = by_id.get(v["criterion_id"])
        if crit:
            crit_title = crit.title
            mandatory = "Mandatory" if crit.mandatory else "Optional"
        else:
            crit_title = v["criterion_id"]
            mandatory = "Unknown"

        out.append(
            f" {v['criterion_id']} — {crit_title} [{mandatory}]: "
            f"{v['verdict'].upper()}"
        )

        if v.get("extracted_value"):
            out.append(f" Extracted value: {v['extracted_value']}")

        src = v.get("source")
        if src:
            out.append(
                f" Evidence source: {src.get('doc_name')} page {src.get('page')} "
                f"(read by {src.get('source_type')})"
            )
            if src.get("snippet"):
                out.append(f" Evidence snippet: \"{src['snippet'][:200]}\"")

        out.append(
            f" Confidence: {v.get('combined_confidence', 0):.0%} | "
            f"Reason: {v.get('reason', '')}"
        )

        if crit:
            describe = _CRITERION_RULE_PLAIN.get(crit.rule.type, lambda _: "")
            rule_desc = describe(crit.rule.model_dump())
            out.append(f" Requirement: {rule_desc}")

        out.append("")

    return "\n".join(out)
110
+
111
+
112
def _answer_question(question: str, context: str) -> str:
    """Answer an officer's question about an evaluation.

    The LLM is asked first. The rule-based answer is used instead when the LLM
    is unavailable, or when its reply carries no usable ``answer`` field, so
    the caller never receives an empty answer from this path.

    Args:
        question: The officer's free-text question.
        context: Evaluation summary text the answer must be based on.

    Returns:
        The answer text.
    """
    system = """You are a procurement compliance assistant helping an evaluation officer
understand AI-generated eligibility verdicts. Answer questions about the bidder's evaluation
in plain, professional English. Always cite specific document names and page numbers from the
evidence. Be concise (2-4 sentences). Do not invent information not present in the context."""

    user = f"""{context}

OFFICER'S QUESTION: {question}

Answer the question based only on the evaluation results above.
Cite the specific document and page number when referring to evidence."""

    try:
        result = LLM().chat_json(
            system + " Return JSON: {\"answer\": \"<your answer>\"}",
            user,
        )
    except LLMUnavailable:
        return _rule_based_answer(question, context)

    # A reply that is not a JSON object, or has a blank/missing "answer",
    # would otherwise show up as an empty answer box.
    answer = result.get("answer") if isinstance(result, dict) else None
    if answer and str(answer).strip():
        return answer
    return _rule_based_answer(question, context)
134
+
135
+
136
+ def _rule_based_answer(question: str, context: str) -> str:
137
+ q = question.lower()
138
+ lines = context.splitlines()
139
+
140
+ if any(w in q for w in ["reject", "fail", "not eligible", "disqualif"]):
141
+ fails = [l for l in lines if "NOT_ELIGIBLE" in l or "NEEDS_REVIEW" in l]
142
+ if fails:
143
+ return ("Based on the evaluation: " +
144
+ "; ".join(f.strip() for f in fails[:3]) +
145
+ ". See the Bidder Evaluation tab for full details.")
146
+ return "No failing criteria were found in the evaluation."
147
+
148
+ if any(w in q for w in ["pass", "eligible", "meet", "satisfy"]):
149
+ passes = [l for l in lines if "ELIGIBLE" in l and "NOT_ELIGIBLE" not in l]
150
+ if passes:
151
+ return ("Criteria passed: " +
152
+ "; ".join(f.strip() for f in passes[:3]) + ".")
153
+ return "No passing criteria were found."
154
+
155
+ if any(w in q for w in ["turnover", "financial", "revenue", "c1"]):
156
+ relevant = [l for l in lines if "C1" in l or "turnover" in l.lower() or
157
+ "Extracted value" in l]
158
+ if relevant:
159
+ return " ".join(l.strip() for l in relevant[:4])
160
+
161
+ return ("I cannot answer that specific question without the live LLM. "
162
+ "The evaluation summary above contains the full details.")
163
+
164
+
165
def _render_source_preview(bidder_id: str, src: dict) -> None:
    """Show a collapsed preview of the cited PDF page or image, when it exists on disk."""
    doc_name = src.get("doc_name", "")
    if not doc_name:
        # Without a document name the path would resolve to the bidder folder itself.
        return

    page_no = src.get("page", 1)
    doc_path = DATA_DIR / "bidders" / bidder_id / doc_name
    if not doc_path.exists():
        return

    suffix = doc_path.suffix.lower()
    if suffix == ".pdf":
        with st.expander(f"View source page ({doc_name}, p{page_no})",
                         expanded=False):
            try:
                img = render_page_to_image(doc_path, page_no)
                st.image(img, caption=f"{doc_name} — Page {page_no}",
                         use_container_width=True)
            except Exception:
                # Best effort: a failed preview must not take down the tab.
                st.caption("Page preview unavailable.")
    elif suffix in {".png", ".jpg"}:
        with st.expander(f"View source image ({doc_name})", expanded=False):
            st.image(str(doc_path), caption=doc_name,
                     use_container_width=True)


def render() -> None:
    """Render the Interpretability tab.

    Shows, for the selected bidder: an overall eligibility banner, one card
    per criterion with a plain-English explanation, source citation and page
    preview, and a question box answered by ``_answer_question``. Returns
    early with a hint when the session holds no verdicts.
    """
    st.header("Interpretability")
    st.caption(
        "Plain-English explanations of why each bidder was evaluated the way it was, "
        "with full source citations. Ask any question about the evaluation."
    )

    verdicts_data = st.session_state.get("verdicts", {})
    if not verdicts_data:
        st.info("No evaluation results yet. Run the evaluation in Bidder Evaluation tab or "
                "click **Load Pre-computed Demo** in the Overview tab.")
        return

    criteria = _get_criteria()
    crit_map = {c.id: c for c in criteria}

    bidder_id = st.selectbox(
        "Select bidder",
        options=list(verdicts_data.keys()),
        format_func=lambda x: BIDDER_NAMES.get(x, x),
    )

    verdicts = verdicts_data.get(bidder_id, [])
    if not verdicts:
        st.warning("No verdicts available for this bidder.")
        return

    # ── Overall summary ───────────────────────────────────────────────────────
    mandatory_verdicts = [v for v in verdicts
                          if crit_map.get(v["criterion_id"]) and
                          crit_map[v["criterion_id"]].mandatory]
    if not mandatory_verdicts:
        # Same fallback as the Bidder Evaluation tab's overall verdict: when no
        # verdict maps to a mandatory criterion, judge on all of them so the
        # two tabs cannot disagree.
        mandatory_verdicts = verdicts
    failed = [v for v in mandatory_verdicts if v["verdict"] == "not_eligible"]
    review = [v for v in mandatory_verdicts if v["verdict"] == "needs_review"]
    passed = [v for v in mandatory_verdicts if v["verdict"] == "eligible"]

    friendly = BIDDER_NAMES.get(bidder_id, bidder_id)

    if failed:
        st.error(
            f"**{friendly} — NOT ELIGIBLE**\n\n"
            f"Failed {len(failed)} mandatory criterion/criteria. "
            f"A bidder must meet all mandatory criteria to qualify."
        )
    elif review:
        st.warning(
            f"**{friendly} — NEEDS REVIEW**\n\n"
            f"Passed {len(passed)} mandatory criteria, but {len(review)} could not be "
            f"automatically confirmed and require officer verification."
        )
    else:
        st.success(
            f"**{friendly} — ELIGIBLE**\n\n"
            f"All {len(passed)} mandatory criteria satisfied."
        )

    st.divider()

    # ── Per-criterion plain-English cards ─────────────────────────────────────
    st.subheader("Criterion-by-Criterion Breakdown")

    for v in verdicts:
        crit = crit_map.get(v["criterion_id"])
        _, label, color = _VERDICT_PLAIN.get(v["verdict"], ("❓", v["verdict"], "grey"))
        mandatory_tag = "🔴 Mandatory" if (crit and crit.mandatory) else "🟡 Optional"

        with st.container(border=True):
            col_status, col_detail = st.columns([1, 4])

            with col_status:
                if color == "green":
                    st.success(label)
                elif color == "red":
                    st.error(label)
                else:
                    st.warning(label)
                st.caption(mandatory_tag)
                conf = v.get("combined_confidence", 0.0)
                st.caption(f"Confidence: {conf:.0%}")

            with col_detail:
                explanation = _plain_explanation(v, crit)
                st.markdown(explanation)

                citation = _source_citation(v)
                if citation:
                    st.markdown(citation)

                # Page preview. "source" can be present but None (verdicts with
                # no evidence), so a .get() default alone is not enough.
                _render_source_preview(bidder_id, v.get("source") or {})

    st.divider()

    # ── Q&A section ───────────────────────────────────────────────────────────
    st.subheader("Ask About This Evaluation")
    st.caption(
        "Ask any question about why this bidder was evaluated the way it was. "
        "Answers cite specific documents and pages."
    )

    suggestions = [
        "Why was this bidder rejected?",
        "Which criteria did this bidder fail?",
        "What turnover figure was found and which document was it from?",
        "Is this bidder ISO certified?",
        "Why is the turnover verdict in review?",
    ]
    with st.expander("Example questions", expanded=False):
        for s in suggestions:
            st.markdown(f"- _{s}_")

    # Widget keys are per bidder so switching bidder starts with a fresh input.
    question = st.text_input(
        "Your question",
        placeholder="e.g. Why was this bidder's turnover flagged for review?",
        key=f"qa_input_{bidder_id}",
    )

    if st.button("Get Answer", type="primary", key=f"qa_btn_{bidder_id}"):
        if not question.strip():
            st.warning("Please enter a question.")
        else:
            context = _build_qa_context(bidder_id, verdicts, criteria)
            with st.spinner("Looking up the answer…"):
                answer = _answer_question(question, context)

            st.markdown("**Answer:**")
            st.info(answer)

            with st.expander("Full evaluation context used to answer", expanded=False):
                st.code(context, language="text")
ui/tab_overview.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
 
3
  from core import audit
 
4
  from core.fallback import load_criteria
5
 
6
 
@@ -30,23 +31,94 @@ def render() -> None:
30
 
31
  st.divider()
32
 
33
- st.subheader("How it works")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  col_a, col_b = st.columns(2)
35
  with col_a:
36
  st.markdown("""
37
- **Stage 1 — Extract Criteria**
38
- DeepSeek LLM reads the tender PDF and extracts each eligibility criterion as structured JSON (category, rule, query hints).
 
 
39
 
40
- **Stage 2 — OCR & Index Bidder Docs**
41
- Three-tier OCR: PyMuPDF (typed PDF) → Tesseract → DeepSeek Vision LLM (low-confidence scans). All pages indexed into ChromaDB.
 
 
 
 
42
  """)
43
  with col_b:
44
  st.markdown("""
45
- **Stage 3 — Evaluate per Criterion**
46
- Vector search retrieves relevant evidence chunks. DeepSeek evaluates eligible / not_eligible / needs_review with a combined confidence score.
 
 
 
47
 
48
- **Stage 4 — Human Review & Audit**
49
- Low-confidence verdicts are routed to the review queue. Every action is logged with timestamp, model version, and payload.
 
 
50
  """)
51
 
52
  st.divider()
@@ -59,13 +131,13 @@ Low-confidence verdicts are routed to the review queue. Every action is logged w
59
  criteria = lc()
60
  st.session_state["criteria"] = [c.model_dump() for c in criteria]
61
  verdicts_dict: dict = {}
62
- for bidder_id in ["bidder_a", "bidder_b", "bidder_c"]:
63
  verdicts_dict[bidder_id] = [
64
  load_evaluation(bidder_id, c.id).model_dump()
65
  for c in criteria
66
  ]
67
  st.session_state["verdicts"] = verdicts_dict
68
- st.success("Pre-computed demo data loaded. Navigate to the other tabs.")
69
  st.rerun()
70
  with col2:
71
- st.info("Or go to **Tender Analysis** tab to run the live LLM pipeline.")
 
1
  import streamlit as st
2
 
3
  from core import audit
4
+ from core.config import BIDDER_NAMES
5
  from core.fallback import load_criteria
6
 
7
 
 
31
 
32
  st.divider()
33
 
34
+ # Architecture diagram
35
+ st.subheader("System Architecture")
36
+ st.markdown("""
37
+ ```
38
+ ┌─────────────────────────────────────────────────────────────────────┐
39
+ │ TenderIQ Pipeline │
40
+ └─────────────────────────────────────────────────────────────────────┘
41
+
42
+ 📄 Tender PDF 📁 Bidder Documents
43
+ │ (PDFs, scans, photos)
44
+ │ │
45
+ ▼ ▼
46
+ ┌───────────┐ ┌────────────────────────┐
47
+ │ DeepSeek │ │ 3-Tier OCR Pipeline │
48
+ │ LLM │ │ ① PyMuPDF (typed) │
49
+ │ (Stage 1) │ │ ② Tesseract (scans) │
50
+ └───────────┘ │ ③ Vision LLM (poor) │
51
+ │ └────────────────────────┘
52
+ │ │
53
+ ▼ ▼
54
+ ┌───────────┐ ┌────────────────────────┐
55
+ │ Criteria │ │ ChromaDB Vector │
56
+ │ C1 – C5 │ │ Index (per bidder) │
57
+ │ (JSON) │ └────────────────────────┘
58
+ └───────────┘ │
59
+ │ │ semantic search
60
+ └──────────────────┬───────────────────┘
61
+
62
+
63
+ ┌─────────────────────┐
64
+ │ DeepSeek LLM │
65
+ │ (Stage 3 eval) │
66
+ │ │
67
+ │ evidence → verdict │
68
+ │ + confidence score │
69
+ └─────────────────────┘
70
+
71
+ ┌─────────────┴──────────────┐
72
+ │ │
73
+ ▼ ▼
74
+ confidence ≥ 0.80 confidence < 0.80
75
+ verdict kept downgraded to
76
+ needs_review
77
+
78
+
79
+ ┌─────────────────┐
80
+ │ Human Review │
81
+ │ Queue (Tab 4) │
82
+ └─────────────────┘
83
+
84
+
85
+ ┌─────────────────┐
86
+ │ Audit Log │
87
+ │ (every action) │
88
+ └─────────────────┘
89
+ ```
90
+ """)
91
+
92
+ st.divider()
93
+
94
+ st.subheader("Pipeline Stages")
95
  col_a, col_b = st.columns(2)
96
  with col_a:
97
  st.markdown("""
98
+ **Stage 1 — Extract Criteria**
99
+ DeepSeek reads the full tender PDF and extracts each eligibility criterion as structured JSON:
100
+ category, mandatory flag, rule (threshold / certification / count), source clause, and query hints
101
+ for downstream retrieval.
102
 
103
+ **Stage 2 — OCR & Index Bidder Documents**
104
+ Three-tier pipeline handles any document format:
105
+ PyMuPDF for typed PDFs (instant, lossless) →
106
+ Tesseract for scans (free, fast) →
107
+ DeepSeek Vision LLM when Tesseract confidence < 65%.
108
+ All text is chunked and indexed into ChromaDB with full provenance metadata.
109
  """)
110
  with col_b:
111
  st.markdown("""
112
+ **Stage 3 — Evaluate per Criterion**
113
+ For each (bidder × criterion) pair: semantic search retrieves the most relevant evidence chunks,
114
+ DeepSeek decides eligible / not_eligible / needs_review with a combined confidence score
115
+ that weights LLM certainty against OCR quality.
116
+ The safety rule: never silently disqualify — borderline cases always go to human review.
117
 
118
+ **Stage 4 — Human Review & Audit**
119
+ Flagged verdicts surface in the Review Queue with full evidence and source citations.
120
+ Every action — extraction, indexing, evaluation, review — is logged to SQLite with
121
+ timestamp, model version, actor, and payload.
122
  """)
123
 
124
  st.divider()
 
131
  criteria = lc()
132
  st.session_state["criteria"] = [c.model_dump() for c in criteria]
133
  verdicts_dict: dict = {}
134
+ for bidder_id in BIDDER_NAMES:
135
  verdicts_dict[bidder_id] = [
136
  load_evaluation(bidder_id, c.id).model_dump()
137
  for c in criteria
138
  ]
139
  st.session_state["verdicts"] = verdicts_dict
140
+ st.success("Pre-computed demo loaded. Navigate to the other tabs.")
141
  st.rerun()
142
  with col2:
143
+ st.info("Or go to **Tender Analysis** to run the live LLM pipeline.")
ui/tab_review.py CHANGED
@@ -33,6 +33,11 @@ def render() -> None:
33
  return
34
 
35
  st.markdown(f"**{len(pending_items)} item(s) pending review**")
 
 
 
 
 
36
  st.divider()
37
 
38
  for bidder_id, idx, v in pending_items:
@@ -52,7 +57,7 @@ def render() -> None:
52
  st.markdown(f"Source snippet: _{v['source']['snippet']}_")
53
  with col2:
54
  conf = v.get("combined_confidence", 0.0)
55
- confidence_bar(conf, "Confidence")
56
 
57
  btn_col1, btn_col2, btn_col3 = st.columns(3)
58
  key_prefix = f"review_{bidder_id}_{v['criterion_id']}"
@@ -66,6 +71,9 @@ def render() -> None:
66
  bidder_id=bidder_id,
67
  criterion_id=v["criterion_id"],
68
  action_taken="approved",
 
 
 
69
  )
70
  st.rerun()
71
 
@@ -82,7 +90,10 @@ def render() -> None:
82
  bidder_id=bidder_id,
83
  criterion_id=v["criterion_id"],
84
  action_taken="edited",
 
 
85
  edited_value=edit_val,
 
86
  )
87
  st.rerun()
88
 
@@ -95,5 +106,8 @@ def render() -> None:
95
  bidder_id=bidder_id,
96
  criterion_id=v["criterion_id"],
97
  action_taken="rejected",
 
 
 
98
  )
99
  st.rerun()
 
33
  return
34
 
35
  st.markdown(f"**{len(pending_items)} item(s) pending review**")
36
+ st.caption(
37
+ "These verdicts require human confirmation before being finalised. "
38
+ "The certainty bar shows how confident the model is in its decision to flag the item — "
39
+ "not how likely the bidder is to meet the criterion."
40
+ )
41
  st.divider()
42
 
43
  for bidder_id, idx, v in pending_items:
 
57
  st.markdown(f"Source snippet: _{v['source']['snippet']}_")
58
  with col2:
59
  conf = v.get("combined_confidence", 0.0)
60
+ confidence_bar(conf, "Certainty in assessment")
61
 
62
  btn_col1, btn_col2, btn_col3 = st.columns(3)
63
  key_prefix = f"review_{bidder_id}_{v['criterion_id']}"
 
71
  bidder_id=bidder_id,
72
  criterion_id=v["criterion_id"],
73
  action_taken="approved",
74
+ original_verdict=v["verdict"],
75
+ original_extracted_value=v.get("extracted_value", ""),
76
+ combined_confidence=v.get("combined_confidence", 0.0),
77
  )
78
  st.rerun()
79
 
 
90
  bidder_id=bidder_id,
91
  criterion_id=v["criterion_id"],
92
  action_taken="edited",
93
+ original_verdict=v["verdict"],
94
+ original_extracted_value=v.get("extracted_value", ""),
95
  edited_value=edit_val,
96
+ combined_confidence=v.get("combined_confidence", 0.0),
97
  )
98
  st.rerun()
99
 
 
106
  bidder_id=bidder_id,
107
  criterion_id=v["criterion_id"],
108
  action_taken="rejected",
109
+ original_verdict=v["verdict"],
110
+ original_extracted_value=v.get("extracted_value", ""),
111
+ combined_confidence=v.get("combined_confidence", 0.0),
112
  )
113
  st.rerun()