Add Interpretability tab, fix mandatory verdict logic, demo reset, UI polish
- core/config.py: add BIDDER_NAMES dict (shared across all tabs)
- core/evaluator.py: richer criterion_evaluated audit payload (extracted_value,
llm_confidence, ocr_tier, reason, escalation_reason)
- ui/tab_review.py: store original_verdict + original_extracted_value in
human_review_action audit entries
- ui/tab_bidders.py: fix _overall_verdict to only count mandatory criteria;
add column headers; use friendly company names; show passed/total count
- ui/tab_overview.py: replace text architecture section with ASCII pipeline
diagram; use BIDDER_NAMES in demo loader
- ui/tab_audit.py: richer human-readable display with summary column,
category dots, metric cards, raw payload expander
- ui/tab_interpretability.py: new tab — plain-English per-criterion breakdown
with page-level source citations and inline PDF/image previews; LLM-powered
Q&A with rule-based fallback when API unavailable
- app.py: add Tab 6 (Interpretability); sidebar demo reset button that clears
audit DB + ChromaDB + OCR cache + session in one click; LLM probe cached
once per session
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- app.py +35 -2
- core/audit.py +8 -0
- core/config.py +6 -0
- core/evaluator.py +23 -4
- ui/tab_audit.py +152 -10
- ui/tab_bidders.py +39 -15
- ui/tab_interpretability.py +311 -0
- ui/tab_overview.py +84 -12
- ui/tab_review.py +15 -1
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
from ui.tab_overview import render as render_overview
|
|
@@ -5,6 +7,7 @@ from ui.tab_tender import render as render_tender
|
|
| 5 |
from ui.tab_bidders import render as render_bidders
|
| 6 |
from ui.tab_review import render as render_review
|
| 7 |
from ui.tab_audit import render as render_audit
|
|
|
|
| 8 |
|
| 9 |
st.set_page_config(
|
| 10 |
page_title="TenderIQ",
|
|
@@ -14,7 +17,7 @@ st.set_page_config(
|
|
| 14 |
|
| 15 |
|
| 16 |
def _probe_llm() -> str:
|
| 17 |
-
"""
|
| 18 |
if st.session_state.get("fallback_active"):
|
| 19 |
return "amber"
|
| 20 |
if "llm_status" in st.session_state:
|
|
@@ -32,6 +35,18 @@ def _probe_llm() -> str:
|
|
| 32 |
return "red"
|
| 33 |
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# ── Sidebar ──────────────────────────────────────────────────────────────────
|
| 36 |
with st.sidebar:
|
| 37 |
st.markdown("## ⚖️ TenderIQ")
|
|
@@ -49,18 +64,33 @@ with st.sidebar:
|
|
| 49 |
st.caption("Using pre-computed fallback data.")
|
| 50 |
|
| 51 |
st.divider()
|
|
|
|
| 52 |
if st.button("Reset Session", use_container_width=True):
|
| 53 |
for key in list(st.session_state.keys()):
|
| 54 |
del st.session_state[key]
|
| 55 |
st.rerun()
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# ── Tabs ─────────────────────────────────────────────────────────────────────
|
| 58 |
-
tab1, tab2, tab3, tab4, tab5 = st.tabs([
|
| 59 |
"Overview",
|
| 60 |
"Tender Analysis",
|
| 61 |
"Bidder Evaluation",
|
| 62 |
"Human Review",
|
| 63 |
"Audit Log",
|
|
|
|
| 64 |
])
|
| 65 |
|
| 66 |
with tab1:
|
|
@@ -77,3 +107,6 @@ with tab4:
|
|
| 77 |
|
| 78 |
with tab5:
|
| 79 |
render_audit()
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import shutil
|
| 2 |
+
|
| 3 |
import streamlit as st
|
| 4 |
|
| 5 |
from ui.tab_overview import render as render_overview
|
|
|
|
| 7 |
from ui.tab_bidders import render as render_bidders
|
| 8 |
from ui.tab_review import render as render_review
|
| 9 |
from ui.tab_audit import render as render_audit
|
| 10 |
+
from ui.tab_interpretability import render as render_interpretability
|
| 11 |
|
| 12 |
st.set_page_config(
|
| 13 |
page_title="TenderIQ",
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def _probe_llm() -> str:
|
| 20 |
+
"""Probe once per session; returns 'green', 'amber', or 'red'."""
|
| 21 |
if st.session_state.get("fallback_active"):
|
| 22 |
return "amber"
|
| 23 |
if "llm_status" in st.session_state:
|
|
|
|
| 35 |
return "red"
|
| 36 |
|
| 37 |
|
| 38 |
+
def _reset_demo() -> None:
    """Clear session, audit DB, ChromaDB, and OCR cache for a clean demo run.

    Order matters: Streamlit's resource cache is cleared *before* the on-disk
    stores are removed, so cached objects are dropped before their backing
    directories are deleted. ``rmtree`` runs with ``ignore_errors=True``, so a
    directory that cannot be removed is skipped silently rather than raising.
    """
    # Imported lazily (as in the original) so the app module can be imported
    # without pulling in the audit/config modules at import time.
    from core import audit
    from core.config import CHROMA_DIR, OCR_CACHE_DIR

    # NOTE(review): presumably the Chroma client/collection is held through
    # st.cache_resource — confirm. Releasing it first avoids deleting a
    # directory that a cached object still has open.
    st.cache_resource.clear()

    audit.clear()
    for directory in (CHROMA_DIR, OCR_CACHE_DIR):
        shutil.rmtree(str(directory), ignore_errors=True)

    # Snapshot the keys: deleting while iterating the live view is unsafe.
    for key in list(st.session_state.keys()):
        del st.session_state[key]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
# ── Sidebar ──────────────────────────────────────────────────────────────────
|
| 51 |
with st.sidebar:
|
| 52 |
st.markdown("## ⚖️ TenderIQ")
|
|
|
|
| 64 |
st.caption("Using pre-computed fallback data.")
|
| 65 |
|
| 66 |
st.divider()
|
| 67 |
+
|
| 68 |
if st.button("Reset Session", use_container_width=True):
|
| 69 |
for key in list(st.session_state.keys()):
|
| 70 |
del st.session_state[key]
|
| 71 |
st.rerun()
|
| 72 |
|
| 73 |
+
if st.button("🗑 Reset for Demo", use_container_width=True, type="secondary"):
|
| 74 |
+
st.session_state["confirm_demo_reset"] = True
|
| 75 |
+
|
| 76 |
+
if st.session_state.get("confirm_demo_reset"):
|
| 77 |
+
st.warning("Clears audit log, vector index, OCR cache, and session. Sure?")
|
| 78 |
+
col1, col2 = st.columns(2)
|
| 79 |
+
if col1.button("Yes, reset", type="primary", use_container_width=True):
|
| 80 |
+
_reset_demo()
|
| 81 |
+
st.rerun()
|
| 82 |
+
if col2.button("Cancel", use_container_width=True):
|
| 83 |
+
st.session_state.pop("confirm_demo_reset", None)
|
| 84 |
+
st.rerun()
|
| 85 |
+
|
| 86 |
# ── Tabs ─────────────────────────────────────────────────────────────────────
|
| 87 |
+
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
|
| 88 |
"Overview",
|
| 89 |
"Tender Analysis",
|
| 90 |
"Bidder Evaluation",
|
| 91 |
"Human Review",
|
| 92 |
"Audit Log",
|
| 93 |
+
"Interpretability",
|
| 94 |
])
|
| 95 |
|
| 96 |
with tab1:
|
|
|
|
| 107 |
|
| 108 |
with tab5:
|
| 109 |
render_audit()
|
| 110 |
+
|
| 111 |
+
with tab6:
|
| 112 |
+
render_interpretability()
|
|
@@ -47,6 +47,14 @@ def log(action: str, actor: str = "system", **fields) -> int:
|
|
| 47 |
return row_id
|
| 48 |
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
def query(filters: dict | None = None) -> list[dict]:
|
| 51 |
conn = _conn()
|
| 52 |
sql = "SELECT * FROM audit_log"
|
|
|
|
| 47 |
return row_id
|
| 48 |
|
| 49 |
|
| 50 |
+
def clear() -> None:
    """Delete every row from ``audit_log`` and reset its AUTOINCREMENT counter.

    The connection is always closed, even if a statement fails; in that case
    nothing is committed.
    """
    conn = _conn()
    try:
        conn.execute("DELETE FROM audit_log")
        # sqlite_sequence is created by SQLite only once a table declared with
        # AUTOINCREMENT exists; deleting from it unconditionally raises
        # "no such table" otherwise, so check for it first.
        has_sequence = conn.execute(
            "SELECT 1 FROM sqlite_master "
            "WHERE type='table' AND name='sqlite_sequence'"
        ).fetchone()
        if has_sequence:
            conn.execute("DELETE FROM sqlite_sequence WHERE name='audit_log'")
        conn.commit()
    finally:
        conn.close()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
def query(filters: dict | None = None) -> list[dict]:
|
| 59 |
conn = _conn()
|
| 60 |
sql = "SELECT * FROM audit_log"
|
|
@@ -19,3 +19,9 @@ CHROMA_DIR = str(BASE_DIR / ".chroma")
|
|
| 19 |
AUDIT_DB = str(BASE_DIR / "audit.db")
|
| 20 |
PRECOMPUTED_DIR = DATA_DIR / "precomputed"
|
| 21 |
OCR_CACHE_DIR = BASE_DIR / ".ocr_cache"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
AUDIT_DB = str(BASE_DIR / "audit.db")
|
| 20 |
PRECOMPUTED_DIR = DATA_DIR / "precomputed"
|
| 21 |
OCR_CACHE_DIR = BASE_DIR / ".ocr_cache"
|
| 22 |
+
|
| 23 |
+
BIDDER_NAMES = {
|
| 24 |
+
"bidder_a": "Apex Constructions Pvt. Ltd.",
|
| 25 |
+
"bidder_b": "BuildRight Enterprises",
|
| 26 |
+
"bidder_c": "Shree Constructions & Services",
|
| 27 |
+
}
|
|
@@ -60,7 +60,9 @@ def evaluate(bidder_id: str, criterion: Criterion) -> Verdict:
|
|
| 60 |
)
|
| 61 |
audit.log("criterion_evaluated", bidder_id=bidder_id,
|
| 62 |
criterion_id=criterion.id, verdict="needs_review",
|
| 63 |
-
|
|
|
|
|
|
|
| 64 |
return v
|
| 65 |
|
| 66 |
evidence_dicts = [
|
|
@@ -148,9 +150,26 @@ Rules:
|
|
| 148 |
timestamp=_now_iso(),
|
| 149 |
review_status="pending",
|
| 150 |
)
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
return v
|
| 155 |
|
| 156 |
|
|
|
|
| 60 |
)
|
| 61 |
audit.log("criterion_evaluated", bidder_id=bidder_id,
|
| 62 |
criterion_id=criterion.id, verdict="needs_review",
|
| 63 |
+
llm_verdict="needs_review", extracted_value="",
|
| 64 |
+
llm_confidence=0.0, combined_confidence=0.0,
|
| 65 |
+
ocr_tier="", escalation_reason="no evidence found", reason=v.reason)
|
| 66 |
return v
|
| 67 |
|
| 68 |
evidence_dicts = [
|
|
|
|
| 150 |
timestamp=_now_iso(),
|
| 151 |
review_status="pending",
|
| 152 |
)
|
| 153 |
+
escalation_reason = None
|
| 154 |
+
if llm_verdict != final_verdict:
|
| 155 |
+
if combined < CONFIDENCE_REVIEW:
|
| 156 |
+
escalation_reason = f"auto-escalated: combined confidence {combined:.0%} below threshold"
|
| 157 |
+
elif combined < CONFIDENCE_HIGH and llm_verdict == "not_eligible":
|
| 158 |
+
escalation_reason = f"auto-escalated: borderline confidence {combined:.0%} on disqualification"
|
| 159 |
+
|
| 160 |
+
audit.log(
|
| 161 |
+
"criterion_evaluated",
|
| 162 |
+
bidder_id=bidder_id,
|
| 163 |
+
criterion_id=criterion.id,
|
| 164 |
+
verdict=final_verdict,
|
| 165 |
+
llm_verdict=llm_verdict,
|
| 166 |
+
extracted_value=extracted_value or "",
|
| 167 |
+
llm_confidence=round(llm_confidence, 4),
|
| 168 |
+
combined_confidence=round(combined, 4),
|
| 169 |
+
ocr_tier=source_type,
|
| 170 |
+
escalation_reason=escalation_reason or "",
|
| 171 |
+
reason=reason,
|
| 172 |
+
)
|
| 173 |
return v
|
| 174 |
|
| 175 |
|
|
@@ -1,14 +1,99 @@
|
|
| 1 |
import io
|
|
|
|
| 2 |
|
| 3 |
import pandas as pd
|
| 4 |
import streamlit as st
|
| 5 |
|
| 6 |
from core import audit
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
def render() -> None:
|
| 10 |
st.header("Audit Log")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
|
|
|
| 12 |
col1, col2, col3 = st.columns(3)
|
| 13 |
with col1:
|
| 14 |
bidder_filter = st.selectbox(
|
|
@@ -18,11 +103,24 @@ def render() -> None:
|
|
| 18 |
with col2:
|
| 19 |
action_filter = st.selectbox(
|
| 20 |
"Filter by action",
|
| 21 |
-
options=["All"
|
| 22 |
-
|
| 23 |
)
|
| 24 |
with col3:
|
| 25 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
filters: dict = {}
|
| 28 |
if bidder_filter != "All":
|
|
@@ -36,17 +134,61 @@ def render() -> None:
|
|
| 36 |
st.info("No audit entries yet. Run an evaluation to generate entries.")
|
| 37 |
return
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
df = pd.DataFrame(rows)
|
| 40 |
-
display_cols = ["id", "ts", "action", "actor", "bidder_id", "criterion_id", "payload_json"]
|
| 41 |
-
display_cols = [c for c in display_cols if c in df.columns]
|
| 42 |
-
df_display = df[display_cols].copy()
|
| 43 |
-
df_display["ts"] = df_display["ts"].str[:19].str.replace("T", " ")
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
|
|
|
|
|
|
|
|
|
| 48 |
csv_buf = io.StringIO()
|
| 49 |
-
|
| 50 |
st.download_button(
|
| 51 |
label="Export CSV",
|
| 52 |
data=csv_buf.getvalue().encode("utf-8"),
|
|
|
|
| 1 |
import io
|
| 2 |
+
import json
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
import streamlit as st
|
| 6 |
|
| 7 |
from core import audit
|
| 8 |
|
| 9 |
+
_ACTION_LABELS = {
|
| 10 |
+
"criteria_extracted": "📋 Criteria Extracted",
|
| 11 |
+
"bidder_processed": "📥 Bidder Document Indexed",
|
| 12 |
+
"criterion_evaluated": "⚖️ Criterion Evaluated",
|
| 13 |
+
"human_review_action": "👤 Human Review Action",
|
| 14 |
+
"precomputed_fallback_used":"⚠️ Fallback Used",
|
| 15 |
+
"vision_ocr_invoked": "👁️ Vision OCR Invoked",
|
| 16 |
+
"smoke_test": "🧪 Smoke Test",
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
_ACTION_CATEGORIES = {
|
| 20 |
+
"criteria_extracted": "system",
|
| 21 |
+
"bidder_processed": "system",
|
| 22 |
+
"criterion_evaluated": "system",
|
| 23 |
+
"human_review_action": "human",
|
| 24 |
+
"precomputed_fallback_used": "warning",
|
| 25 |
+
"vision_ocr_invoked": "system",
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
_VERDICT_ICONS = {
|
| 29 |
+
"eligible": "✅ Eligible",
|
| 30 |
+
"not_eligible": "❌ Not Eligible",
|
| 31 |
+
"needs_review": "⚠️ Needs Review",
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _make_summary(row: dict) -> str:
|
| 36 |
+
action = row.get("action", "")
|
| 37 |
+
bidder = row.get("bidder_id") or ""
|
| 38 |
+
crit = row.get("criterion_id") or ""
|
| 39 |
+
try:
|
| 40 |
+
p = json.loads(row.get("payload_json") or "{}")
|
| 41 |
+
except Exception:
|
| 42 |
+
p = {}
|
| 43 |
+
|
| 44 |
+
if action == "criteria_extracted":
|
| 45 |
+
return f"Extracted {p.get('count', '?')} criteria from {p.get('source', 'tender PDF')}"
|
| 46 |
+
|
| 47 |
+
if action == "bidder_processed":
|
| 48 |
+
return f"{bidder} — {p.get('doc_name', '?')} indexed ({p.get('chunk_count', '?')} chunks)"
|
| 49 |
+
|
| 50 |
+
if action == "criterion_evaluated":
|
| 51 |
+
verdict = _VERDICT_ICONS.get(p.get("verdict", ""), p.get("verdict", "?"))
|
| 52 |
+
conf = p.get("combined_confidence", "?")
|
| 53 |
+
conf_str = f"{float(conf):.0%}" if conf != "?" else "?"
|
| 54 |
+
extracted = p.get("extracted_value", "")
|
| 55 |
+
esc = p.get("escalation_reason", "")
|
| 56 |
+
base = f"{bidder} / {crit} → {verdict} (confidence: {conf_str})"
|
| 57 |
+
if extracted:
|
| 58 |
+
base += f" | Extracted: {extracted}"
|
| 59 |
+
if esc:
|
| 60 |
+
base += f" | ⚠️ {esc}"
|
| 61 |
+
return base
|
| 62 |
+
|
| 63 |
+
if action == "human_review_action":
|
| 64 |
+
taken = p.get("action_taken", "?").capitalize()
|
| 65 |
+
orig = p.get("original_extracted_value", "")
|
| 66 |
+
edited = p.get("edited_value", "")
|
| 67 |
+
base = f"Officer {taken}: {bidder} / {crit}"
|
| 68 |
+
if orig:
|
| 69 |
+
base += f" | Original value: {orig}"
|
| 70 |
+
if edited:
|
| 71 |
+
base += f" → Edited to: {edited}"
|
| 72 |
+
return base
|
| 73 |
+
|
| 74 |
+
if action == "precomputed_fallback_used":
|
| 75 |
+
return f"API unavailable — pre-computed data used | {p.get('reason', '')}"
|
| 76 |
+
|
| 77 |
+
if action == "vision_ocr_invoked":
|
| 78 |
+
tc = p.get("tesseract_conf", "?")
|
| 79 |
+
tc_str = f"{float(tc):.0%}" if tc != "?" else "?"
|
| 80 |
+
return f"{bidder} page {p.get('page', '?')} — Tesseract confidence {tc_str}, escalated to Vision LLM"
|
| 81 |
+
|
| 82 |
+
return action
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _category_color(category: str) -> str:
|
| 86 |
+
return {"system": "🔵", "human": "🟢", "warning": "🟡"}.get(category, "⚪")
|
| 87 |
+
|
| 88 |
|
| 89 |
def render() -> None:
|
| 90 |
st.header("Audit Log")
|
| 91 |
+
st.caption(
|
| 92 |
+
"Every system action and human decision is recorded here. "
|
| 93 |
+
"This log is the compliance trail — it can be exported and submitted as part of the evaluation record."
|
| 94 |
+
)
|
| 95 |
|
| 96 |
+
# ── Filters ──────────────────────────────────────────────────────────────
|
| 97 |
col1, col2, col3 = st.columns(3)
|
| 98 |
with col1:
|
| 99 |
bidder_filter = st.selectbox(
|
|
|
|
| 103 |
with col2:
|
| 104 |
action_filter = st.selectbox(
|
| 105 |
"Filter by action",
|
| 106 |
+
options=["All"] + list(_ACTION_LABELS.keys()),
|
| 107 |
+
format_func=lambda x: "All" if x == "All" else _ACTION_LABELS.get(x, x),
|
| 108 |
)
|
| 109 |
with col3:
|
| 110 |
+
if st.button("🗑 Clear Log", type="secondary", use_container_width=True):
|
| 111 |
+
st.session_state["confirm_clear_audit"] = True
|
| 112 |
+
|
| 113 |
+
if st.session_state.get("confirm_clear_audit"):
|
| 114 |
+
st.warning("This will permanently delete all audit entries. Are you sure?")
|
| 115 |
+
c1, c2 = st.columns(2)
|
| 116 |
+
if c1.button("Yes, clear everything", type="primary", use_container_width=True):
|
| 117 |
+
audit.clear()
|
| 118 |
+
st.session_state.pop("confirm_clear_audit", None)
|
| 119 |
+
st.success("Audit log cleared.")
|
| 120 |
+
st.rerun()
|
| 121 |
+
if c2.button("Cancel", use_container_width=True):
|
| 122 |
+
st.session_state.pop("confirm_clear_audit", None)
|
| 123 |
+
st.rerun()
|
| 124 |
|
| 125 |
filters: dict = {}
|
| 126 |
if bidder_filter != "All":
|
|
|
|
| 134 |
st.info("No audit entries yet. Run an evaluation to generate entries.")
|
| 135 |
return
|
| 136 |
|
| 137 |
+
# ── Summary counts ────────────────────────────────────────────────────────
|
| 138 |
+
total = len(rows)
|
| 139 |
+
human_actions = sum(1 for r in rows if r["action"] == "human_review_action")
|
| 140 |
+
fallbacks = sum(1 for r in rows if r["action"] == "precomputed_fallback_used")
|
| 141 |
+
vision_ocr = sum(1 for r in rows if r["action"] == "vision_ocr_invoked")
|
| 142 |
+
|
| 143 |
+
m1, m2, m3, m4 = st.columns(4)
|
| 144 |
+
m1.metric("Total entries", total)
|
| 145 |
+
m2.metric("Human actions", human_actions)
|
| 146 |
+
m3.metric("Fallback events", fallbacks)
|
| 147 |
+
m4.metric("Vision OCR calls", vision_ocr)
|
| 148 |
+
|
| 149 |
+
st.divider()
|
| 150 |
+
|
| 151 |
+
# ── Human-readable table ──────────────────────────────────────────────────
|
| 152 |
df = pd.DataFrame(rows)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
+
df["Action"] = df["action"].map(lambda x: _ACTION_LABELS.get(x, x))
|
| 155 |
+
df["Category"] = df["action"].map(
|
| 156 |
+
lambda x: _category_color(_ACTION_CATEGORIES.get(x, "system"))
|
| 157 |
+
)
|
| 158 |
+
df["Summary"] = df.apply(_make_summary, axis=1)
|
| 159 |
+
df["Timestamp"] = df["ts"].str[:19].str.replace("T", " ")
|
| 160 |
+
df["Actor"] = df["actor"]
|
| 161 |
+
df["Bidder"] = df["bidder_id"].fillna("—")
|
| 162 |
+
df["Criterion"] = df["criterion_id"].fillna("—")
|
| 163 |
+
|
| 164 |
+
display = df[["Category", "Timestamp", "Action", "Bidder", "Criterion", "Summary", "Actor"]].copy()
|
| 165 |
+
|
| 166 |
+
st.dataframe(
|
| 167 |
+
display,
|
| 168 |
+
use_container_width=True,
|
| 169 |
+
hide_index=True,
|
| 170 |
+
column_config={
|
| 171 |
+
"Category": st.column_config.TextColumn("", width="small"),
|
| 172 |
+
"Timestamp": st.column_config.TextColumn("Timestamp", width="medium"),
|
| 173 |
+
"Action": st.column_config.TextColumn("Action", width="medium"),
|
| 174 |
+
"Bidder": st.column_config.TextColumn("Bidder", width="small"),
|
| 175 |
+
"Criterion": st.column_config.TextColumn("Criterion", width="small"),
|
| 176 |
+
"Summary": st.column_config.TextColumn("Summary", width="large"),
|
| 177 |
+
"Actor": st.column_config.TextColumn("Actor", width="small"),
|
| 178 |
+
},
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
# ── Raw detail expander ───────────────────────────────────────────────────
|
| 182 |
+
with st.expander("Raw payload data (for compliance / full detail)", expanded=False):
|
| 183 |
+
raw_df = df[["Timestamp", "action", "actor", "bidder_id", "criterion_id", "payload_json"]].copy()
|
| 184 |
+
raw_df.columns = ["Timestamp", "action", "actor", "bidder_id", "criterion_id", "payload_json"]
|
| 185 |
+
st.dataframe(raw_df, use_container_width=True, hide_index=True)
|
| 186 |
|
| 187 |
+
# ── Export ────────────────────────────────────────────────────────────────
|
| 188 |
+
export_df = df[["Timestamp", "Action", "Actor", "Bidder", "Criterion", "Summary"]].copy()
|
| 189 |
+
export_df["raw_payload"] = df["payload_json"]
|
| 190 |
csv_buf = io.StringIO()
|
| 191 |
+
export_df.to_csv(csv_buf, index=False)
|
| 192 |
st.download_button(
|
| 193 |
label="Export CSV",
|
| 194 |
data=csv_buf.getvalue().encode("utf-8"),
|
|
@@ -1,17 +1,15 @@
|
|
| 1 |
-
from pathlib import Path
|
| 2 |
-
|
| 3 |
import streamlit as st
|
| 4 |
|
| 5 |
from core import bidder_processor, evaluator
|
| 6 |
-
from core.config import DATA_DIR
|
| 7 |
from core.fallback import load_criteria
|
| 8 |
from core.schemas import Criterion
|
| 9 |
from ui.components import category_badge, confidence_bar, ocr_tier_badge, verdict_pill
|
| 10 |
|
| 11 |
_BIDDER_LABELS = {
|
| 12 |
-
"bidder_a": "Bidder A — Apex Constructions (Clearly Eligible)",
|
| 13 |
"bidder_b": "Bidder B — BuildRight Enterprises (Ineligible: Low Turnover)",
|
| 14 |
-
"bidder_c": "Bidder C — Shree Constructions (Scanned Cert: Needs Review)",
|
| 15 |
}
|
| 16 |
|
| 17 |
|
|
@@ -22,8 +20,12 @@ def _get_criteria() -> list[Criterion]:
|
|
| 22 |
return load_criteria()
|
| 23 |
|
| 24 |
|
| 25 |
-
def _overall_verdict(verdicts: list[dict]) -> str:
|
| 26 |
-
mandatory
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
if any(v["verdict"] == "not_eligible" for v in mandatory):
|
| 28 |
return "not_eligible"
|
| 29 |
if any(v["verdict"] == "needs_review" for v in mandatory):
|
|
@@ -52,14 +54,15 @@ def render() -> None:
|
|
| 52 |
f for f in (DATA_DIR / "bidders" / bidder_id).iterdir()
|
| 53 |
if f.suffix.lower() in {".pdf", ".png", ".jpg"}
|
| 54 |
)
|
| 55 |
-
with st.spinner(f"Processing {bidder_id} documents…"):
|
| 56 |
bidder_processor.process_bidder(bidder_id, files)
|
| 57 |
verdicts_list = []
|
| 58 |
for c in criteria:
|
| 59 |
v = evaluator.evaluate(bidder_id, c)
|
| 60 |
verdicts_list.append(v.model_dump())
|
| 61 |
done += 1
|
| 62 |
-
progress.progress(done / total,
|
|
|
|
| 63 |
verdicts_dict[bidder_id] = verdicts_list
|
| 64 |
st.session_state["verdicts"] = verdicts_dict
|
| 65 |
progress.empty()
|
|
@@ -77,13 +80,34 @@ def render() -> None:
|
|
| 77 |
if bidder_id not in verdicts_data:
|
| 78 |
continue
|
| 79 |
verdicts = verdicts_data[bidder_id]
|
| 80 |
-
overall = _overall_verdict(verdicts)
|
| 81 |
overall_pill = verdict_pill(overall)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
-
with st.expander(
|
| 84 |
-
f"**{_BIDDER_LABELS.get(bidder_id, bidder_id)}** — Overall: {overall_pill}",
|
| 85 |
-
expanded=True,
|
| 86 |
-
):
|
| 87 |
for v in verdicts:
|
| 88 |
crit = crit_map.get(v["criterion_id"])
|
| 89 |
crit_title = crit.title if crit else v["criterion_id"]
|
|
@@ -105,7 +129,7 @@ def render() -> None:
|
|
| 105 |
conf = v.get("combined_confidence", 0.0)
|
| 106 |
confidence_bar(conf)
|
| 107 |
|
| 108 |
-
if v.get("reason") or v.get("source"):
|
| 109 |
with st.expander("Details", expanded=False):
|
| 110 |
if v.get("reason"):
|
| 111 |
st.markdown(f"**Reason:** {v['reason']}")
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
from core import bidder_processor, evaluator
|
| 4 |
+
from core.config import BIDDER_NAMES, DATA_DIR
|
| 5 |
from core.fallback import load_criteria
|
| 6 |
from core.schemas import Criterion
|
| 7 |
from ui.components import category_badge, confidence_bar, ocr_tier_badge, verdict_pill
|
| 8 |
|
| 9 |
_BIDDER_LABELS = {
|
| 10 |
+
"bidder_a": "Bidder A — Apex Constructions Pvt. Ltd. (Clearly Eligible)",
|
| 11 |
"bidder_b": "Bidder B — BuildRight Enterprises (Ineligible: Low Turnover)",
|
| 12 |
+
"bidder_c": "Bidder C — Shree Constructions & Services (Scanned Cert: Needs Review)",
|
| 13 |
}
|
| 14 |
|
| 15 |
|
|
|
|
| 20 |
return load_criteria()
|
| 21 |
|
| 22 |
|
| 23 |
+
def _overall_verdict(verdicts: list[dict], crit_map: dict) -> str:
|
| 24 |
+
"""Only mandatory criteria determine overall eligibility."""
|
| 25 |
+
mandatory = [v for v in verdicts if crit_map.get(v["criterion_id"], None) and
|
| 26 |
+
crit_map[v["criterion_id"]].mandatory]
|
| 27 |
+
if not mandatory:
|
| 28 |
+
mandatory = verdicts # fallback if crit_map is missing
|
| 29 |
if any(v["verdict"] == "not_eligible" for v in mandatory):
|
| 30 |
return "not_eligible"
|
| 31 |
if any(v["verdict"] == "needs_review" for v in mandatory):
|
|
|
|
| 54 |
f for f in (DATA_DIR / "bidders" / bidder_id).iterdir()
|
| 55 |
if f.suffix.lower() in {".pdf", ".png", ".jpg"}
|
| 56 |
)
|
| 57 |
+
with st.spinner(f"Processing {BIDDER_NAMES.get(bidder_id, bidder_id)} documents…"):
|
| 58 |
bidder_processor.process_bidder(bidder_id, files)
|
| 59 |
verdicts_list = []
|
| 60 |
for c in criteria:
|
| 61 |
v = evaluator.evaluate(bidder_id, c)
|
| 62 |
verdicts_list.append(v.model_dump())
|
| 63 |
done += 1
|
| 64 |
+
progress.progress(done / total,
|
| 65 |
+
text=f"Evaluated {c.id} for {BIDDER_NAMES.get(bidder_id, bidder_id)}")
|
| 66 |
verdicts_dict[bidder_id] = verdicts_list
|
| 67 |
st.session_state["verdicts"] = verdicts_dict
|
| 68 |
progress.empty()
|
|
|
|
| 80 |
if bidder_id not in verdicts_data:
|
| 81 |
continue
|
| 82 |
verdicts = verdicts_data[bidder_id]
|
| 83 |
+
overall = _overall_verdict(verdicts, crit_map)
|
| 84 |
overall_pill = verdict_pill(overall)
|
| 85 |
+
friendly = BIDDER_NAMES.get(bidder_id, bidder_id)
|
| 86 |
+
mandatory_count = sum(1 for v in verdicts
|
| 87 |
+
if crit_map.get(v["criterion_id"]) and
|
| 88 |
+
crit_map[v["criterion_id"]].mandatory)
|
| 89 |
+
passed = sum(1 for v in verdicts
|
| 90 |
+
if v["verdict"] == "eligible" and
|
| 91 |
+
crit_map.get(v["criterion_id"]) and
|
| 92 |
+
crit_map[v["criterion_id"]].mandatory)
|
| 93 |
+
|
| 94 |
+
with st.container(border=True):
|
| 95 |
+
st.markdown(
|
| 96 |
+
f"#### {friendly} — Overall: {overall_pill}"
|
| 97 |
+
f" <span style='font-size:0.85em; color:grey;'>"
|
| 98 |
+
f"({passed}/{mandatory_count} mandatory criteria met)</span>",
|
| 99 |
+
unsafe_allow_html=True,
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
# Column headers
|
| 103 |
+
hcols = st.columns([3, 2, 2, 2, 1])
|
| 104 |
+
hcols[0].caption("Criterion")
|
| 105 |
+
hcols[1].caption("Verdict")
|
| 106 |
+
hcols[2].caption("Extracted Value")
|
| 107 |
+
hcols[3].caption("Source / OCR Tier")
|
| 108 |
+
hcols[4].caption("Category")
|
| 109 |
+
st.divider()
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
for v in verdicts:
|
| 112 |
crit = crit_map.get(v["criterion_id"])
|
| 113 |
crit_title = crit.title if crit else v["criterion_id"]
|
|
|
|
| 129 |
conf = v.get("combined_confidence", 0.0)
|
| 130 |
confidence_bar(conf)
|
| 131 |
|
| 132 |
+
if v.get("reason") or (v.get("source") and v["source"].get("snippet")):
|
| 133 |
with st.expander("Details", expanded=False):
|
| 134 |
if v.get("reason"):
|
| 135 |
st.markdown(f"**Reason:** {v['reason']}")
|
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
from core.config import BIDDER_NAMES, DATA_DIR, MODEL_VERSION
|
| 6 |
+
from core.fallback import load_criteria
|
| 7 |
+
from core.llm_client import LLM, LLMUnavailable
|
| 8 |
+
from core.pdf_utils import render_page_to_image
|
| 9 |
+
from core.schemas import Criterion
|
| 10 |
+
|
| 11 |
+
_VERDICT_PLAIN = {
|
| 12 |
+
"eligible": ("✅", "PASSED", "green"),
|
| 13 |
+
"not_eligible": ("❌", "FAILED", "red"),
|
| 14 |
+
"needs_review": ("⚠️", "NEEDS REVIEW", "orange"),
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
_CRITERION_RULE_PLAIN = {
|
| 18 |
+
"numeric_threshold": lambda r: (
|
| 19 |
+
f"must be ≥ {r['value']:,} {r.get('unit') or ''}" if r["operator"] == ">="
|
| 20 |
+
else f"must be ≤ {r['value']:,} {r.get('unit') or ''}"
|
| 21 |
+
),
|
| 22 |
+
"count_threshold": lambda r: f"must have completed at least {int(r['value'])}",
|
| 23 |
+
"certification_present": lambda _: "valid certificate must be present",
|
| 24 |
+
"document_present": lambda _: "supporting document must be present",
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _get_criteria() -> list[Criterion]:
    """Return criteria from session state, else from ``load_criteria()``.

    Session entries under ``"criteria"`` are rebuilt into ``Criterion``
    objects by keyword expansion; a missing or empty entry falls through to
    the loader.
    """
    stored = st.session_state.get("criteria")
    if not stored:
        return load_criteria()
    return [Criterion(**fields) for fields in stored]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _plain_explanation(v: dict, crit: Criterion | None) -> str:
    """Render one verdict as a plain-English markdown sentence.

    Args:
        v: Verdict dict; reads ``verdict``, ``extracted_value`` and ``reason``.
        crit: The matching criterion, or ``None`` when it is unknown.

    Returns:
        ``"<icon> **<title>** — <LABEL>.<value part> <reason>"``, or just the
        reason text when no criterion is available.
    """
    reason = v.get("reason") or ""
    if not crit:
        return reason

    verdict = v.get("verdict", "")
    extracted = v.get("extracted_value") or ""
    icon, label, _ = _VERDICT_PLAIN.get(verdict, ("❓", verdict, "grey"))

    if verdict == "eligible":
        val_part = f" Found: **{extracted}**." if extracted else ""
    elif verdict == "not_eligible":
        # The rule description is only needed here; building it for other
        # verdicts would let a malformed rule break an unrelated message.
        rule = crit.rule
        rule_desc = _CRITERION_RULE_PLAIN.get(rule.type, lambda _: "")(rule.model_dump())
        if extracted and rule_desc:
            val_part = f" Found: **{extracted}** — this does not meet the requirement ({rule_desc})."
        elif extracted:
            # Unknown rule type: avoid printing an empty "()".
            val_part = f" Found: **{extracted}** — this does not meet the requirement."
        elif rule_desc:
            val_part = f" Required: {rule_desc}."
        else:
            val_part = ""
    else:  # needs_review (and any unrecognised verdict)
        val_part = f" Extracted value: **{extracted}**." if extracted else ""

    return f"{icon} **{crit.title}** — {label}.{val_part} {reason}"
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _source_citation(v: dict) -> str | None:
|
| 63 |
+
src = v.get("source")
|
| 64 |
+
if not src:
|
| 65 |
+
return None
|
| 66 |
+
doc = src.get("doc_name", "")
|
| 67 |
+
page = src.get("page", "")
|
| 68 |
+
tier = src.get("source_type", "")
|
| 69 |
+
tier_labels = {"text_pdf": "typed PDF", "tesseract": "Tesseract OCR",
|
| 70 |
+
"vision_llm": "Vision LLM OCR"}
|
| 71 |
+
return f"📄 **{doc}**, page {page} · read by _{tier_labels.get(tier, tier)}_"
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def _build_qa_context(bidder_id: str, verdicts: list[dict],
                      criteria: list[Criterion]) -> str:
    """Flatten one bidder's verdicts into a plain-text block for Q&A.

    The text opens with a ``BIDDER:`` header and an ``EVALUATION RESULTS:``
    heading. Each verdict then contributes a heading line (criterion id,
    title, mandatory/optional tag, upper-cased verdict), optional
    extracted-value / evidence-source / evidence-snippet lines, a
    confidence-and-reason line, the requirement description when the
    criterion is known, and a blank separator line.
    """
    lookup = {c.id: c for c in criteria}
    out = [
        f"BIDDER: {BIDDER_NAMES.get(bidder_id, bidder_id)} ({bidder_id})",
        "",
        "EVALUATION RESULTS:",
    ]

    for item in verdicts:
        cid = item["criterion_id"]
        crit = lookup.get(cid)
        if crit:
            heading = crit.title
            kind = "Mandatory" if crit.mandatory else "Optional"
        else:
            # Criterion not found: fall back to the raw id.
            heading = cid
            kind = "Unknown"
        out.append(f" {cid} — {heading} [{kind}]: {item['verdict'].upper()}")

        value = item.get("extracted_value")
        if value:
            out.append(f" Extracted value: {value}")

        source = item.get("source")
        if source:
            out.append(
                f" Evidence source: {source.get('doc_name')} "
                f"page {source.get('page')} "
                f"(read by {source.get('source_type')})"
            )
            snippet = source.get("snippet")
            if snippet:
                # Snippets are capped at 200 characters.
                out.append(f" Evidence snippet: \"{snippet[:200]}\"")

        confidence = item.get("combined_confidence", 0)
        out.append(
            f" Confidence: {confidence:.0%} | Reason: {item.get('reason', '')}"
        )

        if crit:
            rule = crit.rule
            describe = _CRITERION_RULE_PLAIN.get(rule.type, lambda _: "")
            out.append(f" Requirement: {describe(rule.model_dump())}")

        out.append("")

    return "\n".join(out)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def _answer_question(question: str, context: str) -> str:
    """Answer an officer's question about one bidder's evaluation.

    The question and the evaluation context are sent to the LLM, which is
    asked to reply as JSON with an ``answer`` field. The keyword-based
    ``_rule_based_answer`` is used instead when the LLM client raises
    ``LLMUnavailable`` or when the reply carries no usable answer, so the
    caller never receives an empty string from a blank LLM reply.

    Args:
        question: The officer's free-text question.
        context: Evaluation summary text the answer must be based on.

    Returns:
        The answer text.
    """
    system = """You are a procurement compliance assistant helping an evaluation officer
understand AI-generated eligibility verdicts. Answer questions about the bidder's evaluation
in plain, professional English. Always cite specific document names and page numbers from the
evidence. Be concise (2-4 sentences). Do not invent information not present in the context."""

    user = f"""{context}

OFFICER'S QUESTION: {question}

Answer the question based only on the evaluation results above.
Cite the specific document and page number when referring to evidence."""

    # Only the LLM round-trip sits inside the try.
    try:
        llm = LLM()
        result = llm.chat_json(
            system + " Return JSON: {\"answer\": \"<your answer>\"}",
            user,
        )
    except LLMUnavailable:
        return _rule_based_answer(question, context)

    raw = result.get("answer") if isinstance(result, dict) else None
    answer = str(raw).strip() if raw else ""
    # A missing or blank answer falls back to the rule-based summary.
    return answer or _rule_based_answer(question, context)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _rule_based_answer(question: str, context: str) -> str:
|
| 137 |
+
q = question.lower()
|
| 138 |
+
lines = context.splitlines()
|
| 139 |
+
|
| 140 |
+
if any(w in q for w in ["reject", "fail", "not eligible", "disqualif"]):
|
| 141 |
+
fails = [l for l in lines if "NOT_ELIGIBLE" in l or "NEEDS_REVIEW" in l]
|
| 142 |
+
if fails:
|
| 143 |
+
return ("Based on the evaluation: " +
|
| 144 |
+
"; ".join(f.strip() for f in fails[:3]) +
|
| 145 |
+
". See the Bidder Evaluation tab for full details.")
|
| 146 |
+
return "No failing criteria were found in the evaluation."
|
| 147 |
+
|
| 148 |
+
if any(w in q for w in ["pass", "eligible", "meet", "satisfy"]):
|
| 149 |
+
passes = [l for l in lines if "ELIGIBLE" in l and "NOT_ELIGIBLE" not in l]
|
| 150 |
+
if passes:
|
| 151 |
+
return ("Criteria passed: " +
|
| 152 |
+
"; ".join(f.strip() for f in passes[:3]) + ".")
|
| 153 |
+
return "No passing criteria were found."
|
| 154 |
+
|
| 155 |
+
if any(w in q for w in ["turnover", "financial", "revenue", "c1"]):
|
| 156 |
+
relevant = [l for l in lines if "C1" in l or "turnover" in l.lower() or
|
| 157 |
+
"Extracted value" in l]
|
| 158 |
+
if relevant:
|
| 159 |
+
return " ".join(l.strip() for l in relevant[:4])
|
| 160 |
+
|
| 161 |
+
return ("I cannot answer that specific question without the live LLM. "
|
| 162 |
+
"The evaluation summary above contains the full details.")
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def render() -> None:
    """Render the Interpretability tab.

    Reads verdicts from ``st.session_state["verdicts"]`` (bidder id -> list
    of verdict dicts) and, for the selected bidder, shows:

    * an overall banner derived from the mandatory criteria only,
    * one card per criterion with a plain-English explanation, a source
      citation and an optional preview of the cited page or image,
    * a question box answered through ``_answer_question``.

    Returns early with an info/warning message when there are no verdicts.
    """
    st.header("Interpretability")
    st.caption(
        "Plain-English explanations of why each bidder was evaluated the way it was, "
        "with full source citations. Ask any question about the evaluation."
    )

    verdicts_data = st.session_state.get("verdicts", {})
    if not verdicts_data:
        st.info("No evaluation results yet. Run the evaluation in Bidder Evaluation tab or "
                "click **Load Pre-computed Demo** in the Overview tab.")
        return

    criteria = _get_criteria()
    crit_map = {c.id: c for c in criteria}

    bidder_id = st.selectbox(
        "Select bidder",
        options=list(verdicts_data.keys()),
        format_func=lambda x: BIDDER_NAMES.get(x, x),
    )

    verdicts = verdicts_data.get(bidder_id, [])
    if not verdicts:
        st.warning("No verdicts available for this bidder.")
        return

    # ── Overall summary ───────────────────────────────────────────────────────
    # Only mandatory criteria decide the headline verdict.
    mandatory_verdicts = [v for v in verdicts
                          if crit_map.get(v["criterion_id"]) and
                          crit_map[v["criterion_id"]].mandatory]
    failed = [v for v in mandatory_verdicts if v["verdict"] == "not_eligible"]
    review = [v for v in mandatory_verdicts if v["verdict"] == "needs_review"]
    passed = [v for v in mandatory_verdicts if v["verdict"] == "eligible"]

    friendly = BIDDER_NAMES.get(bidder_id, bidder_id)

    if failed:
        st.error(
            f"**{friendly} — NOT ELIGIBLE**\n\n"
            f"Failed {len(failed)} mandatory criterion/criteria. "
            f"A bidder must meet all mandatory criteria to qualify."
        )
    elif review:
        st.warning(
            f"**{friendly} — NEEDS REVIEW**\n\n"
            f"Passed {len(passed)} mandatory criteria, but {len(review)} could not be "
            f"automatically confirmed and require officer verification."
        )
    else:
        st.success(
            f"**{friendly} — ELIGIBLE**\n\n"
            f"All {len(passed)} mandatory criteria satisfied."
        )

    st.divider()

    # ── Per-criterion plain-English cards ─────────────────────────────────────
    st.subheader("Criterion-by-Criterion Breakdown")

    for v in verdicts:
        crit = crit_map.get(v["criterion_id"])
        _, label, color = _VERDICT_PLAIN.get(v["verdict"], ("❓", v["verdict"], "grey"))
        mandatory_tag = "🔴 Mandatory" if (crit and crit.mandatory) else "🟡 Optional"

        with st.container(border=True):
            col_status, col_detail = st.columns([1, 4])

            with col_status:
                if color == "green":
                    st.success(label)
                elif color == "red":
                    st.error(label)
                else:
                    st.warning(label)
                st.caption(mandatory_tag)
                # `or 0.0` guards a key that is present but None, which
                # would otherwise break the percent format.
                conf = v.get("combined_confidence") or 0.0
                st.caption(f"Confidence: {conf:.0%}")

            with col_detail:
                explanation = _plain_explanation(v, crit)
                st.markdown(explanation)

                citation = _source_citation(v)
                if citation:
                    st.markdown(citation)

                # Page preview
                # `or {}` (not a .get default) because the key can exist
                # with a None value, which would crash on src.get below.
                src = v.get("source") or {}
                doc_name = src.get("doc_name") or ""
                page_no = src.get("page") or 1
                bidder_dir = DATA_DIR / "bidders" / bidder_id
                doc_path = bidder_dir / doc_name
                suffix = doc_path.suffix.lower()

                # With no document name, doc_path is the bidder directory
                # itself, so the explicit doc_name check skips it.
                if doc_name and doc_path.exists():
                    if suffix == ".pdf":
                        with st.expander(f"View source page ({doc_name}, p{page_no})",
                                         expanded=False):
                            try:
                                img = render_page_to_image(doc_path, page_no)
                                st.image(img, caption=f"{doc_name} — Page {page_no}",
                                         use_container_width=True)
                            except Exception:
                                # Best-effort preview: a rendering failure
                                # must not take down the whole tab.
                                st.caption("Page preview unavailable.")
                    elif suffix in {".png", ".jpg", ".jpeg"}:
                        with st.expander(f"View source image ({doc_name})", expanded=False):
                            st.image(str(doc_path), caption=doc_name,
                                     use_container_width=True)

    st.divider()

    # ── Q&A section ───────────────────────────────────────────────────────────
    st.subheader("Ask About This Evaluation")
    st.caption(
        "Ask any question about why this bidder was evaluated the way it was. "
        "Answers cite specific documents and pages."
    )

    suggestions = [
        "Why was this bidder rejected?",
        "Which criteria did this bidder fail?",
        "What turnover figure was found and which document was it from?",
        "Is this bidder ISO certified?",
        "Why is the turnover verdict in review?",
    ]
    with st.expander("Example questions", expanded=False):
        for s in suggestions:
            st.markdown(f"- _{s}_")

    # Widget keys include the bidder id, so each bidder gets its own input.
    question = st.text_input(
        "Your question",
        placeholder="e.g. Why was this bidder's turnover flagged for review?",
        key=f"qa_input_{bidder_id}",
    )

    if st.button("Get Answer", type="primary", key=f"qa_btn_{bidder_id}"):
        if not question.strip():
            st.warning("Please enter a question.")
        else:
            context = _build_qa_context(bidder_id, verdicts, criteria)
            with st.spinner("Looking up the answer…"):
                answer = _answer_question(question, context)

            st.markdown("**Answer:**")
            st.info(answer)

            with st.expander("Full evaluation context used to answer", expanded=False):
                st.code(context, language="text")
|
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
from core import audit
|
|
|
|
| 4 |
from core.fallback import load_criteria
|
| 5 |
|
| 6 |
|
|
@@ -30,23 +31,94 @@ def render() -> None:
|
|
| 30 |
|
| 31 |
st.divider()
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
col_a, col_b = st.columns(2)
|
| 35 |
with col_a:
|
| 36 |
st.markdown("""
|
| 37 |
-
**
|
| 38 |
-
DeepSeek
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
**
|
| 41 |
-
Three-tier
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
""")
|
| 43 |
with col_b:
|
| 44 |
st.markdown("""
|
| 45 |
-
**
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
**
|
| 49 |
-
|
|
|
|
|
|
|
| 50 |
""")
|
| 51 |
|
| 52 |
st.divider()
|
|
@@ -59,13 +131,13 @@ Low-confidence verdicts are routed to the review queue. Every action is logged w
|
|
| 59 |
criteria = lc()
|
| 60 |
st.session_state["criteria"] = [c.model_dump() for c in criteria]
|
| 61 |
verdicts_dict: dict = {}
|
| 62 |
-
for bidder_id in
|
| 63 |
verdicts_dict[bidder_id] = [
|
| 64 |
load_evaluation(bidder_id, c.id).model_dump()
|
| 65 |
for c in criteria
|
| 66 |
]
|
| 67 |
st.session_state["verdicts"] = verdicts_dict
|
| 68 |
-
st.success("Pre-computed demo
|
| 69 |
st.rerun()
|
| 70 |
with col2:
|
| 71 |
-
st.info("Or go to **Tender Analysis**
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
from core import audit
|
| 4 |
+
from core.config import BIDDER_NAMES
|
| 5 |
from core.fallback import load_criteria
|
| 6 |
|
| 7 |
|
|
|
|
| 31 |
|
| 32 |
st.divider()
|
| 33 |
|
| 34 |
+
# Architecture diagram
|
| 35 |
+
st.subheader("System Architecture")
|
| 36 |
+
st.markdown("""
|
| 37 |
+
```
|
| 38 |
+
┌─────────────────────────────────────────────────────────────────────┐
|
| 39 |
+
│ TenderIQ Pipeline │
|
| 40 |
+
└─────────────────────────────────────────────────────────────────────┘
|
| 41 |
+
|
| 42 |
+
📄 Tender PDF 📁 Bidder Documents
|
| 43 |
+
│ (PDFs, scans, photos)
|
| 44 |
+
│ │
|
| 45 |
+
▼ ▼
|
| 46 |
+
┌───────────┐ ┌────────────────────────┐
|
| 47 |
+
│ DeepSeek │ │ 3-Tier OCR Pipeline │
|
| 48 |
+
│ LLM │ │ ① PyMuPDF (typed) │
|
| 49 |
+
│ (Stage 1) │ │ ② Tesseract (scans) │
|
| 50 |
+
└───────────┘ │ ③ Vision LLM (poor) │
|
| 51 |
+
│ └────────────────────────┘
|
| 52 |
+
│ │
|
| 53 |
+
▼ ▼
|
| 54 |
+
┌───────────┐ ┌────────────────────────┐
|
| 55 |
+
│ Criteria │ │ ChromaDB Vector │
|
| 56 |
+
│ C1 – C5 │ │ Index (per bidder) │
|
| 57 |
+
│ (JSON) │ └────────────────────────┘
|
| 58 |
+
└───────────┘ │
|
| 59 |
+
│ │ semantic search
|
| 60 |
+
└──────────────────┬───────────────────┘
|
| 61 |
+
│
|
| 62 |
+
▼
|
| 63 |
+
┌─────────────────────┐
|
| 64 |
+
│ DeepSeek LLM │
|
| 65 |
+
│ (Stage 3 eval) │
|
| 66 |
+
│ │
|
| 67 |
+
│ evidence → verdict │
|
| 68 |
+
│ + confidence score │
|
| 69 |
+
└─────────────────────┘
|
| 70 |
+
│
|
| 71 |
+
┌─────────────┴──────────────┐
|
| 72 |
+
│ │
|
| 73 |
+
▼ ▼
|
| 74 |
+
confidence ≥ 0.80 confidence < 0.80
|
| 75 |
+
verdict kept downgraded to
|
| 76 |
+
needs_review
|
| 77 |
+
│
|
| 78 |
+
▼
|
| 79 |
+
┌─────────────────┐
|
| 80 |
+
│ Human Review │
|
| 81 |
+
│ Queue (Tab 4) │
|
| 82 |
+
└─────────────────┘
|
| 83 |
+
│
|
| 84 |
+
▼
|
| 85 |
+
┌─────────────────┐
|
| 86 |
+
│ Audit Log │
|
| 87 |
+
│ (every action) │
|
| 88 |
+
└─────────────────┘
|
| 89 |
+
```
|
| 90 |
+
""")
|
| 91 |
+
|
| 92 |
+
st.divider()
|
| 93 |
+
|
| 94 |
+
st.subheader("Pipeline Stages")
|
| 95 |
col_a, col_b = st.columns(2)
|
| 96 |
with col_a:
|
| 97 |
st.markdown("""
|
| 98 |
+
**① Extract Criteria**
|
| 99 |
+
DeepSeek reads the full tender PDF and extracts each eligibility criterion as structured JSON —
|
| 100 |
+
category, mandatory flag, rule (threshold / certification / count), source clause, and query hints
|
| 101 |
+
for downstream retrieval.
|
| 102 |
|
| 103 |
+
**② OCR & Index Bidder Documents**
|
| 104 |
+
Three-tier pipeline handles any document format:
|
| 105 |
+
PyMuPDF for typed PDFs (instant, lossless) →
|
| 106 |
+
Tesseract for scans (free, fast) →
|
| 107 |
+
DeepSeek Vision LLM when Tesseract confidence < 65%.
|
| 108 |
+
All text is chunked and indexed into ChromaDB with full provenance metadata.
|
| 109 |
""")
|
| 110 |
with col_b:
|
| 111 |
st.markdown("""
|
| 112 |
+
**③ Evaluate per Criterion**
|
| 113 |
+
For each (bidder × criterion) pair: semantic search retrieves the most relevant evidence chunks,
|
| 114 |
+
DeepSeek decides eligible / not_eligible / needs_review with a combined confidence score
|
| 115 |
+
that weights LLM certainty against OCR quality.
|
| 116 |
+
The safety rule: never silently disqualify — borderline cases always go to human review.
|
| 117 |
|
| 118 |
+
**④ Human Review & Audit**
|
| 119 |
+
Flagged verdicts surface in the Review Queue with full evidence and source citations.
|
| 120 |
+
Every action — extraction, indexing, evaluation, review — is logged to SQLite with
|
| 121 |
+
timestamp, model version, actor, and payload.
|
| 122 |
""")
|
| 123 |
|
| 124 |
st.divider()
|
|
|
|
| 131 |
criteria = lc()
|
| 132 |
st.session_state["criteria"] = [c.model_dump() for c in criteria]
|
| 133 |
verdicts_dict: dict = {}
|
| 134 |
+
for bidder_id in BIDDER_NAMES:
|
| 135 |
verdicts_dict[bidder_id] = [
|
| 136 |
load_evaluation(bidder_id, c.id).model_dump()
|
| 137 |
for c in criteria
|
| 138 |
]
|
| 139 |
st.session_state["verdicts"] = verdicts_dict
|
| 140 |
+
st.success("Pre-computed demo loaded. Navigate to the other tabs.")
|
| 141 |
st.rerun()
|
| 142 |
with col2:
|
| 143 |
+
st.info("Or go to **Tender Analysis** to run the live LLM pipeline.")
|
|
@@ -33,6 +33,11 @@ def render() -> None:
|
|
| 33 |
return
|
| 34 |
|
| 35 |
st.markdown(f"**{len(pending_items)} item(s) pending review**")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
st.divider()
|
| 37 |
|
| 38 |
for bidder_id, idx, v in pending_items:
|
|
@@ -52,7 +57,7 @@ def render() -> None:
|
|
| 52 |
st.markdown(f"Source snippet: _{v['source']['snippet']}_")
|
| 53 |
with col2:
|
| 54 |
conf = v.get("combined_confidence", 0.0)
|
| 55 |
-
confidence_bar(conf, "
|
| 56 |
|
| 57 |
btn_col1, btn_col2, btn_col3 = st.columns(3)
|
| 58 |
key_prefix = f"review_{bidder_id}_{v['criterion_id']}"
|
|
@@ -66,6 +71,9 @@ def render() -> None:
|
|
| 66 |
bidder_id=bidder_id,
|
| 67 |
criterion_id=v["criterion_id"],
|
| 68 |
action_taken="approved",
|
|
|
|
|
|
|
|
|
|
| 69 |
)
|
| 70 |
st.rerun()
|
| 71 |
|
|
@@ -82,7 +90,10 @@ def render() -> None:
|
|
| 82 |
bidder_id=bidder_id,
|
| 83 |
criterion_id=v["criterion_id"],
|
| 84 |
action_taken="edited",
|
|
|
|
|
|
|
| 85 |
edited_value=edit_val,
|
|
|
|
| 86 |
)
|
| 87 |
st.rerun()
|
| 88 |
|
|
@@ -95,5 +106,8 @@ def render() -> None:
|
|
| 95 |
bidder_id=bidder_id,
|
| 96 |
criterion_id=v["criterion_id"],
|
| 97 |
action_taken="rejected",
|
|
|
|
|
|
|
|
|
|
| 98 |
)
|
| 99 |
st.rerun()
|
|
|
|
| 33 |
return
|
| 34 |
|
| 35 |
st.markdown(f"**{len(pending_items)} item(s) pending review**")
|
| 36 |
+
st.caption(
|
| 37 |
+
"These verdicts require human confirmation before being finalised. "
|
| 38 |
+
"The certainty bar shows how confident the model is in its decision to flag the item — "
|
| 39 |
+
"not how likely the bidder meets the criterion."
|
| 40 |
+
)
|
| 41 |
st.divider()
|
| 42 |
|
| 43 |
for bidder_id, idx, v in pending_items:
|
|
|
|
| 57 |
st.markdown(f"Source snippet: _{v['source']['snippet']}_")
|
| 58 |
with col2:
|
| 59 |
conf = v.get("combined_confidence", 0.0)
|
| 60 |
+
confidence_bar(conf, "Certainty in assessment")
|
| 61 |
|
| 62 |
btn_col1, btn_col2, btn_col3 = st.columns(3)
|
| 63 |
key_prefix = f"review_{bidder_id}_{v['criterion_id']}"
|
|
|
|
| 71 |
bidder_id=bidder_id,
|
| 72 |
criterion_id=v["criterion_id"],
|
| 73 |
action_taken="approved",
|
| 74 |
+
original_verdict=v["verdict"],
|
| 75 |
+
original_extracted_value=v.get("extracted_value", ""),
|
| 76 |
+
combined_confidence=v.get("combined_confidence", 0.0),
|
| 77 |
)
|
| 78 |
st.rerun()
|
| 79 |
|
|
|
|
| 90 |
bidder_id=bidder_id,
|
| 91 |
criterion_id=v["criterion_id"],
|
| 92 |
action_taken="edited",
|
| 93 |
+
original_verdict=v["verdict"],
|
| 94 |
+
original_extracted_value=v.get("extracted_value", ""),
|
| 95 |
edited_value=edit_val,
|
| 96 |
+
combined_confidence=v.get("combined_confidence", 0.0),
|
| 97 |
)
|
| 98 |
st.rerun()
|
| 99 |
|
|
|
|
| 106 |
bidder_id=bidder_id,
|
| 107 |
criterion_id=v["criterion_id"],
|
| 108 |
action_taken="rejected",
|
| 109 |
+
original_verdict=v["verdict"],
|
| 110 |
+
original_extracted_value=v.get("extracted_value", ""),
|
| 111 |
+
combined_confidence=v.get("combined_confidence", 0.0),
|
| 112 |
)
|
| 113 |
st.rerun()
|