Spaces:

JaydeepR
/

TenderIQ

Sleeping

JaydeepR Claude Sonnet 4.6 committed 15 days ago

Commit

61e2cc7

1 Parent(s): f42bfb0

Step 6: criteria extractor, audit, fallback, and Tab 2 wiring

Implements specs/07_criteria_extractor.md. extract_criteria calls DeepSeek
with the full tender text and parses Criterion objects; falls back to
hardcoded precomputed criteria on LLMUnavailable. audit.py writes to SQLite;
fallback.py loads precomputed JSON or hardcoded defaults. Tab 2 renders
criteria cards with category/mandatory badges and rule details.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (5) hide show

core/audit.py +69 -2
core/criteria_extractor.py +51 -1
core/fallback.py +75 -2
specs/07_criteria_extractor.md +79 -0
ui/tab_tender.py +65 -1

core/audit.py CHANGED Viewed

@@ -1,6 +1,73 @@
 def log(action: str, actor: str = "system", **fields) -> int:
-    raise NotImplementedError
 def query(filters: dict | None = None) -> list[dict]:
-    raise NotImplementedError

+import json
+import sqlite3
+from datetime import datetime, timezone
+from pathlib import Path
+from core.config import AUDIT_DB, MODEL_VERSION
+_SCHEMA = """
+CREATE TABLE IF NOT EXISTS audit_log (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    ts TEXT NOT NULL,
+    action TEXT NOT NULL,
+    actor TEXT NOT NULL,
+    model_version TEXT,
+    bidder_id TEXT,
+    criterion_id TEXT,
+    payload_json TEXT
+);
+"""
+def _conn() -> sqlite3.Connection:
+    """Open the audit database, creating its directory and table if needed.
+
+    Returns:
+        A connection whose rows are ``sqlite3.Row`` objects. The caller is
+        responsible for closing it.
+    """
+    db_path = Path(AUDIT_DB)
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    connection = sqlite3.connect(AUDIT_DB)
+    connection.row_factory = sqlite3.Row
+    # The schema uses CREATE TABLE IF NOT EXISTS, so running it on every open is safe.
+    connection.execute(_SCHEMA)
+    connection.commit()
+    return connection
 def log(action: str, actor: str = "system", **fields) -> int:
+    """Append one entry to the audit log and return its row id.
+
+    Args:
+        action: Name of the event being recorded.
+        actor: Who performed the action; defaults to ``"system"``.
+        **fields: Extra data. ``model_version`` (default ``MODEL_VERSION``),
+            ``bidder_id`` and ``criterion_id`` are stored in their own
+            columns; whatever remains is JSON-encoded into ``payload_json``
+            (or stored as NULL when nothing remains).
+
+    Returns:
+        The ``id`` of the inserted ``audit_log`` row.
+    """
+    ts = datetime.now(timezone.utc).isoformat()
+    model_version = fields.pop("model_version", MODEL_VERSION)
+    bidder_id = fields.pop("bidder_id", None)
+    criterion_id = fields.pop("criterion_id", None)
+    payload_json = json.dumps(fields) if fields else None
+    conn = _conn()
+    try:
+        cur = conn.execute(
+            "INSERT INTO audit_log (ts, action, actor, model_version, bidder_id, criterion_id, payload_json) "
+            "VALUES (?, ?, ?, ?, ?, ?, ?)",
+            (ts, action, actor, model_version, bidder_id, criterion_id, payload_json),
+        )
+        conn.commit()
+        return cur.lastrowid
+    finally:
+        # Close even when the INSERT or commit raises, so the handle is not leaked.
+        conn.close()
 def query(filters: dict | None = None) -> list[dict]:
+    """Return audit log rows, newest first, optionally filtered.
+
+    Args:
+        filters: Optional mapping. Recognised keys are ``bidder_id`` and
+            ``action`` (exact match) and ``date_from`` / ``date_to``
+            (inclusive bounds compared as text against the stored ISO-8601
+            ``ts`` column). Unrecognised keys are ignored.
+
+    Returns:
+        One plain ``dict`` per matching row, ordered by ``id`` descending.
+    """
+    # Maps each supported filter key to its parameterised WHERE fragment.
+    supported = (
+        ("bidder_id", "bidder_id = ?"),
+        ("action", "action = ?"),
+        ("date_from", "ts >= ?"),
+        ("date_to", "ts <= ?"),
+    )
+    sql = "SELECT * FROM audit_log"
+    params: list = []
+    if filters:
+        clauses = []
+        for key, clause in supported:
+            if key in filters:
+                clauses.append(clause)
+                params.append(filters[key])
+        if clauses:
+            sql += " WHERE " + " AND ".join(clauses)
+    sql += " ORDER BY id DESC"
+    conn = _conn()
+    try:
+        rows = conn.execute(sql, params).fetchall()
+    finally:
+        # Close even when the SELECT raises, so the handle is not leaked.
+        conn.close()
+    return [dict(r) for r in rows]

core/criteria_extractor.py CHANGED Viewed

@@ -1,6 +1,56 @@
 from pathlib import Path
 from core.schemas import Criterion
 def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
-    raise NotImplementedError

+import json
 from pathlib import Path
+import streamlit as st
+from core import audit, fallback
+from core.config import MODEL_VERSION
+from core.llm_client import LLM, LLMUnavailable
+from core.pdf_utils import extract_pages
+from core.prompts import EXTRACT_CRITERIA_PROMPT_SYSTEM
 from core.schemas import Criterion
+@st.cache_resource
+def _get_llm() -> LLM:
+    """Return the LLM client, constructed through Streamlit's resource cache."""
+    client = LLM()
+    return client
 def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
+    """Extract eligibility criteria from a tender PDF using the LLM.
+
+    The PDF's pages are joined into one text with a ``--- PAGE n ---`` marker
+    before each page, followed by the JSON format instructions, and sent to
+    the LLM together with ``EXTRACT_CRITERIA_PROMPT_SYSTEM``.
+
+    Args:
+        tender_pdf_path: Path of the tender PDF to analyse.
+
+    Returns:
+        The criteria parsed from the LLM response, or the result of
+        ``fallback.load_criteria()`` when the LLM raises ``LLMUnavailable``.
+    """
+    page_blocks = [
+        f"--- PAGE {p['page']} ---\n{p['text']}" for p in extract_pages(tender_pdf_path)
+    ]
+    tender_text = "\n\n".join(page_blocks)
+    user_prompt = f"""{tender_text}
+---
+Return JSON in this exact format:
+{{"criteria": [
+  {{"id": "C1", "title": "...", "category": "financial|technical|compliance",
+   "mandatory": true, "description": "...",
+   "rule": {{"type": "numeric_threshold|count_threshold|certification_present|document_present",
+            "field": "...", "operator": ">=|<=|==|exists", "value": null, "unit": null}},
+   "query_hints": ["...", "...", "..."],
+   "source_page": 1, "source_clause": "3.2(a)"}},
+  ...
+]}}
+Each criterion must have all fields. Assign sequential IDs C1, C2, ...
+"""
+    try:
+        response = _get_llm().chat_json(EXTRACT_CRITERIA_PROMPT_SYSTEM, user_prompt)
+        parsed = [Criterion(**item) for item in response.get("criteria", [])]
+        audit.log(
+            "criteria_extracted",
+            model_version=MODEL_VERSION,
+            count=len(parsed),
+            source=str(tender_pdf_path.name),
+        )
+        return parsed
+    except LLMUnavailable:
+        audit.log("precomputed_fallback_used", reason="LLMUnavailable in extract_criteria")
+        # NOTE(review): this flag is only ever set here, never cleared on a later
+        # successful extraction - confirm whether it is meant to be sticky.
+        st.session_state["fallback_active"] = True
+        return fallback.load_criteria()

core/fallback.py CHANGED Viewed

@@ -1,9 +1,82 @@
 from core.schemas import Criterion, Verdict
 def load_criteria() -> list[Criterion]:
-    raise NotImplementedError
 def load_evaluation(bidder_id: str, criterion_id: str) -> Verdict:
-    raise NotImplementedError

+import json
+from core.config import PRECOMPUTED_DIR
 from core.schemas import Criterion, Verdict
+# Built-in default criteria, returned by load_criteria() when no
+# precomputed criteria.json file exists. Each dict is expanded into
+# Criterion(**c), so its keys must match the Criterion fields.
+_HARDCODED_CRITERIA = [
+    {
+        "id": "C1", "title": "Minimum Annual Turnover",
+        "category": "financial", "mandatory": True,
+        "description": "The bidder shall have a minimum average annual turnover of INR 5 Crore during the last three financial years (2022-23, 2023-24, 2024-25).",
+        "rule": {"type": "numeric_threshold", "field": "annual_turnover_inr",
+                 "operator": ">=", "value": 50000000, "unit": "INR"},
+        "query_hints": ["annual turnover", "total revenue", "INR crore", "audited financials", "CA certificate"],
+        "source_page": 2, "source_clause": "3.2(a)",
+    },
+    {
+        "id": "C2", "title": "Completed Construction Projects",
+        "category": "technical", "mandatory": True,
+        "description": "The bidder must have successfully completed at least three (3) similar construction projects of value not less than INR 1 Crore each in the last five financial years.",
+        "rule": {"type": "count_threshold", "field": "completed_projects",
+                 "operator": ">=", "value": 3, "unit": None},
+        "query_hints": ["completed projects", "construction experience", "work order", "completion certificate", "similar projects"],
+        "source_page": 2, "source_clause": "3.2(b)",
+    },
+    {
+        "id": "C3", "title": "GST Registration",
+        "category": "compliance", "mandatory": True,
+        "description": "The bidder shall possess a valid Goods and Services Tax (GST) registration certificate. The GSTIN must be active as on the date of submission.",
+        "rule": {"type": "certification_present", "field": "gstin",
+                 "operator": "exists", "value": None, "unit": None},
+        "query_hints": ["GSTIN", "GST certificate", "GST registration", "tax registration"],
+        "source_page": 2, "source_clause": "3.2(c)",
+    },
+    {
+        "id": "C4", "title": "ISO 9001:2015 Certification",
+        "category": "compliance", "mandatory": True,
+        "description": "The bidder shall hold a valid ISO 9001:2015 Quality Management System certification issued by an accredited certification body.",
+        "rule": {"type": "certification_present", "field": "iso_9001",
+                 "operator": "exists", "value": None, "unit": None},
+        "query_hints": ["ISO 9001", "quality management", "ISO certificate", "QMS certification"],
+        "source_page": 2, "source_clause": "3.2(d)",
+    },
+    {
+        "id": "C5", "title": "Paramilitary Infrastructure Experience",
+        "category": "technical", "mandatory": False,
+        "description": "Preferably, the bidder may have prior experience with construction or maintenance of paramilitary or defence infrastructure.",
+        "rule": {"type": "document_present", "field": "paramilitary_experience",
+                 "operator": "exists", "value": None, "unit": None},
+        "query_hints": ["paramilitary", "defence infrastructure", "CRPF", "BSF", "security forces"],
+        "source_page": 2, "source_clause": "3.2(e)",
+    },
+]
 def load_criteria() -> list[Criterion]:
+    """Load fallback criteria from the precomputed file, else built-in defaults.
+
+    Reads ``criteria.json`` from ``PRECOMPUTED_DIR`` when it exists. The file
+    may hold either a bare JSON array of criteria or an object with a
+    ``"criteria"`` array. When the file is absent, or is an object without a
+    ``"criteria"`` key, the hardcoded defaults are used instead.
+
+    Returns:
+        The criteria as ``Criterion`` objects.
+    """
+    criteria_file = PRECOMPUTED_DIR / "criteria.json"
+    if criteria_file.exists():
+        data = json.loads(criteria_file.read_text(encoding="utf-8"))
+        # A bare JSON array has no .get(), and a dict without the key must not
+        # be iterated as if it were the list; resolve the shape explicitly.
+        raw_items = data if isinstance(data, list) else data.get("criteria")
+        if raw_items is not None:
+            return [Criterion(**c) for c in raw_items]
+    return [Criterion(**c) for c in _HARDCODED_CRITERIA]
 def load_evaluation(bidder_id: str, criterion_id: str) -> Verdict:
+    """Return the precomputed verdict for one bidder/criterion pair.
+
+    Looks in ``eval_<bidder_id>.json`` under ``PRECOMPUTED_DIR``, which may be
+    a bare list of verdicts or an object with a ``"verdicts"`` list, and
+    returns the first entry whose ``criterion_id`` matches.
+
+    Args:
+        bidder_id: Bidder whose evaluation file is consulted.
+        criterion_id: Criterion to look up within that file.
+
+    Returns:
+        The matching ``Verdict``, or a ``needs_review`` verdict with zero
+        confidence when no file or no matching entry exists.
+    """
+    eval_file = PRECOMPUTED_DIR / f"eval_{bidder_id}.json"
+    if eval_file.exists():
+        payload = json.loads(eval_file.read_text(encoding="utf-8"))
+        candidates = payload if isinstance(payload, list) else payload.get("verdicts", [])
+        match = next(
+            (v for v in candidates if v.get("criterion_id") == criterion_id),
+            None,
+        )
+        if match is not None:
+            return Verdict(**match)
+    # Nothing precomputed for this pair: hand back a safe "needs_review" default.
+    from core.config import MODEL_VERSION
+    from datetime import datetime, timezone
+    default_fields = dict(
+        bidder_id=bidder_id,
+        criterion_id=criterion_id,
+        verdict="needs_review",
+        reason="Pre-computed evaluation not available. Manual review required.",
+        model_version=MODEL_VERSION,
+        timestamp=datetime.now(timezone.utc).isoformat(),
+        combined_confidence=0.0,
+    )
+    return Verdict(**default_fields)

specs/07_criteria_extractor.md ADDED Viewed

	@@ -0,0 +1,79 @@

+# Spec 07 — Criteria Extractor
+**Step:** 6 of 15
+**Time budget:** ~30 min
+**Checkpoint:** Tab 2 in the running app shows 5 criteria extracted from the mock tender.
+---
+## Goal
+Implement `core/criteria_extractor.py` and wire up `ui/tab_tender.py` to call it. On `LLMUnavailable`, fall back to `fallback.load_criteria()`. Cache result in `st.session_state["criteria"]`.
+---
+## `core/criteria_extractor.py`
+### `extract_criteria(tender_pdf_path: Path) -> list[Criterion]`
+1. Call `pdf_utils.extract_pages(tender_pdf_path)` → list of `{"page": int, "text": str}`.
+2. Join pages, prefixing each page's text with its page marker: `tender_text = "\n\n".join(f"--- PAGE {p['page']} ---\n{p['text']}" for p in pages)`.
+3. Build user prompt:
+   ```
+   {tender_text}
+   ---
+   Return JSON in this exact format:
+   {"criteria": [
+     {"id": "C1", "title": "...", "category": "financial|technical|compliance",
+      "mandatory": true|false, "description": "...",
+      "rule": {"type": "numeric_threshold|count_threshold|certification_present|document_present",
+               "field": "...", "operator": ">=|<=|==|exists", "value": null_or_number, "unit": null_or_string},
+      "query_hints": ["...", "..."],
+      "source_page": <int>, "source_clause": "..."},
+     ...
+   ]}
+   ```
+4. Call `llm.chat_json(EXTRACT_CRITERIA_PROMPT_SYSTEM, user_prompt)`.
+5. Parse `result["criteria"]` → validate each item as `Criterion(**item)`.
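+6. Log `criteria_extracted` to audit by passing the count as a keyword field, e.g. `audit.log("criteria_extracted", count=len(criteria))`; `audit.log` serialises the extra keyword fields into `payload_json` itself.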
+7. Return `list[Criterion]`.
+On `LLMUnavailable`:
+- Log `precomputed_fallback_used` to audit.
+- Set `st.session_state["fallback_active"] = True`.
+- Return `fallback.load_criteria()`.
+LLM singleton: use `@st.cache_resource` on a getter `_get_llm()` so the client is created once and then reused across Streamlit reruns and sessions.
+---
+## `ui/tab_tender.py`
+Renders the Tender Analysis tab. Replaces the stub.
+Layout:
+1. `st.header("Tender Analysis")`
+2. File uploader: `uploaded = st.file_uploader("Upload tender PDF", type=["pdf"])`. If nothing uploaded, use the preloaded mock: `data/tender/crpf_construction_tender.pdf`.
+3. Show the filename being used.
+4. Button **"Extract Criteria (Live LLM)"**:
+   - Save uploaded bytes to a temp file (or use the mock path directly).
+   - Call `criteria_extractor.extract_criteria(path)`.
+   - Store in `st.session_state["criteria"]`.
+5. If `st.session_state.get("criteria")`:
+   - Show `st.success(f"Extracted {len(criteria)} criteria")`.
+   - For each criterion, render a card using `st.expander`:
+     - Title + mandatory/optional badge (🔴 Mandatory / 🟡 Optional).
+     - Category badge (color-coded: financial=blue, technical=green, compliance=orange).
+     - Description text.
+     - Source: page + clause.
+     - Rule details (type, operator, value, unit).
+---
+## Acceptance Criteria
+1. `extract_criteria(Path("data/tender/crpf_construction_tender.pdf"))` returns a list of 5 `Criterion` objects (when LLM is available) or the precomputed fallback (when not).
+2. Tab 2 renders without error in both modes.
+3. Each extracted criterion shows title, mandatory status, category, and source clause.
+4. `st.session_state["criteria"]` is populated after the button is clicked.

ui/tab_tender.py CHANGED Viewed

@@ -1,5 +1,69 @@
 import streamlit as st
 def render() -> None:
     st.header("Tender Analysis")
-    st.info("Coming soon — upload tender and extract eligibility criteria.")

+import tempfile
+from pathlib import Path
 import streamlit as st
+from core import criteria_extractor
+from core.config import DATA_DIR
+# Pre-loaded sample tender, used by render() when no PDF has been uploaded.
+_MOCK_TENDER = DATA_DIR / "tender" / "crpf_construction_tender.pdf"
+# Emoji marker shown in each criterion's expander label, keyed by category.
+_CATEGORY_COLORS = {
+    "financial": "🔵",
+    "technical": "🟢",
+    "compliance": "🟠",
+}
 def render() -> None:
+    """Render the Tender Analysis tab.
+
+    Shows a PDF uploader (the pre-loaded mock tender is used when nothing is
+    uploaded) and a button that runs criteria extraction. The result is stored
+    in ``st.session_state["criteria"]`` as plain dicts, with the PDF path in
+    ``st.session_state["tender_path"]``. Any stored criteria are then drawn as
+    one expander per criterion.
+    """
     st.header("Tender Analysis")
+    uploaded = st.file_uploader("Upload tender PDF (leave blank to use pre-loaded mock)", type=["pdf"])
+    tender_name = uploaded.name if uploaded else _MOCK_TENDER.name
+    st.caption(f"Using: **{tender_name}**")
+    if st.button("Extract Criteria (Live LLM)", type="primary"):
+        if uploaded:
+            # Write the upload to disk only when extraction is requested, so a
+            # new temp file is not left behind on every script rerun.
+            # getvalue() returns the full content regardless of stream position.
+            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
+                tmp.write(uploaded.getvalue())
+            tender_path = Path(tmp.name)
+        else:
+            tender_path = _MOCK_TENDER
+        with st.spinner("Calling DeepSeek to extract eligibility criteria…"):
+            criteria = criteria_extractor.extract_criteria(tender_path)
+        st.session_state["criteria"] = [c.model_dump() for c in criteria]
+        st.session_state["tender_path"] = str(tender_path)
+    criteria_data = st.session_state.get("criteria")
+    if criteria_data:
+        st.success(f"Extracted **{len(criteria_data)}** criteria")
+        if st.session_state.get("fallback_active"):
+            st.warning("⚠ Live API unavailable — showing pre-computed criteria.")
+        for c in criteria_data:
+            mandatory_badge = "🔴 Mandatory" if c["mandatory"] else "🟡 Optional"
+            cat_icon = _CATEGORY_COLORS.get(c["category"], "⚪")
+            label = f"{cat_icon} **{c['id']}** — {c['title']}  {mandatory_badge}"
+            with st.expander(label, expanded=False):
+                col1, col2 = st.columns([2, 1])
+                with col1:
+                    st.markdown(f"**Description:** {c['description']}")
+                    rule = c["rule"]
+                    rule_parts = [f"Type: `{rule['type']}`", f"Field: `{rule['field']}`",
+                                  f"Operator: `{rule['operator']}`"]
+                    # Value and unit are optional on a rule; show them only when set.
+                    if rule.get("value") is not None:
+                        rule_parts.append(f"Value: `{rule['value']}`")
+                    if rule.get("unit"):
+                        rule_parts.append(f"Unit: `{rule['unit']}`")
+                    st.markdown(" · ".join(rule_parts))
+                with col2:
+                    st.markdown(f"**Category:** {c['category'].capitalize()}")
+                    st.markdown(f"**Source:** Page {c['source_page']}, Clause {c['source_clause']}")
+                    if c.get("query_hints"):
+                        hints = ", ".join(f"`{h}`" for h in c["query_hints"])
+                        st.markdown(f"**Query hints:** {hints}")