Step 6: criteria extractor, audit, fallback, and Tab 2 wiring
Browse filesImplements specs/07_criteria_extractor.md. extract_criteria calls DeepSeek
with the full tender text and parses Criterion objects; falls back to
hardcoded precomputed criteria on LLMUnavailable. audit.py writes to SQLite;
fallback.py loads precomputed JSON or hardcoded defaults. Tab 2 renders
criteria cards with category/mandatory badges and rule details.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- core/audit.py +69 -2
- core/criteria_extractor.py +51 -1
- core/fallback.py +75 -2
- specs/07_criteria_extractor.md +79 -0
- ui/tab_tender.py +65 -1
core/audit.py
CHANGED
|
@@ -1,6 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def log(action: str, actor: str = "system", **fields) -> int:
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def query(filters: dict | None = None) -> list[dict]:
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import sqlite3
|
| 3 |
+
from datetime import datetime, timezone
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
from core.config import AUDIT_DB, MODEL_VERSION
|
| 7 |
+
|
| 8 |
+
_SCHEMA = """
|
| 9 |
+
CREATE TABLE IF NOT EXISTS audit_log (
|
| 10 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 11 |
+
ts TEXT NOT NULL,
|
| 12 |
+
action TEXT NOT NULL,
|
| 13 |
+
actor TEXT NOT NULL,
|
| 14 |
+
model_version TEXT,
|
| 15 |
+
bidder_id TEXT,
|
| 16 |
+
criterion_id TEXT,
|
| 17 |
+
payload_json TEXT
|
| 18 |
+
);
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _conn() -> sqlite3.Connection:
|
| 23 |
+
Path(AUDIT_DB).parent.mkdir(parents=True, exist_ok=True)
|
| 24 |
+
conn = sqlite3.connect(AUDIT_DB)
|
| 25 |
+
conn.row_factory = sqlite3.Row
|
| 26 |
+
conn.execute(_SCHEMA)
|
| 27 |
+
conn.commit()
|
| 28 |
+
return conn
|
| 29 |
+
|
| 30 |
+
|
| 31 |
def log(action: str, actor: str = "system", **fields) -> int:
|
| 32 |
+
ts = datetime.now(timezone.utc).isoformat()
|
| 33 |
+
model_version = fields.pop("model_version", MODEL_VERSION)
|
| 34 |
+
bidder_id = fields.pop("bidder_id", None)
|
| 35 |
+
criterion_id = fields.pop("criterion_id", None)
|
| 36 |
+
payload_json = json.dumps(fields) if fields else None
|
| 37 |
+
|
| 38 |
+
conn = _conn()
|
| 39 |
+
cur = conn.execute(
|
| 40 |
+
"INSERT INTO audit_log (ts, action, actor, model_version, bidder_id, criterion_id, payload_json) "
|
| 41 |
+
"VALUES (?, ?, ?, ?, ?, ?, ?)",
|
| 42 |
+
(ts, action, actor, model_version, bidder_id, criterion_id, payload_json),
|
| 43 |
+
)
|
| 44 |
+
conn.commit()
|
| 45 |
+
row_id = cur.lastrowid
|
| 46 |
+
conn.close()
|
| 47 |
+
return row_id
|
| 48 |
|
| 49 |
|
| 50 |
def query(filters: dict | None = None) -> list[dict]:
|
| 51 |
+
conn = _conn()
|
| 52 |
+
sql = "SELECT * FROM audit_log"
|
| 53 |
+
params: list = []
|
| 54 |
+
if filters:
|
| 55 |
+
clauses = []
|
| 56 |
+
if "bidder_id" in filters:
|
| 57 |
+
clauses.append("bidder_id = ?")
|
| 58 |
+
params.append(filters["bidder_id"])
|
| 59 |
+
if "action" in filters:
|
| 60 |
+
clauses.append("action = ?")
|
| 61 |
+
params.append(filters["action"])
|
| 62 |
+
if "date_from" in filters:
|
| 63 |
+
clauses.append("ts >= ?")
|
| 64 |
+
params.append(filters["date_from"])
|
| 65 |
+
if "date_to" in filters:
|
| 66 |
+
clauses.append("ts <= ?")
|
| 67 |
+
params.append(filters["date_to"])
|
| 68 |
+
if clauses:
|
| 69 |
+
sql += " WHERE " + " AND ".join(clauses)
|
| 70 |
+
sql += " ORDER BY id DESC"
|
| 71 |
+
rows = conn.execute(sql, params).fetchall()
|
| 72 |
+
conn.close()
|
| 73 |
+
return [dict(r) for r in rows]
|
core/criteria_extractor.py
CHANGED
|
@@ -1,6 +1,56 @@
|
|
|
|
|
| 1 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from core.schemas import Criterion
|
| 3 |
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
from core import audit, fallback
|
| 7 |
+
from core.config import MODEL_VERSION
|
| 8 |
+
from core.llm_client import LLM, LLMUnavailable
|
| 9 |
+
from core.pdf_utils import extract_pages
|
| 10 |
+
from core.prompts import EXTRACT_CRITERIA_PROMPT_SYSTEM
|
| 11 |
from core.schemas import Criterion
|
| 12 |
|
| 13 |
|
| 14 |
+
@st.cache_resource
|
| 15 |
+
def _get_llm() -> LLM:
|
| 16 |
+
return LLM()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
|
| 20 |
+
pages = extract_pages(tender_pdf_path)
|
| 21 |
+
tender_text = "\n\n".join(
|
| 22 |
+
f"--- PAGE {p['page']} ---\n{p['text']}" for p in pages
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
user_prompt = f"""{tender_text}
|
| 26 |
+
|
| 27 |
+
---
|
| 28 |
+
Return JSON in this exact format:
|
| 29 |
+
{{"criteria": [
|
| 30 |
+
{{"id": "C1", "title": "...", "category": "financial|technical|compliance",
|
| 31 |
+
"mandatory": true, "description": "...",
|
| 32 |
+
"rule": {{"type": "numeric_threshold|count_threshold|certification_present|document_present",
|
| 33 |
+
"field": "...", "operator": ">=|<=|==|exists", "value": null, "unit": null}},
|
| 34 |
+
"query_hints": ["...", "...", "..."],
|
| 35 |
+
"source_page": 1, "source_clause": "3.2(a)"}},
|
| 36 |
+
...
|
| 37 |
+
]}}
|
| 38 |
+
Each criterion must have all fields. Assign sequential IDs C1, C2, ...
|
| 39 |
+
"""
|
| 40 |
+
|
| 41 |
+
try:
|
| 42 |
+
llm = _get_llm()
|
| 43 |
+
result = llm.chat_json(EXTRACT_CRITERIA_PROMPT_SYSTEM, user_prompt)
|
| 44 |
+
raw_list = result.get("criteria", [])
|
| 45 |
+
criteria = [Criterion(**c) for c in raw_list]
|
| 46 |
+
audit.log(
|
| 47 |
+
"criteria_extracted",
|
| 48 |
+
model_version=MODEL_VERSION,
|
| 49 |
+
count=len(criteria),
|
| 50 |
+
source=str(tender_pdf_path.name),
|
| 51 |
+
)
|
| 52 |
+
return criteria
|
| 53 |
+
except LLMUnavailable:
|
| 54 |
+
audit.log("precomputed_fallback_used", reason="LLMUnavailable in extract_criteria")
|
| 55 |
+
st.session_state["fallback_active"] = True
|
| 56 |
+
return fallback.load_criteria()
|
core/fallback.py
CHANGED
|
@@ -1,9 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from core.schemas import Criterion, Verdict
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def load_criteria() -> list[Criterion]:
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def load_evaluation(bidder_id: str, criterion_id: str) -> Verdict:
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from core.config import PRECOMPUTED_DIR
|
| 4 |
from core.schemas import Criterion, Verdict
|
| 5 |
|
| 6 |
+
_HARDCODED_CRITERIA = [
|
| 7 |
+
{
|
| 8 |
+
"id": "C1", "title": "Minimum Annual Turnover",
|
| 9 |
+
"category": "financial", "mandatory": True,
|
| 10 |
+
"description": "The bidder shall have a minimum average annual turnover of INR 5 Crore during the last three financial years (2022-23, 2023-24, 2024-25).",
|
| 11 |
+
"rule": {"type": "numeric_threshold", "field": "annual_turnover_inr",
|
| 12 |
+
"operator": ">=", "value": 50000000, "unit": "INR"},
|
| 13 |
+
"query_hints": ["annual turnover", "total revenue", "INR crore", "audited financials", "CA certificate"],
|
| 14 |
+
"source_page": 2, "source_clause": "3.2(a)",
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"id": "C2", "title": "Completed Construction Projects",
|
| 18 |
+
"category": "technical", "mandatory": True,
|
| 19 |
+
"description": "The bidder must have successfully completed at least three (3) similar construction projects of value not less than INR 1 Crore each in the last five financial years.",
|
| 20 |
+
"rule": {"type": "count_threshold", "field": "completed_projects",
|
| 21 |
+
"operator": ">=", "value": 3, "unit": None},
|
| 22 |
+
"query_hints": ["completed projects", "construction experience", "work order", "completion certificate", "similar projects"],
|
| 23 |
+
"source_page": 2, "source_clause": "3.2(b)",
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"id": "C3", "title": "GST Registration",
|
| 27 |
+
"category": "compliance", "mandatory": True,
|
| 28 |
+
"description": "The bidder shall possess a valid Goods and Services Tax (GST) registration certificate. The GSTIN must be active as on the date of submission.",
|
| 29 |
+
"rule": {"type": "certification_present", "field": "gstin",
|
| 30 |
+
"operator": "exists", "value": None, "unit": None},
|
| 31 |
+
"query_hints": ["GSTIN", "GST certificate", "GST registration", "tax registration"],
|
| 32 |
+
"source_page": 2, "source_clause": "3.2(c)",
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"id": "C4", "title": "ISO 9001:2015 Certification",
|
| 36 |
+
"category": "compliance", "mandatory": True,
|
| 37 |
+
"description": "The bidder shall hold a valid ISO 9001:2015 Quality Management System certification issued by an accredited certification body.",
|
| 38 |
+
"rule": {"type": "certification_present", "field": "iso_9001",
|
| 39 |
+
"operator": "exists", "value": None, "unit": None},
|
| 40 |
+
"query_hints": ["ISO 9001", "quality management", "ISO certificate", "QMS certification"],
|
| 41 |
+
"source_page": 2, "source_clause": "3.2(d)",
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"id": "C5", "title": "Paramilitary Infrastructure Experience",
|
| 45 |
+
"category": "technical", "mandatory": False,
|
| 46 |
+
"description": "Preferably, the bidder may have prior experience with construction or maintenance of paramilitary or defence infrastructure.",
|
| 47 |
+
"rule": {"type": "document_present", "field": "paramilitary_experience",
|
| 48 |
+
"operator": "exists", "value": None, "unit": None},
|
| 49 |
+
"query_hints": ["paramilitary", "defence infrastructure", "CRPF", "BSF", "security forces"],
|
| 50 |
+
"source_page": 2, "source_clause": "3.2(e)",
|
| 51 |
+
},
|
| 52 |
+
]
|
| 53 |
+
|
| 54 |
|
| 55 |
def load_criteria() -> list[Criterion]:
|
| 56 |
+
criteria_file = PRECOMPUTED_DIR / "criteria.json"
|
| 57 |
+
if criteria_file.exists():
|
| 58 |
+
data = json.loads(criteria_file.read_text(encoding="utf-8"))
|
| 59 |
+
return [Criterion(**c) for c in data.get("criteria", data)]
|
| 60 |
+
return [Criterion(**c) for c in _HARDCODED_CRITERIA]
|
| 61 |
|
| 62 |
|
| 63 |
def load_evaluation(bidder_id: str, criterion_id: str) -> Verdict:
|
| 64 |
+
eval_file = PRECOMPUTED_DIR / f"eval_{bidder_id}.json"
|
| 65 |
+
if eval_file.exists():
|
| 66 |
+
data = json.loads(eval_file.read_text(encoding="utf-8"))
|
| 67 |
+
verdicts = data if isinstance(data, list) else data.get("verdicts", [])
|
| 68 |
+
for v in verdicts:
|
| 69 |
+
if v.get("criterion_id") == criterion_id:
|
| 70 |
+
return Verdict(**v)
|
| 71 |
+
# Return a needs_review verdict as safe default
|
| 72 |
+
from core.config import MODEL_VERSION
|
| 73 |
+
from datetime import datetime, timezone
|
| 74 |
+
return Verdict(
|
| 75 |
+
bidder_id=bidder_id,
|
| 76 |
+
criterion_id=criterion_id,
|
| 77 |
+
verdict="needs_review",
|
| 78 |
+
reason="Pre-computed evaluation not available. Manual review required.",
|
| 79 |
+
model_version=MODEL_VERSION,
|
| 80 |
+
timestamp=datetime.now(timezone.utc).isoformat(),
|
| 81 |
+
combined_confidence=0.0,
|
| 82 |
+
)
|
specs/07_criteria_extractor.md
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Spec 07 — Criteria Extractor
|
| 2 |
+
|
| 3 |
+
**Step:** 6 of 15
|
| 4 |
+
**Time budget:** ~30 min
|
| 5 |
+
**Checkpoint:** Tab 2 in the running app shows 5 criteria extracted from the mock tender.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Goal
|
| 10 |
+
|
| 11 |
+
Implement `core/criteria_extractor.py` and wire up `ui/tab_tender.py` to call it. On `LLMUnavailable`, fall back to `fallback.load_criteria()`. Cache result in `st.session_state["criteria"]`.
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
## `core/criteria_extractor.py`
|
| 16 |
+
|
| 17 |
+
### `extract_criteria(tender_pdf_path: Path) -> list[Criterion]`
|
| 18 |
+
|
| 19 |
+
1. Call `pdf_utils.extract_pages(tender_pdf_path)` → list of `{"page": int, "text": str}`.
|
| 20 |
+
2. Join pages: `tender_text = "\n\n--- PAGE {n} ---\n\n".join(p["text"] for p in pages)`.
|
| 21 |
+
3. Build user prompt:
|
| 22 |
+
```
|
| 23 |
+
{tender_text}
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
Return JSON in this exact format:
|
| 27 |
+
{"criteria": [
|
| 28 |
+
{"id": "C1", "title": "...", "category": "financial|technical|compliance",
|
| 29 |
+
"mandatory": true|false, "description": "...",
|
| 30 |
+
"rule": {"type": "numeric_threshold|count_threshold|certification_present|document_present",
|
| 31 |
+
"field": "...", "operator": ">=|<=|==|exists", "value": null_or_number, "unit": null_or_string},
|
| 32 |
+
"query_hints": ["...", "..."],
|
| 33 |
+
"source_page": <int>, "source_clause": "..."},
|
| 34 |
+
...
|
| 35 |
+
]}
|
| 36 |
+
```
|
| 37 |
+
4. Call `llm.chat_json(EXTRACT_CRITERIA_PROMPT_SYSTEM, user_prompt)`.
|
| 38 |
+
5. Parse `result["criteria"]` → validate each item as `Criterion(**item)`.
|
| 39 |
+
6. Log `criteria_extracted` to audit with `payload_json=json.dumps({"count": len(criteria)})`.
|
| 40 |
+
7. Return `list[Criterion]`.
|
| 41 |
+
|
| 42 |
+
On `LLMUnavailable`:
|
| 43 |
+
- Log `precomputed_fallback_used` to audit.
|
| 44 |
+
- Set `st.session_state["fallback_active"] = True`.
|
| 45 |
+
- Return `fallback.load_criteria()`.
|
| 46 |
+
|
| 47 |
+
LLM singleton: use `@st.cache_resource` on a getter `_get_llm()` so the client is created once per Streamlit session.
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## `ui/tab_tender.py`
|
| 52 |
+
|
| 53 |
+
Renders the Tender Analysis tab. Replaces the stub.
|
| 54 |
+
|
| 55 |
+
Layout:
|
| 56 |
+
1. `st.header("Tender Analysis")`
|
| 57 |
+
2. File uploader: `uploaded = st.file_uploader("Upload tender PDF", type=["pdf"])`. If nothing uploaded, use the preloaded mock: `data/tender/crpf_construction_tender.pdf`.
|
| 58 |
+
3. Show the filename being used.
|
| 59 |
+
4. Button **"Extract Criteria (Live LLM)"**:
|
| 60 |
+
- Save uploaded bytes to a temp file (or use the mock path directly).
|
| 61 |
+
- Call `criteria_extractor.extract_criteria(path)`.
|
| 62 |
+
- Store in `st.session_state["criteria"]`.
|
| 63 |
+
5. If `st.session_state.get("criteria")`:
|
| 64 |
+
- Show `st.success(f"Extracted {len(criteria)} criteria")`.
|
| 65 |
+
- For each criterion, render a card using `st.expander`:
|
| 66 |
+
- Title + mandatory/optional badge (🔴 Mandatory / 🟡 Optional).
|
| 67 |
+
- Category badge (color-coded: financial=blue, technical=green, compliance=orange).
|
| 68 |
+
- Description text.
|
| 69 |
+
- Source: page + clause.
|
| 70 |
+
- Rule details (type, operator, value, unit).
|
| 71 |
+
|
| 72 |
+
---
|
| 73 |
+
|
| 74 |
+
## Acceptance Criteria
|
| 75 |
+
|
| 76 |
+
1. `extract_criteria(Path("data/tender/crpf_construction_tender.pdf"))` returns a list of 5 `Criterion` objects (when LLM is available) or the precomputed fallback (when not).
|
| 77 |
+
2. Tab 2 renders without error in both modes.
|
| 78 |
+
3. Each extracted criterion shows title, mandatory status, category, and source clause.
|
| 79 |
+
4. `st.session_state["criteria"]` is populated after the button is clicked.
|
ui/tab_tender.py
CHANGED
|
@@ -1,5 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
def render() -> None:
|
| 4 |
st.header("Tender Analysis")
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tempfile
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
import streamlit as st
|
| 5 |
|
| 6 |
+
from core import criteria_extractor
|
| 7 |
+
from core.config import DATA_DIR
|
| 8 |
+
|
| 9 |
+
_MOCK_TENDER = DATA_DIR / "tender" / "crpf_construction_tender.pdf"
|
| 10 |
+
|
| 11 |
+
_CATEGORY_COLORS = {
|
| 12 |
+
"financial": "🔵",
|
| 13 |
+
"technical": "🟢",
|
| 14 |
+
"compliance": "🟠",
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
|
| 18 |
def render() -> None:
|
| 19 |
st.header("Tender Analysis")
|
| 20 |
+
|
| 21 |
+
uploaded = st.file_uploader("Upload tender PDF (leave blank to use pre-loaded mock)", type=["pdf"])
|
| 22 |
+
|
| 23 |
+
if uploaded:
|
| 24 |
+
tender_bytes = uploaded.read()
|
| 25 |
+
tender_name = uploaded.name
|
| 26 |
+
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
|
| 27 |
+
tmp.write(tender_bytes)
|
| 28 |
+
tender_path = Path(tmp.name)
|
| 29 |
+
else:
|
| 30 |
+
tender_path = _MOCK_TENDER
|
| 31 |
+
tender_name = _MOCK_TENDER.name
|
| 32 |
+
|
| 33 |
+
st.caption(f"Using: **{tender_name}**")
|
| 34 |
+
|
| 35 |
+
if st.button("Extract Criteria (Live LLM)", type="primary"):
|
| 36 |
+
with st.spinner("Calling DeepSeek to extract eligibility criteria…"):
|
| 37 |
+
criteria = criteria_extractor.extract_criteria(tender_path)
|
| 38 |
+
st.session_state["criteria"] = [c.model_dump() for c in criteria]
|
| 39 |
+
st.session_state["tender_path"] = str(tender_path)
|
| 40 |
+
|
| 41 |
+
criteria_data = st.session_state.get("criteria")
|
| 42 |
+
if criteria_data:
|
| 43 |
+
st.success(f"Extracted **{len(criteria_data)}** criteria")
|
| 44 |
+
|
| 45 |
+
if st.session_state.get("fallback_active"):
|
| 46 |
+
st.warning("⚠ Live API unavailable — showing pre-computed criteria.")
|
| 47 |
+
|
| 48 |
+
for c in criteria_data:
|
| 49 |
+
mandatory_badge = "🔴 Mandatory" if c["mandatory"] else "🟡 Optional"
|
| 50 |
+
cat_icon = _CATEGORY_COLORS.get(c["category"], "⚪")
|
| 51 |
+
label = f"{cat_icon} **{c['id']}** — {c['title']} {mandatory_badge}"
|
| 52 |
+
with st.expander(label, expanded=False):
|
| 53 |
+
col1, col2 = st.columns([2, 1])
|
| 54 |
+
with col1:
|
| 55 |
+
st.markdown(f"**Description:** {c['description']}")
|
| 56 |
+
rule = c["rule"]
|
| 57 |
+
rule_parts = [f"Type: `{rule['type']}`", f"Field: `{rule['field']}`",
|
| 58 |
+
f"Operator: `{rule['operator']}`"]
|
| 59 |
+
if rule.get("value") is not None:
|
| 60 |
+
rule_parts.append(f"Value: `{rule['value']}`")
|
| 61 |
+
if rule.get("unit"):
|
| 62 |
+
rule_parts.append(f"Unit: `{rule['unit']}`")
|
| 63 |
+
st.markdown(" · ".join(rule_parts))
|
| 64 |
+
with col2:
|
| 65 |
+
st.markdown(f"**Category:** {c['category'].capitalize()}")
|
| 66 |
+
st.markdown(f"**Source:** Page {c['source_page']}, Clause {c['source_clause']}")
|
| 67 |
+
if c.get("query_hints"):
|
| 68 |
+
hints = ", ".join(f"`{h}`" for h in c["query_hints"])
|
| 69 |
+
st.markdown(f"**Query hints:** {hints}")
|