# File size: 1,789 Bytes
import json
from pathlib import Path
import streamlit as st
from core import audit, fallback
from core.config import MODEL_VERSION
from core.llm_client import LLM, LLMUnavailable
from core.pdf_utils import extract_pages
from core.prompts import EXTRACT_CRITERIA_PROMPT_SYSTEM
from core.schemas import Criterion
@st.cache_resource
def _get_llm() -> LLM:
    """Build the LLM client used by this module.

    The function is wrapped in ``st.cache_resource``, so Streamlit is
    expected to hand back the cached client on later calls instead of
    constructing a new one each time.
    """
    client = LLM()
    return client
def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
    """Extract the evaluation criteria of a tender PDF through the LLM.

    The pages returned by ``extract_pages`` are concatenated, each under a
    ``--- PAGE <n> ---`` header, and followed by a description of the JSON
    document the model has to return. Every entry under the ``"criteria"``
    key of the reply is turned into a ``Criterion``; the extraction is then
    recorded with ``audit.log``.

    Args:
        tender_pdf_path: Location of the tender PDF to analyse.

    Returns:
        The criteria built from the model's reply. When ``LLMUnavailable``
        is raised, the result of ``fallback.load_criteria()`` is returned
        instead and ``st.session_state["fallback_active"]`` is set to True.
    """
    page_blocks = [
        f"--- PAGE {page['page']} ---\n{page['text']}"
        for page in extract_pages(tender_pdf_path)
    ]
    tender_text = "\n\n".join(page_blocks)

    # Doubled braces are literal braces in the f-string: the model is shown
    # the exact JSON shape it must answer with.
    user_prompt = f"""{tender_text}
---
Return JSON in this exact format:
{{"criteria": [
{{"id": "C1", "title": "...", "category": "financial|technical|compliance",
"mandatory": true, "description": "...",
"rule": {{"type": "numeric_threshold|count_threshold|certification_present|document_present",
"field": "...", "operator": ">=|<=|==|exists", "value": null, "unit": null}},
"query_hints": ["...", "...", "..."],
"source_page": 1, "source_clause": "3.2(a)"}},
...
]}}
Each criterion must have all fields. Assign sequential IDs C1, C2, ...
"""

    # Client creation, the model call, parsing and the audit entry all stay
    # inside the try so LLMUnavailable from any of them triggers the fallback.
    try:
        response = _get_llm().chat_json(EXTRACT_CRITERIA_PROMPT_SYSTEM, user_prompt)
        extracted = []
        for fields in response.get("criteria", []):
            extracted.append(Criterion(**fields))
        audit.log(
            "criteria_extracted",
            model_version=MODEL_VERSION,
            count=len(extracted),
            source=str(tender_pdf_path.name),
        )
        return extracted
    except LLMUnavailable:
        # Model not reachable: record it, flag the session, and serve the
        # precomputed criteria instead.
        audit.log("precomputed_fallback_used", reason="LLMUnavailable in extract_criteria")
        st.session_state["fallback_active"] = True
        return fallback.load_criteria()
|