| import json |
| from pathlib import Path |
|
|
| import streamlit as st |
|
|
| from core import audit, fallback |
| from core.config import MODEL_VERSION |
| from core.llm_client import LLM, LLMUnavailable |
| from core.pdf_utils import extract_pages |
| from core.prompts import EXTRACT_CRITERIA_PROMPT_SYSTEM |
| from core.schemas import Criterion |
|
|
|
|
@st.cache_resource
def _get_llm() -> LLM:
    """Return the LLM client used by this module.

    The function is decorated with ``st.cache_resource`` so that Streamlit
    can hand back the already-built client rather than constructing a new
    one on every call.
    """
    client = LLM()
    return client
|
|
|
|
def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
    """Extract evaluation criteria from a tender PDF using the LLM.

    The pages returned by ``extract_pages`` are joined into one text, each
    page prefixed with a ``--- PAGE <n> ---`` header, followed by an
    instruction describing the JSON format expected back. The prompt is sent
    together with ``EXTRACT_CRITERIA_PROMPT_SYSTEM`` via ``chat_json``.

    Args:
        tender_pdf_path: Path of the tender PDF to analyse.

    Returns:
        One ``Criterion`` per entry of the reply's ``"criteria"`` list (an
        empty list when that key is missing or null). If the LLM raises
        ``LLMUnavailable``, the result of ``fallback.load_criteria()`` is
        returned instead and ``st.session_state["fallback_active"]`` is set.
    """
    pages = extract_pages(tender_pdf_path)
    tender_text = "\n\n".join(
        f"--- PAGE {p['page']} ---\n{p['text']}" for p in pages
    )

    user_prompt = f"""{tender_text}

---
Return JSON in this exact format:
{{"criteria": [
{{"id": "C1", "title": "...", "category": "financial|technical|compliance",
"mandatory": true, "description": "...",
"rule": {{"type": "numeric_threshold|count_threshold|certification_present|document_present",
"field": "...", "operator": ">=|<=|==|exists", "value": null, "unit": null}},
"query_hints": ["...", "...", "..."],
"source_page": 1, "source_clause": "3.2(a)"}},
...
]}}
Each criterion must have all fields. Assign sequential IDs C1, C2, ...
"""

    # Only the client creation and the LLM call are guarded: those are the
    # steps expected to raise LLMUnavailable. Parsing and audit logging stay
    # outside so their errors are never confused with an outage.
    try:
        llm = _get_llm()
        result = llm.chat_json(EXTRACT_CRITERIA_PROMPT_SYSTEM, user_prompt)
    except LLMUnavailable:
        audit.log("precomputed_fallback_used", reason="LLMUnavailable in extract_criteria")
        st.session_state["fallback_active"] = True
        return fallback.load_criteria()

    # `or []` also covers a reply of {"criteria": null}; .get's default only
    # applies when the key is absent, and iterating None would raise.
    raw_list = result.get("criteria") or []
    criteria = [Criterion(**c) for c in raw_list]
    audit.log(
        "criteria_extracted",
        model_version=MODEL_VERSION,
        count=len(criteria),
        source=tender_pdf_path.name,
    )
    return criteria
|
|