Spaces:

JaydeepR
/

TenderIQ

Sleeping

File size: 1,789 Bytes

61e2cc7
661eb14
61e2cc7
 
 
 
 
 
 
 
661eb14
 
 
61e2cc7
 
 
 
 
661eb14
61e2cc7

import json
from pathlib import Path

import streamlit as st

from core import audit, fallback
from core.config import MODEL_VERSION
from core.llm_client import LLM, LLMUnavailable
from core.pdf_utils import extract_pages
from core.prompts import EXTRACT_CRITERIA_PROMPT_SYSTEM
from core.schemas import Criterion


@st.cache_resource
def _get_llm() -> LLM:
    """Construct the ``LLM`` client used by this module.

    The function is wrapped in ``st.cache_resource``, so Streamlit caches the
    returned object and hands the same instance back on later calls instead
    of constructing a new client each time.
    """
    client = LLM()
    return client


def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
    """Extract evaluation criteria from a tender PDF using the LLM.

    The PDF is split into pages with ``extract_pages``; each page's text is
    prefixed with a ``--- PAGE n ---`` marker and the pages are joined into a
    single prompt, followed by the JSON format the model must return.

    Args:
        tender_pdf_path: Path of the tender PDF to analyse.

    Returns:
        One ``Criterion`` per entry of the ``"criteria"`` list in the model's
        JSON reply (an empty list when that key is missing or null). If the
        LLM is unavailable, the result of ``fallback.load_criteria()`` is
        returned instead.

    Side effects:
        Writes a ``criteria_extracted`` audit event on success. On
        ``LLMUnavailable`` it writes ``precomputed_fallback_used`` and sets
        ``st.session_state["fallback_active"]`` to ``True``.

    Raises:
        Whatever ``Criterion(**c)`` raises for a malformed entry; such errors
        are deliberately not routed to the fallback.
    """
    pages = extract_pages(tender_pdf_path)
    tender_text = "\n\n".join(
        f"--- PAGE {p['page']} ---\n{p['text']}" for p in pages
    )

    user_prompt = f"""{tender_text}

---
Return JSON in this exact format:
{{"criteria": [
  {{"id": "C1", "title": "...", "category": "financial|technical|compliance",
   "mandatory": true, "description": "...",
   "rule": {{"type": "numeric_threshold|count_threshold|certification_present|document_present",
            "field": "...", "operator": ">=|<=|==|exists", "value": null, "unit": null}},
   "query_hints": ["...", "...", "..."],
   "source_page": 1, "source_clause": "3.2(a)"}},
  ...
]}}
Each criterion must have all fields. Assign sequential IDs C1, C2, ...
"""

    # Only the LLM client construction and call are guarded: they are the
    # steps expected to signal LLMUnavailable. Parsing and audit logging run
    # outside the try so the fallback handler cannot mask unrelated failures.
    try:
        result = _get_llm().chat_json(EXTRACT_CRITERIA_PROMPT_SYSTEM, user_prompt)
    except LLMUnavailable:
        audit.log("precomputed_fallback_used", reason="LLMUnavailable in extract_criteria")
        st.session_state["fallback_active"] = True
        return fallback.load_criteria()

    # `or []` also covers an explicit `"criteria": null` in the reply, which
    # `.get("criteria", [])` would hand back as None and crash the iteration.
    raw_list = result.get("criteria") or []
    criteria = [Criterion(**c) for c in raw_list]
    audit.log(
        "criteria_extracted",
        model_version=MODEL_VERSION,
        count=len(criteria),
        source=str(tender_pdf_path.name),
    )
    return criteria