Spaces:

JaydeepR
/

TenderIQ

Sleeping

App Files Files Community

TenderIQ / core /criteria_extractor.py

JaydeepR

Step 6: criteria extractor, audit, fallback, and Tab 2 wiring

61e2cc7 15 days ago

raw

history blame contribute delete

1.79 kB

	import json
	from pathlib import Path

	import streamlit as st

	from core import audit, fallback
	from core.config import MODEL_VERSION
	from core.llm_client import LLM, LLMUnavailable
	from core.pdf_utils import extract_pages
	from core.prompts import EXTRACT_CRITERIA_PROMPT_SYSTEM
	from core.schemas import Criterion


	@st.cache_resource
	def _get_llm() -> LLM:
	    """Return the shared LLM client.

	    The function is wrapped in ``st.cache_resource``, so the client is
	    constructed on the first call and the cached instance is handed back
	    on later calls instead of building a new one each time.
	    """
	    client = LLM()
	    return client


	def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
	    """Extract evaluation criteria from a tender PDF using the LLM.

	    The PDF is read page by page, each page is prefixed with a
	    ``--- PAGE n ---`` marker, and the combined text is sent to the LLM
	    together with a description of the JSON structure expected back.
	    Every entry of the returned ``"criteria"`` list is turned into a
	    ``Criterion``.

	    Args:
	        tender_pdf_path: Location of the tender PDF to analyse.

	    Returns:
	        The criteria built from the LLM response, or the result of
	        ``fallback.load_criteria()`` when the LLM is unavailable.

	    Side effects:
	        Writes an audit entry on both the success and the fallback path,
	        and sets ``st.session_state["fallback_active"]`` to ``True`` when
	        the fallback is used.
	    """
	    pages = extract_pages(tender_pdf_path)
	    tender_text = "\n\n".join(
	        f"--- PAGE {p['page']} ---\n{p['text']}" for p in pages
	    )

	    # Doubled braces are literal braces in the f-string. The alternatives
	    # are separated by a plain "|": a backslash before it would be an
	    # invalid escape sequence and would leak into the prompt text.
	    user_prompt = f"""{tender_text}

	---
	Return JSON in this exact format:
	{{"criteria": [
	{{"id": "C1", "title": "...", "category": "financial|technical|compliance",
	"mandatory": true, "description": "...",
	"rule": {{"type": "numeric_threshold|count_threshold|certification_present|document_present",
	"field": "...", "operator": ">=|<=|==|exists", "value": null, "unit": null}},
	"query_hints": ["...", "...", "..."],
	"source_page": 1, "source_clause": "3.2(a)"}},
	...
	]}}
	Each criterion must have all fields. Assign sequential IDs C1, C2, ...
	"""

	    # Only the LLM calls are guarded; everything after them runs in the
	    # ``else`` branch so unrelated errors are not mistaken for an outage.
	    try:
	        llm = _get_llm()
	        result = llm.chat_json(EXTRACT_CRITERIA_PROMPT_SYSTEM, user_prompt)
	    except LLMUnavailable:
	        audit.log("precomputed_fallback_used", reason="LLMUnavailable in extract_criteria")
	        st.session_state["fallback_active"] = True
	        return fallback.load_criteria()
	    else:
	        # A missing key and an explicit null are both treated as "no criteria".
	        raw_list = result.get("criteria") or []
	        criteria = [Criterion(**c) for c in raw_list]
	        audit.log(
	            "criteria_extracted",
	            model_version=MODEL_VERSION,
	            count=len(criteria),
	            source=tender_pdf_path.name,
	        )
	        return criteria