Spaces:

JaydeepR
/

TenderIQ

Sleeping

JaydeepR Claude Sonnet 4.6 committed 15 days ago

Commit

661eb14

1 Parent(s): c589fa3

Step 1: project skeleton — all stubs, directory structure, and entry point

Creates every file listed in specs/00_skeleton.md: app.py with 5-tab Streamlit
shell, all core/ and ui/ stubs importable without logic, data/scripts/assets
directory tree, requirements.txt, packages.txt, .env.example.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (35) hide show

.env.example +1 -0
app.py +50 -0
assets/screenshots/.gitkeep +0 -0
core/__init__.py +0 -0
core/audit.py +6 -0
core/bidder_processor.py +10 -0
core/chunker.py +11 -0
core/config.py +21 -0
core/criteria_extractor.py +6 -0
core/evaluator.py +9 -0
core/fallback.py +9 -0
core/llm_client.py +22 -0
core/ocr_pipeline.py +13 -0
core/pdf_utils.py +14 -0
core/prompts.py +27 -0
core/schemas.py +68 -0
core/vectorstore.py +16 -0
data/bidders/bidder_a/.gitkeep +0 -0
data/bidders/bidder_b/.gitkeep +0 -0
data/bidders/bidder_c/.gitkeep +0 -0
data/precomputed/.gitkeep +0 -0
data/tender/.gitkeep +0 -0
deck/.gitkeep +0 -0
packages.txt +2 -0
requirements.txt +12 -0
scripts/generate_mock_data.py +1 -0
scripts/precompute_results.py +1 -0
scripts/smoke_test.py +1 -0
ui/__init__.py +0 -0
ui/components.py +1 -0
ui/tab_audit.py +5 -0
ui/tab_bidders.py +5 -0
ui/tab_overview.py +5 -0
ui/tab_review.py +5 -0
ui/tab_tender.py +5 -0

.env.example ADDED Viewed

	@@ -0,0 +1 @@


1	+ DEEPSEEK_API_KEY=your_key_here

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import streamlit as st
+from ui.tab_overview import render as render_overview
+from ui.tab_tender import render as render_tender
+from ui.tab_bidders import render as render_bidders
+from ui.tab_review import render as render_review
+from ui.tab_audit import render as render_audit
+# Page-level settings: browser-tab title and icon, and a wide content layout.
+st.set_page_config(
+    page_title="TenderIQ",
+    page_icon="⚖️",
+    layout="wide",
+)
+# ── Sidebar ──────────────────────────────────────────────────────────────────
+with st.sidebar:
+    st.markdown("## ⚖️ TenderIQ")
+    st.caption("Explainable AI for Tender Evaluation")
+    st.divider()
+    # Connection status — placeholder until core/llm_client.py is wired
+    # (the text below is hard-coded; no connectivity check is performed here).
+    st.markdown("🔴 **DeepSeek:** not connected")
+    st.divider()
+    if st.button("Reset Session", use_container_width=True):
+        # Snapshot the keys with list() first: entries are deleted while looping.
+        for key in list(st.session_state.keys()):
+            del st.session_state[key]
+        # Re-execute the script so the UI is redrawn from the now-empty state.
+        st.rerun()
+# ── Tabs ─────────────────────────────────────────────────────────────────────
+# Five tabs, in display order; each tabN below is paired with one ui.tab_* render().
+tab1, tab2, tab3, tab4, tab5 = st.tabs([
+    "Overview",
+    "Tender Analysis",
+    "Bidder Evaluation",
+    "Human Review",
+    "Audit Log",
+])
+with tab1:
+    render_overview()
+with tab2:
+    render_tender()
+with tab3:
+    render_bidders()
+with tab4:
+    render_review()
+with tab5:
+    render_audit()

assets/screenshots/.gitkeep ADDED Viewed

File without changes

core/__init__.py ADDED Viewed

File without changes

core/audit.py ADDED Viewed

	@@ -0,0 +1,6 @@

+def log(action: str, actor: str = "system", **fields) -> int:
+    """Record an audit event. Stub: always raises NotImplementedError.
+    `actor` defaults to "system"; extra keyword arguments are collected in `fields`.
+    NOTE(review): the int return is presumably the stored entry's id — confirm when implemented.
+    """
+    raise NotImplementedError
+def query(filters: dict | None = None) -> list[dict]:
+    """Fetch audit entries as a list of dicts. Stub: always raises NotImplementedError.
+    NOTE(review): the expected keys of `filters` are not defined yet — document them when implemented.
+    """
+    raise NotImplementedError

core/bidder_processor.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from pathlib import Path
+from core.schemas import Criterion, Evidence
+def process_bidder(bidder_id: str, files: list[Path]) -> None:
+    """Process the given files for one bidder. Stub: always raises NotImplementedError.
+    Declared to return None, so any result must be produced as a side effect
+    (presumably OCR + chunking + indexing — TODO confirm once implemented).
+    """
+    raise NotImplementedError
+def gather_evidence(bidder_id: str, criterion: Criterion, k: int = 4) -> list[Evidence]:
+    """Collect Evidence items for one bidder and one criterion. Stub: always raises NotImplementedError.
+    NOTE(review): `k` (default 4) is presumably the maximum number of items to retrieve — confirm.
+    """
+    raise NotImplementedError

core/chunker.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from core.ocr_pipeline import ExtractedPage
+def chunk_tender(pages: list[dict], tender_id: str) -> list[dict]:
+    """Split tender pages into chunk dicts. Stub: always raises NotImplementedError.
+    NOTE(review): takes plain dict pages, unlike chunk_bidder which takes ExtractedPage
+    objects; the dict schema is not defined in this module — confirm against the caller.
+    """
+    raise NotImplementedError
+def chunk_bidder(
+    pages: list[ExtractedPage], bidder_id: str, doc_name: str
+) -> list[dict]:
+    """Split one bidder document's extracted pages into chunk dicts.
+    Stub: always raises NotImplementedError. The chunk dict schema is not defined yet.
+    """
+    raise NotImplementedError

core/config.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+from pathlib import Path
+from dotenv import load_dotenv
+# Load variables from a local .env file (if any) into the process environment
+# before they are read below.
+load_dotenv()
+# None when the variable is not set; callers must handle the missing-key case.
+DEEPSEEK_API_KEY: str | None = os.getenv("DEEPSEEK_API_KEY")
+DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
+MODEL_NAME = "deepseek-chat"
+# Model name plus a fixed date stamp, e.g. "deepseek-chat@2026-05-07".
+MODEL_VERSION = f"{MODEL_NAME}@2026-05-07"
+# Confidence thresholds on a 0–1 scale.
+# NOTE(review): how each threshold is applied is not shown in this file — confirm in the evaluator/OCR code.
+CONFIDENCE_HIGH = 0.80
+CONFIDENCE_REVIEW = 0.55
+OCR_TESSERACT_MIN_CONF = 0.65
+# Two levels above this file (core/config.py), i.e. the project root.
+BASE_DIR = Path(__file__).resolve().parent.parent
+DATA_DIR = BASE_DIR / "data"
+# CHROMA_DIR and AUDIT_DB are plain strings; the other locations stay Path objects.
+CHROMA_DIR = str(BASE_DIR / ".chroma")
+AUDIT_DB = str(BASE_DIR / "audit.db")
+PRECOMPUTED_DIR = DATA_DIR / "precomputed"
+OCR_CACHE_DIR = BASE_DIR / ".ocr_cache"

core/criteria_extractor.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from pathlib import Path
+from core.schemas import Criterion
+def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
+    """Extract eligibility criteria from the tender PDF at `tender_pdf_path`.
+    Stub: always raises NotImplementedError.
+    """
+    raise NotImplementedError

core/evaluator.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from core.schemas import Criterion, Verdict
+def evaluate(bidder_id: str, criterion: Criterion) -> Verdict:
+    """Evaluate one bidder against one criterion and return a Verdict.
+    Stub: always raises NotImplementedError.
+    """
+    raise NotImplementedError
+def evaluate_bidder(bidder_id: str, criteria: list[Criterion]) -> list[Verdict]:
+    """Evaluate one bidder against several criteria and return the Verdicts.
+    Stub: always raises NotImplementedError.
+    NOTE(review): presumably one Verdict per criterion, in input order — confirm when implemented.
+    """
+    raise NotImplementedError

core/fallback.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from core.schemas import Criterion, Verdict
+def load_criteria() -> list[Criterion]:
+    """Load criteria without running extraction. Stub: always raises NotImplementedError.
+    NOTE(review): presumably reads precomputed results from disk — confirm the source when implemented.
+    """
+    raise NotImplementedError
+def load_evaluation(bidder_id: str, criterion_id: str) -> Verdict:
+    """Load the Verdict for a (bidder, criterion) pair without running the evaluator.
+    Stub: always raises NotImplementedError.
+    NOTE(review): behaviour for an unknown pair is undefined so far — decide when implementing.
+    """
+    raise NotImplementedError

core/llm_client.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from pathlib import Path
+class LLMUnavailable(Exception):
+    """Exception type for an unavailable LLM backend; nothing in this module raises it yet."""
+    pass
+class LLM:
+    """Client wrapper for the chat/vision model. Skeleton only: no request logic yet."""
+    def __init__(self, api_key: str | None = None):
+        # Placeholder: `api_key` is accepted but currently ignored and not stored.
+        pass
+    def chat_json(self, system: str, user: str, max_retries: int = 2) -> dict:
+        """Send a system + user prompt and return the reply as a dict.
+        Stub: always raises NotImplementedError.
+        """
+        raise NotImplementedError
+    def chat_vision(
+        self,
+        system: str,
+        user_text: str,
+        image: bytes | str | Path,
+        max_retries: int = 2,
+    ) -> str:
+        """Send a prompt together with an image and return the reply text.
+        Stub: always raises NotImplementedError.
+        NOTE(review): `image` may be bytes, str or Path; whether a str is a path, URL or
+        base64 payload is not defined yet — specify when implementing.
+        """
+        raise NotImplementedError

core/ocr_pipeline.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from pathlib import Path
+# Per-page extraction record.
+# NOTE(review): this is a plain class with annotations only — no __init__ is generated and
+# no attribute is assigned, so ExtractedPage(page=..., ...) would fail. Consider making it a
+# dataclass or a pydantic model when the pipeline is implemented.
+class ExtractedPage:
+    page: int
+    text: str
+    source_type: str  # "text_pdf" | "tesseract" | "vision_llm"
+    confidence: float
+    raw_tier_results: dict
+def extract_document(file_path: Path) -> list[ExtractedPage]:
+    """Extract the pages of the document at `file_path` as ExtractedPage records.
+    Stub: always raises NotImplementedError.
+    """
+    raise NotImplementedError

core/pdf_utils.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from pathlib import Path
+import PIL.Image
+def extract_pages(path: Path) -> list[dict]:
+    """Return the pages of the PDF at `path` as a list of dicts.
+    Stub: always raises NotImplementedError. The per-page dict schema is not defined yet.
+    """
+    raise NotImplementedError
+def is_text_pdf(path: Path) -> bool:
+    """Report whether the PDF at `path` is a text PDF. Stub: always raises NotImplementedError.
+    NOTE(review): presumably True means an extractable text layer (no OCR needed) — confirm the criterion.
+    """
+    raise NotImplementedError
+def render_page_to_image(path: Path, page_no: int, dpi: int = 200) -> PIL.Image.Image:
+    """Rasterise one page of the PDF at `path` to a PIL image (default 200 dpi).
+    Stub: always raises NotImplementedError.
+    NOTE(review): whether `page_no` is 0- or 1-based is not specified — fix the convention when implementing.
+    """
+    raise NotImplementedError

core/prompts.py ADDED Viewed

	@@ -0,0 +1,27 @@

+# System prompt for criteria extraction. The backslash at each line end is a line
+# continuation inside the string, so the value is one paragraph with no newlines.
+EXTRACT_CRITERIA_PROMPT_SYSTEM = """\
+You are an expert in Indian government tender analysis (CRPF context). Your job is to extract \
+eligibility criteria from a tender document and return them as STRICT JSON. Never invent criteria \
+not present in the text. Classify each criterion as mandatory or optional based on cue words: \
+"shall", "must", "mandatory", "required", "minimum" → mandatory; "preferred", "desirable", \
+"may", "optionally" → optional. For each criterion, generate 3–5 short noun-phrase query_hints \
+that an evaluator would search for in bidder documents.\
+"""
+# System prompt for evaluating one criterion against retrieved evidence. The three outcomes
+# named here match the `verdict` literals of core.schemas.Verdict. Line-end backslashes
+# join the text into a single paragraph.
+EVALUATE_CRITERION_PROMPT_SYSTEM = """\
+You are a procurement evaluator. Given ONE criterion and a list of retrieved evidence chunks from \
+a bidder's documents, decide eligible / not_eligible / needs_review. Always cite the strongest \
+single source. NEVER guess values not present in the evidence. If evidence is missing or \
+ambiguous, return needs_review with reason. Output STRICT JSON.\
+"""
+# System prompt for the vision-model OCR call: verbatim transcription only, with tables
+# rendered as markdown. Line-end backslashes join the text into a single paragraph.
+VISION_OCR_PROMPT_SYSTEM = """\
+You are an OCR engine for Indian government procurement documents. Transcribe the image text \
+faithfully, preserving numeric values, dates, certificate IDs, and tabular structure (use \
+markdown tables). Do NOT summarize, interpret, or omit anything. Output transcribed text only — \
+no commentary.\
+"""
+# User-turn text for the vision OCR call; the two adjacent literals are
+# concatenated into one sentence pair at compile time.
+VISION_OCR_USER = (
+    "Transcribe this document page completely. Pay special attention to numeric values like "
+    "turnover figures (INR / Crore / Lakh), dates, and registration numbers."
+)

core/schemas.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from __future__ import annotations
+from typing import Literal, Optional
+from pydantic import BaseModel, Field
+import uuid
+# Machine-checkable rule attached to a Criterion: a rule type, the field it applies to,
+# a comparison operator, and an optional value/unit.
+# NOTE(review): `value`/`unit` are optional, presumably because "exists" rules need neither — confirm.
+class Rule(BaseModel):
+    type: Literal["numeric_threshold", "count_threshold", "certification_present", "document_present"]
+    field: str
+    operator: Literal[">=", "<=", "==", "exists"]
+    value: float | int | None = None
+    unit: str | None = None
+# One eligibility criterion: identity and wording, its category and mandatory flag,
+# the Rule to check, retrieval hints, and where it was found (page and clause).
+# All fields are required; none has a default.
+class Criterion(BaseModel):
+    id: str
+    title: str
+    category: Literal["financial", "technical", "compliance"]
+    mandatory: bool
+    description: str
+    rule: Rule
+    query_hints: list[str]
+    source_page: int
+    source_clause: str
+# A piece of text taken from a bidder document, with its location (document, page) and
+# how the text was obtained. `ocr_confidence` is optional and defaults to None.
+class Evidence(BaseModel):
+    bidder_id: str
+    doc_name: str
+    page: int
+    text: str
+    source_type: Literal["text_pdf", "tesseract", "vision_llm"]
+    ocr_confidence: float | None = None
+# Citation attached to a Verdict: document name, page, a text snippet, and how
+# the text was obtained (same three source types as Evidence).
+class Source(BaseModel):
+    doc_name: str
+    page: int
+    snippet: str
+    source_type: Literal["text_pdf", "tesseract", "vision_llm"]
+# Outcome of evaluating one bidder against one criterion. Only bidder_id, criterion_id
+# and verdict are required; every other field has a default.
+class Verdict(BaseModel):
+    # Auto-generated per instance: "V-" followed by the first 8 hex chars of a random UUID4.
+    verdict_id: str = Field(default_factory=lambda: f"V-{uuid.uuid4().hex[:8]}")
+    bidder_id: str
+    criterion_id: str
+    verdict: Literal["eligible", "not_eligible", "needs_review"]
+    extracted_value: str | None = None
+    normalized_value: float | int | None = None
+    source: Source | None = None
+    llm_confidence: float = 0.0
+    ocr_confidence: float | None = None
+    # NOTE(review): how llm/ocr confidence are combined is not defined in this file — confirm in the evaluator.
+    combined_confidence: float = 0.0
+    reason: str = ""
+    model_version: str = ""
+    # Stored as a string; NOTE(review): format (presumably ISO 8601) is not enforced here.
+    timestamp: str = ""
+    # New verdicts start as "pending".
+    review_status: Literal["pending", "approved", "edited", "rejected"] = "pending"
+# One audit-log row. `ts`, `action` and `actor` are required; the rest default to None.
+# NOTE(review): `id` is optional, presumably because the store assigns it on insert — confirm.
+class AuditEntry(BaseModel):
+    id: int | None = None
+    ts: str
+    action: str
+    actor: str
+    model_version: str | None = None
+    bidder_id: str | None = None
+    criterion_id: str | None = None
+    # Extra payload kept as a JSON-encoded string rather than a dict.
+    payload_json: str | None = None

core/vectorstore.py ADDED Viewed

	@@ -0,0 +1,16 @@

+def get_client():
+    """Return the vector-store client. Stub: always raises NotImplementedError.
+    NOTE(review): no return annotation yet — add one once the client type is fixed.
+    """
+    raise NotImplementedError
+def get_collection(name: str):
+    """Return the collection called `name`. Stub: always raises NotImplementedError.
+    NOTE(review): whether a missing collection is created or is an error is not decided yet.
+    """
+    raise NotImplementedError
+def add_chunks(collection, chunks: list[dict], metadatas: list[dict]) -> None:
+    """Add chunks and their metadata to `collection`. Stub: always raises NotImplementedError.
+    NOTE(review): `chunks` and `metadatas` are presumably parallel lists of equal length — confirm.
+    """
+    raise NotImplementedError
+def query(
+    collection, text: str, k: int = 4, where: dict | None = None
+) -> list[dict]:
+    """Search `collection` for `text` and return result dicts. Stub: always raises NotImplementedError.
+    NOTE(review): `k` (default 4) is presumably the result limit and `where` a metadata
+    filter — confirm both, and the result dict schema, when implementing.
+    """
+    raise NotImplementedError

data/bidders/bidder_a/.gitkeep ADDED Viewed

File without changes

data/bidders/bidder_b/.gitkeep ADDED Viewed

File without changes

data/bidders/bidder_c/.gitkeep ADDED Viewed

File without changes

data/precomputed/.gitkeep ADDED Viewed

File without changes

data/tender/.gitkeep ADDED Viewed

File without changes

deck/.gitkeep ADDED Viewed

File without changes

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ tesseract-ocr
2	+ poppler-utils

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+streamlit==1.39.0
+openai==1.51.0
+pymupdf==1.24.10
+pytesseract==0.3.13
+Pillow==10.4.0
+numpy==1.26.4
+chromadb==0.5.5
+sentence-transformers==3.1.1
+pydantic==2.9.2
+python-dotenv==1.0.1
+reportlab==4.2.5
+pandas==2.2.3

scripts/generate_mock_data.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Step 2 — generates mock tender and bidder PDFs + noisy scan PNG."""

scripts/precompute_results.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Step 11 — runs the full pipeline and writes data/precomputed/*.json."""

scripts/smoke_test.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Step 13 — programmatic end-to-end check; exits 0 on success."""

ui/__init__.py ADDED Viewed

File without changes

ui/components.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Shared UI widgets — implemented incrementally as Tab 3 and Tab 4 need them.

ui/tab_audit.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import streamlit as st
+def render() -> None:
+    """Draw the Audit Log tab: a header followed by a placeholder info box."""
+    st.header("Audit Log")
+    st.info("Coming soon — sortable audit log with CSV export.")

ui/tab_bidders.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import streamlit as st
+def render() -> None:
+    """Draw the Bidder Evaluation tab: a header followed by a placeholder info box."""
+    st.header("Bidder Evaluation")
+    st.info("Coming soon — per-bidder, per-criterion verdict table.")

ui/tab_overview.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import streamlit as st
+def render() -> None:
+    """Draw the Overview tab: a header followed by a placeholder info box."""
+    st.header("Overview")
+    st.info("Coming soon — architecture diagram, KPIs, and demo CTA.")

ui/tab_review.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import streamlit as st
+def render() -> None:
+    """Draw the Human Review tab: a header followed by a placeholder info box."""
+    st.header("Human Review Queue")
+    st.info("Coming soon — approve / edit / reject flagged verdicts.")

ui/tab_tender.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import streamlit as st
+def render() -> None:
+    """Draw the Tender Analysis tab: a header followed by a placeholder info box."""
+    st.header("Tender Analysis")
+    st.info("Coming soon — upload tender and extract eligibility criteria.")