JaydeepR Claude Sonnet 4.6 committed on
Commit
661eb14
·
1 Parent(s): c589fa3

Step 1: project skeleton — all stubs, directory structure, and entry point

Browse files

Creates every file listed in specs/00_skeleton.md: app.py with 5-tab Streamlit
shell, all core/ and ui/ stubs importable without logic, data/scripts/assets
directory tree, requirements.txt, packages.txt, .env.example.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ DEEPSEEK_API_KEY=your_key_here
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from ui.tab_overview import render as render_overview
4
+ from ui.tab_tender import render as render_tender
5
+ from ui.tab_bidders import render as render_bidders
6
+ from ui.tab_review import render as render_review
7
+ from ui.tab_audit import render as render_audit
8
+
9
# Page-level settings for the Streamlit app.
st.set_page_config(page_title="TenderIQ", page_icon="⚖️", layout="wide")

# ── Sidebar ──────────────────────────────────────────────────────────────────
with st.sidebar:
    st.markdown("## ⚖️ TenderIQ")
    st.caption("Explainable AI for Tender Evaluation")
    st.divider()
    # Connection status — placeholder until core/llm_client.py is wired
    st.markdown("🔴 **DeepSeek:** not connected")
    st.divider()
    reset_clicked = st.button("Reset Session", use_container_width=True)
    if reset_clicked:
        # Snapshot the keys first so deleting entries cannot disturb iteration.
        stale_keys = list(st.session_state.keys())
        for stale_key in stale_keys:
            del st.session_state[stale_key]
        st.rerun()

# ── Tabs ─────────────────────────────────────────────────────────────────────
# Each tab label is paired with the function that draws its contents, so the
# label order and the render order cannot drift apart.
tab_renderers = (
    ("Overview", render_overview),
    ("Tender Analysis", render_tender),
    ("Bidder Evaluation", render_bidders),
    ("Human Review", render_review),
    ("Audit Log", render_audit),
)

tab_containers = st.tabs([label for label, _ in tab_renderers])
for container, (_, render_tab) in zip(tab_containers, tab_renderers):
    with container:
        render_tab()
assets/screenshots/.gitkeep ADDED
File without changes
core/__init__.py ADDED
File without changes
core/audit.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def log(action: str, actor: str = "system", **fields) -> int:
    """Skeleton stub for writing an audit entry; not implemented yet.

    Args:
        action: Label for the event being logged.
        actor: Originator of the event; defaults to ``"system"``.
        **fields: Additional keyword data for the entry.

    Returns:
        Annotated as ``int`` — presumably the id of the stored entry; TODO
        confirm once implemented.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
3
+
4
+
5
+ def query(filters: dict | None = None) -> list[dict]:
6
+ raise NotImplementedError
core/bidder_processor.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from core.schemas import Criterion, Evidence
3
+
4
+
5
def process_bidder(bidder_id: str, files: list[Path]) -> None:
    """Skeleton stub for ingesting one bidder's files; not implemented yet.

    Args:
        bidder_id: Identifier of the bidder the files belong to.
        files: Paths of that bidder's submitted documents.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
7
+
8
+
9
def gather_evidence(
    bidder_id: str, criterion: Criterion, k: int = 4
) -> list[Evidence]:
    """Skeleton stub for collecting evidence for a criterion; not implemented.

    Args:
        bidder_id: Identifier of the bidder whose documents are searched.
        criterion: The criterion to find evidence for.
        k: Defaults to 4 — presumably the maximum number of evidence items
            to return; TODO confirm.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
core/chunker.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from core.ocr_pipeline import ExtractedPage
2
+
3
+
4
def chunk_tender(pages: list[dict], tender_id: str) -> list[dict]:
    """Skeleton stub for chunking tender pages; not implemented yet.

    Args:
        pages: Page records as dicts; their key schema is not defined here —
            TODO confirm against the producer.
        tender_id: Identifier of the tender the pages belong to.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
6
+
7
+
8
def chunk_bidder(
    pages: list[ExtractedPage],
    bidder_id: str,
    doc_name: str,
) -> list[dict]:
    """Skeleton stub for chunking a bidder document; not implemented yet.

    Args:
        pages: Extracted pages of a single document.
        bidder_id: Identifier of the bidder that submitted the document.
        doc_name: Name of the document the pages came from.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
core/config.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dotenv import load_dotenv
4
+
5
# Pull variables from a local .env file (if any) into the process environment
# before they are read below.
load_dotenv()

# ── LLM provider ─────────────────────────────────────────────────────────────
# None when the variable is not set in the environment.
DEEPSEEK_API_KEY: str | None = os.environ.get("DEEPSEEK_API_KEY")
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
MODEL_NAME = "deepseek-chat"
MODEL_VERSION = f"{MODEL_NAME}@2026-05-07"

# ── Thresholds ───────────────────────────────────────────────────────────────
# NOTE(review): presumably cut-offs on confidence scores in [0, 1]; confirm
# how the evaluator and OCR pipeline apply them once those are implemented.
CONFIDENCE_HIGH = 0.80
CONFIDENCE_REVIEW = 0.55
OCR_TESSERACT_MIN_CONF = 0.65

# ── Filesystem layout ────────────────────────────────────────────────────────
# The directory two levels above this file (the parent of this file's folder).
BASE_DIR = Path(__file__).resolve().parents[1]
DATA_DIR = BASE_DIR / "data"
PRECOMPUTED_DIR = DATA_DIR / "precomputed"
OCR_CACHE_DIR = BASE_DIR / ".ocr_cache"
# These two are kept as plain strings rather than Path objects.
CHROMA_DIR = str(BASE_DIR / ".chroma")
AUDIT_DB = str(BASE_DIR / "audit.db")
core/criteria_extractor.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from core.schemas import Criterion
3
+
4
+
5
def extract_criteria(tender_pdf_path: Path) -> list[Criterion]:
    """Skeleton stub for extracting criteria from a tender PDF; not implemented.

    Args:
        tender_pdf_path: Location of the tender PDF.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
core/evaluator.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from core.schemas import Criterion, Verdict
2
+
3
+
4
def evaluate(bidder_id: str, criterion: Criterion) -> Verdict:
    """Skeleton stub for judging one bidder on one criterion; not implemented.

    Args:
        bidder_id: Identifier of the bidder being evaluated.
        criterion: The criterion to evaluate against.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
6
+
7
+
8
def evaluate_bidder(
    bidder_id: str, criteria: list[Criterion]
) -> list[Verdict]:
    """Skeleton stub for judging one bidder on many criteria; not implemented.

    Args:
        bidder_id: Identifier of the bidder being evaluated.
        criteria: The criteria to evaluate against.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
core/fallback.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from core.schemas import Criterion, Verdict
2
+
3
+
4
def load_criteria() -> list[Criterion]:
    """Skeleton stub for loading fallback criteria; not implemented yet.

    NOTE(review): presumably reads precomputed results when the LLM is
    unavailable — confirm against the spec.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
6
+
7
+
8
def load_evaluation(bidder_id: str, criterion_id: str) -> Verdict:
    """Skeleton stub for loading a fallback verdict; not implemented yet.

    Args:
        bidder_id: Identifier of the bidder.
        criterion_id: Identifier of the criterion.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
core/llm_client.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+
4
class LLMUnavailable(Exception):
    """Exception type for LLM unavailability; adds nothing to ``Exception``."""
6
+
7
+
8
+ class LLM:
9
+ def __init__(self, api_key: str | None = None):
10
+ pass
11
+
12
+ def chat_json(self, system: str, user: str, max_retries: int = 2) -> dict:
13
+ raise NotImplementedError
14
+
15
+ def chat_vision(
16
+ self,
17
+ system: str,
18
+ user_text: str,
19
+ image: bytes | str | Path,
20
+ max_retries: int = 2,
21
+ ) -> str:
22
+ raise NotImplementedError
core/ocr_pipeline.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+
4
from dataclasses import dataclass


@dataclass
class ExtractedPage:
    """Text extracted from one page of a document, with its provenance.

    Declared as a dataclass so instances can be built from their fields; a
    plain class with only annotations has no generated ``__init__`` and
    cannot be constructed with these values.
    """

    page: int  # page number — TODO confirm whether 0- or 1-based
    text: str
    source_type: str  # "text_pdf" | "tesseract" | "vision_llm"
    confidence: float
    raw_tier_results: dict  # schema not defined yet — TODO confirm
10
+
11
+
12
def extract_document(file_path: Path) -> list[ExtractedPage]:
    """Skeleton stub for extracting a document's pages; not implemented yet.

    Args:
        file_path: Location of the document to extract.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
core/pdf_utils.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import PIL.Image
3
+
4
+
5
def extract_pages(path: Path) -> list[dict]:
    """Skeleton stub for reading the pages of a PDF; not implemented yet.

    Args:
        path: Location of the PDF.

    Returns:
        Annotated as a list of dicts; the key schema is not defined yet.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
7
+
8
+
9
def is_text_pdf(path: Path) -> bool:
    """Skeleton stub for checking whether a PDF is text-based; not implemented.

    Args:
        path: Location of the PDF.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
11
+
12
+
13
def render_page_to_image(
    path: Path, page_no: int, dpi: int = 200
) -> PIL.Image.Image:
    """Skeleton stub for rasterising one PDF page; not implemented yet.

    Args:
        path: Location of the PDF.
        page_no: Which page to render — TODO confirm 0- or 1-based.
        dpi: Render resolution; defaults to 200.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
core/prompts.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE: every line inside the triple-quoted prompts ends with a backslash, so
# each prompt is a single line of text with no embedded newlines.

# System prompt for extracting eligibility criteria from a tender document.
EXTRACT_CRITERIA_PROMPT_SYSTEM = """\
You are an expert in Indian government tender analysis (CRPF context). Your job is to extract \
eligibility criteria from a tender document and return them as STRICT JSON. Never invent criteria \
not present in the text. Classify each criterion as mandatory or optional based on cue words: \
"shall", "must", "mandatory", "required", "minimum" → mandatory; "preferred", "desirable", \
"may", "optionally" → optional. For each criterion, generate 3–5 short noun-phrase query_hints \
that an evaluator would search for in bidder documents.\
"""

# System prompt for judging one criterion against retrieved evidence chunks.
EVALUATE_CRITERION_PROMPT_SYSTEM = """\
You are a procurement evaluator. Given ONE criterion and a list of retrieved evidence chunks from \
a bidder's documents, decide eligible / not_eligible / needs_review. Always cite the strongest \
single source. NEVER guess values not present in the evidence. If evidence is missing or \
ambiguous, return needs_review with reason. Output STRICT JSON.\
"""

# System prompt for transcribing a page image.
VISION_OCR_PROMPT_SYSTEM = """\
You are an OCR engine for Indian government procurement documents. Transcribe the image text \
faithfully, preserving numeric values, dates, certificate IDs, and tabular structure (use \
markdown tables). Do NOT summarize, interpret, or omit anything. Output transcribed text only — \
no commentary.\
"""

# User-side instruction for the transcription prompt.
VISION_OCR_USER = (
    "Transcribe this document page completely. Pay special attention to numeric values like "
    "turnover figures (INR / Crore / Lakh), dates, and registration numbers."
)
core/schemas.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Literal, Optional
3
+ from pydantic import BaseModel, Field
4
+ import uuid
5
+
6
+
7
class Rule(BaseModel):
    """Structured check attached to a criterion.

    ``value`` and ``unit`` are optional and default to ``None``.
    """

    # Kind of check this rule performs.
    type: Literal[
        "numeric_threshold",
        "count_threshold",
        "certification_present",
        "document_present",
    ]
    # Name of the attribute the rule applies to.
    field: str
    operator: Literal[">=", "<=", "==", "exists"]
    value: float | int | None = None
    unit: str | None = None
13
+
14
+
15
class Criterion(BaseModel):
    """One eligibility criterion; every field is required."""

    id: str
    title: str
    category: Literal["financial", "technical", "compliance"]
    mandatory: bool
    description: str
    # Structured form of the requirement.
    rule: Rule
    # Short phrases to search for — presumably used when retrieving evidence
    # from bidder documents; TODO confirm.
    query_hints: list[str]
    # Location of the criterion in the tender document.
    source_page: int
    source_clause: str
25
+
26
+
27
class Evidence(BaseModel):
    """A piece of text from a bidder document, with where it came from."""

    bidder_id: str
    doc_name: str
    page: int
    text: str
    # How the text was obtained.
    source_type: Literal["text_pdf", "tesseract", "vision_llm"]
    # Optional; defaults to None.
    ocr_confidence: float | None = None
34
+
35
+
36
class Source(BaseModel):
    """Citation pointing at a snippet on a page of a named document."""

    doc_name: str
    page: int
    snippet: str
    # How the cited text was obtained.
    source_type: Literal["text_pdf", "tesseract", "vision_llm"]
41
+
42
+
43
def _new_verdict_id() -> str:
    """Return a fresh id: ``V-`` plus the first 8 hex chars of a UUID4."""
    return f"V-{uuid.uuid4().hex[:8]}"


class Verdict(BaseModel):
    """Outcome of evaluating one bidder against one criterion.

    Only ``bidder_id``, ``criterion_id`` and ``verdict`` are required; the
    remaining fields have defaults.
    """

    # Generated per instance when not supplied.
    verdict_id: str = Field(default_factory=_new_verdict_id)
    bidder_id: str
    criterion_id: str
    verdict: Literal["eligible", "not_eligible", "needs_review"]

    # Value as found in the evidence, and its numeric form, when available.
    extracted_value: str | None = None
    normalized_value: float | int | None = None
    source: Source | None = None

    # Confidence scores; how combined_confidence is derived is not defined
    # here — TODO confirm.
    llm_confidence: float = 0.0
    ocr_confidence: float | None = None
    combined_confidence: float = 0.0

    reason: str = ""
    model_version: str = ""
    timestamp: str = ""
    review_status: Literal["pending", "approved", "edited", "rejected"] = "pending"
58
+
59
+
60
class AuditEntry(BaseModel):
    """One audit-log record; ``ts``, ``action`` and ``actor`` are required."""

    # Optional; defaults to None.
    id: int | None = None
    ts: str
    action: str
    actor: str
    # Optional context for the entry; each defaults to None.
    model_version: str | None = None
    bidder_id: str | None = None
    criterion_id: str | None = None
    # Presumably a JSON-encoded string of extra data — TODO confirm.
    payload_json: str | None = None
core/vectorstore.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_client():
    """Skeleton stub for obtaining the vector-store client; not implemented.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
3
+
4
+
5
def get_collection(name: str):
    """Skeleton stub for fetching a collection by name; not implemented yet.

    Args:
        name: Name of the collection.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
7
+
8
+
9
def add_chunks(collection, chunks: list[dict], metadatas: list[dict]) -> None:
    """Skeleton stub for adding chunks to a collection; not implemented yet.

    Args:
        collection: Target collection object.
        chunks: Chunk records to add; key schema not defined yet.
        metadatas: Metadata dicts — presumably one per chunk, in the same
            order; TODO confirm.

    Raises:
        NotImplementedError: Always, in this skeleton.
    """
    raise NotImplementedError
11
+
12
+
13
+ def query(
14
+ collection, text: str, k: int = 4, where: dict | None = None
15
+ ) -> list[dict]:
16
+ raise NotImplementedError
data/bidders/bidder_a/.gitkeep ADDED
File without changes
data/bidders/bidder_b/.gitkeep ADDED
File without changes
data/bidders/bidder_c/.gitkeep ADDED
File without changes
data/precomputed/.gitkeep ADDED
File without changes
data/tender/.gitkeep ADDED
File without changes
deck/.gitkeep ADDED
File without changes
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ tesseract-ocr
2
+ poppler-utils
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.39.0
2
+ openai==1.51.0
3
+ pymupdf==1.24.10
4
+ pytesseract==0.3.13
5
+ Pillow==10.4.0
6
+ numpy==1.26.4
7
+ chromadb==0.5.5
8
+ sentence-transformers==3.1.1
9
+ pydantic==2.9.2
10
+ python-dotenv==1.0.1
11
+ reportlab==4.2.5
12
+ pandas==2.2.3
scripts/generate_mock_data.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Step 2 — generates mock tender and bidder PDFs + noisy scan PNG."""
scripts/precompute_results.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Step 11 — runs the full pipeline and writes data/precomputed/*.json."""
scripts/smoke_test.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Step 13 — programmatic end-to-end check; exits 0 on success."""
ui/__init__.py ADDED
File without changes
ui/components.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Shared UI widgets — implemented incrementally as Tab 3 and Tab 4 need them.
ui/tab_audit.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def render() -> None:
    """Draw the placeholder Audit Log tab: a header and a "coming soon" note."""
    st.header("Audit Log")
    st.info("Coming soon — sortable audit log with CSV export.")
ui/tab_bidders.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def render() -> None:
    """Draw the placeholder Bidder Evaluation tab: a header and a notice."""
    st.header("Bidder Evaluation")
    st.info("Coming soon — per-bidder, per-criterion verdict table.")
ui/tab_overview.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def render() -> None:
    """Draw the placeholder Overview tab: a header and a "coming soon" note."""
    st.header("Overview")
    st.info("Coming soon — architecture diagram, KPIs, and demo CTA.")
ui/tab_review.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def render() -> None:
    """Draw the placeholder Human Review tab: a header and a notice."""
    st.header("Human Review Queue")
    st.info("Coming soon — approve / edit / reject flagged verdicts.")
ui/tab_tender.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def render() -> None:
    """Draw the placeholder Tender Analysis tab: a header and a notice."""
    st.header("Tender Analysis")
    st.info("Coming soon — upload tender and extract eligibility criteria.")