JaydeepR Claude Sonnet 4.6 committed on
Commit
b384020
·
1 Parent(s): 3e90306

Fix ChromaDB tenant error; add real CRPF tenders; update Tab 2 (Tender Analysis)

Browse files

- core/vectorstore.py: switch to EphemeralClient (in-memory) — eliminates
ChromaDB 0.5.x tenant/SQLite compatibility error entirely; index rebuilds
on each evaluation run so persistence is not needed
- data/tender/real_tenders/: two real CRPF tender PDFs downloaded from
crpf.gov.in (Bhopal water tanks NIT-71 Aug-2025, J&K office repair Jan-2026)
- ui/tab_tender.py: pre-loaded tender selector with tabs (pre-loaded vs upload);
real tenders shown with green badge; active tender name always displayed

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

core/vectorstore.py CHANGED
@@ -7,14 +7,11 @@ from core.config import CHROMA_DIR
7
 
8
  @st.cache_resource
9
  def get_client():
10
- import shutil
11
  import chromadb
12
- try:
13
- return chromadb.PersistentClient(path=CHROMA_DIR)
14
- except ValueError:
15
- # Stale or version-incompatible .chroma directory — wipe and recreate
16
- shutil.rmtree(CHROMA_DIR, ignore_errors=True)
17
- return chromadb.PersistentClient(path=CHROMA_DIR)
18
 
19
 
20
  def get_collection(name: str):
 
7
 
8
  @st.cache_resource
9
  def get_client():
 
10
  import chromadb
11
+ # EphemeralClient (in-memory) avoids all ChromaDB 0.5.x tenant/SQLite
12
+ # compatibility issues. The index is rebuilt each evaluation run, so
13
+ # persistence is not needed.
14
+ return chromadb.EphemeralClient()
 
 
15
 
16
 
17
  def get_collection(name: str):
data/tender/real_tenders/crpf_bhopal_water_tanks_2025.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:718a035f8911294cd1777805dd388b1781edbc5527af7ba5275bb1e8343d888b
3
+ size 1267097
data/tender/real_tenders/crpf_jammu_office_repair_2026.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdc49ca6e2bc7a733c11b8672bf4adeceb487537f344d57ab0da67f18788e566
3
+ size 484042
ui/tab_overview.py CHANGED
@@ -27,7 +27,7 @@ def render() -> None:
27
  )
28
 
29
  # KPIs
30
- criteria_count = len(st.session_state.get("criteria", load_criteria()))
31
  verdicts = st.session_state.get("verdicts", {})
32
  checked = sum(1 for bv in verdicts.values() for _ in bv)
33
  audit_count = len(audit.query())
 
27
  )
28
 
29
  # KPIs
30
+ criteria_count = len(st.session_state.get("criteria") or [])
31
  verdicts = st.session_state.get("verdicts", {})
32
  checked = sum(1 for bv in verdicts.values() for _ in bv)
33
  audit_count = len(audit.query())
ui/tab_tender.py CHANGED
@@ -7,63 +7,105 @@ from core import criteria_extractor
7
  from core.config import DATA_DIR
8
 
9
  _MOCK_TENDER = DATA_DIR / "tender" / "crpf_construction_tender.pdf"
 
10
 
11
- _CATEGORY_COLORS = {
12
- "financial": "🔵",
13
- "technical": "🟢",
14
- "compliance": "🟠",
 
15
  }
16
 
 
 
17
 
18
  def render() -> None:
19
- st.header("Tender Analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- uploaded = st.file_uploader("Upload tender PDF (leave blank to use pre-loaded mock)", type=["pdf"])
 
 
 
 
 
 
22
 
23
- if uploaded:
24
- tender_bytes = uploaded.read()
25
- tender_name = uploaded.name
26
- with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
27
- tmp.write(tender_bytes)
28
- tender_path = Path(tmp.name)
29
- else:
30
- tender_path = _MOCK_TENDER
31
- tender_name = _MOCK_TENDER.name
32
 
33
- st.caption(f"Using: **{tender_name}**")
34
 
 
35
  if st.button("Extract Criteria (Live LLM)", type="primary"):
36
  with st.spinner("Calling DeepSeek to extract eligibility criteria…"):
37
  criteria = criteria_extractor.extract_criteria(tender_path)
38
- st.session_state["criteria"] = [c.model_dump() for c in criteria]
39
- st.session_state["tender_path"] = str(tender_path)
 
40
 
 
41
  criteria_data = st.session_state.get("criteria")
42
- if criteria_data:
43
- st.success(f"Extracted **{len(criteria_data)}** criteria")
44
-
45
- if st.session_state.get("fallback_active"):
46
- st.warning("⚠ Live API unavailable — showing pre-computed criteria.")
47
-
48
- for c in criteria_data:
49
- mandatory_badge = "🔴 Mandatory" if c["mandatory"] else "🟡 Optional"
50
- cat_icon = _CATEGORY_COLORS.get(c["category"], "⚪")
51
- label = f"{cat_icon} **{c['id']}** — {c['title']} {mandatory_badge}"
52
- with st.expander(label, expanded=False):
53
- col1, col2 = st.columns([2, 1])
54
- with col1:
55
- st.markdown(f"**Description:** {c['description']}")
56
- rule = c["rule"]
57
- rule_parts = [f"Type: `{rule['type']}`", f"Field: `{rule['field']}`",
58
- f"Operator: `{rule['operator']}`"]
59
- if rule.get("value") is not None:
60
- rule_parts.append(f"Value: `{rule['value']}`")
61
- if rule.get("unit"):
62
- rule_parts.append(f"Unit: `{rule['unit']}`")
63
- st.markdown(" · ".join(rule_parts))
64
- with col2:
65
- st.markdown(f"**Category:** {c['category'].capitalize()}")
66
- st.markdown(f"**Source:** Page {c['source_page']}, Clause {c['source_clause']}")
67
- if c.get("query_hints"):
68
- hints = ", ".join(f"`{h}`" for h in c["query_hints"])
69
- st.markdown(f"**Query hints:** {hints}")
 
 
 
 
 
 
 
 
 
 
7
  from core.config import DATA_DIR
8
 
9
  _MOCK_TENDER = DATA_DIR / "tender" / "crpf_construction_tender.pdf"
10
+ _REAL_DIR = DATA_DIR / "tender" / "real_tenders"
11
 
12
+ _REAL_LABELS = {
13
+ "crpf_bhopal_water_tanks_2025.pdf":
14
+ "CRPF GC Bhopal — Water Storage Tanks (NIT-71, Aug 2025, Est. ₹62.9L)",
15
+ "crpf_jammu_office_repair_2026.pdf":
16
+ "CRPF J&K Zone HQ Jammu — Office Building Repair (Jan 2026, Est. ₹24.3L)",
17
  }
18
 
19
+ _CAT_ICONS = {"financial": "🔵", "technical": "🟢", "compliance": "🟠"}
20
+
21
 
22
  def render() -> None:
23
+ st.markdown(
24
+ '<h2 style="font-family:Inter,sans-serif;font-weight:800;font-size:1.5rem;'
25
+ 'color:#0D1B2A;margin-bottom:4px;">Tender Analysis</h2>'
26
+ '<p style="color:#64748B;font-size:0.875rem;margin-bottom:1rem;">'
27
+ 'Extract eligibility criteria from any tender PDF using DeepSeek.</p>',
28
+ unsafe_allow_html=True,
29
+ )
30
+
31
+ # ── Tender source selector ────────────────────────────────────────────────
32
+ real_files = sorted(_REAL_DIR.glob("*.pdf")) if _REAL_DIR.exists() else []
33
+
34
+ preset_options = {"Mock tender (CRPF Construction, 5 criteria)": _MOCK_TENDER}
35
+ for f in real_files:
36
+ preset_options[_REAL_LABELS.get(f.name, f.name)] = f
37
+
38
+ tab_preset, tab_upload = st.tabs(["📂 Pre-loaded Tenders", "⬆️ Upload Your Own"])
39
+
40
+ with tab_preset:
41
+ chosen_label = st.selectbox("Select tender", options=list(preset_options.keys()))
42
+ tender_path = preset_options[chosen_label]
43
+ tender_name = tender_path.name
44
 
45
+ if real_files and chosen_label != "Mock tender (CRPF Construction, 5 criteria)":
46
+ st.markdown(
47
+ '<div style="background:#F0FDF4;border:1px solid #BBF7D0;border-radius:8px;'
48
+ 'padding:10px 14px;font-size:0.83rem;color:#166534;margin-top:6px;">'
49
+ '✅ <strong>Real government tender</strong> — downloaded from crpf.gov.in</div>',
50
+ unsafe_allow_html=True,
51
+ )
52
 
53
+ with tab_upload:
54
+ uploaded = st.file_uploader("Upload a tender PDF", type=["pdf"],
55
+ label_visibility="collapsed")
56
+ if uploaded:
57
+ tender_bytes = uploaded.read()
58
+ tender_name = uploaded.name
59
+ with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
60
+ tmp.write(tender_bytes)
61
+ tender_path = Path(tmp.name)
62
 
63
+ st.caption(f"Active: **{tender_name}**")
64
 
65
+ # ── Extract button ────────────────────────────────────────────────────────
66
  if st.button("Extract Criteria (Live LLM)", type="primary"):
67
  with st.spinner("Calling DeepSeek to extract eligibility criteria…"):
68
  criteria = criteria_extractor.extract_criteria(tender_path)
69
+ st.session_state["criteria"] = [c.model_dump() for c in criteria]
70
+ st.session_state["tender_path"] = str(tender_path)
71
+ st.success(f"Extracted {len(criteria)} criteria.")
72
 
73
+ # ── Display results ───────────────────────────────────────────────────────
74
  criteria_data = st.session_state.get("criteria")
75
+ if not criteria_data:
76
+ return
77
+
78
+ if st.session_state.get("fallback_active"):
79
+ st.warning("⚠ Live API unavailable — showing pre-computed criteria.")
80
+
81
+ st.markdown(
82
+ f'<div style="font-size:0.9rem;font-weight:700;color:#0D1B2A;margin:1rem 0 0.5rem;">'
83
+ f'Extracted {len(criteria_data)} criteria</div>',
84
+ unsafe_allow_html=True,
85
+ )
86
+
87
+ for c in criteria_data:
88
+ icon = _CAT_ICONS.get(c["category"], "⚪")
89
+ mand_lbl = "🔴 Mandatory" if c["mandatory"] else "🟡 Optional"
90
+ rule = c["rule"]
91
+ rule_str = f'`{rule["type"]}` · `{rule["field"]} {rule["operator"]}'
92
+ if rule.get("value") is not None:
93
+ rule_str += f' {rule["value"]}'
94
+ if rule.get("unit"):
95
+ rule_str += f' {rule["unit"]}'
96
+ rule_str += "`"
97
+
98
+ with st.expander(
99
+ f'{icon} **{c["id"]}** {c["title"]} · {mand_lbl}',
100
+ expanded=False,
101
+ ):
102
+ col1, col2 = st.columns([2, 1])
103
+ with col1:
104
+ st.markdown(f"**Description:** {c['description']}")
105
+ st.markdown(f"**Rule:** {rule_str}")
106
+ if c.get("query_hints"):
107
+ hints = " · ".join(f"`{h}`" for h in c["query_hints"])
108
+ st.markdown(f"**Query hints:** {hints}")
109
+ with col2:
110
+ st.markdown(f"**Category:** {c['category'].capitalize()}")
111
+ st.markdown(f"**Source:** Page {c['source_page']}, Clause `{c['source_clause']}`")