"""PDF -> structured data -> DOCX/XLSX tender package pipeline.""" from __future__ import annotations from pathlib import Path from .checker.rate_checker import check_rates from .generators import fill_docx_template, generate_boq_excel, generate_work_plan_excel from .models import JVPartner, TenderData from .parser import parse_boq, parse_notice, parse_tds from .reports.report_generator import generate_rate_check_excel, generate_summary_txt from .sor import active_sor_paths, build_sor_lookup, detect_bwdb_zone, parse_bwdb_sor from .utils import amount_to_words_bd, date_to_document, date_to_long, ensure_output_folder, format_bdt, save_json, print_summary DOCX_TEMPLATES = { "bg_hb": "BG_HB_template.docx", "bg_credit_line": "BG_Credit_Line_template.docx", "equipment_decl": "Equipment_Declaration_template.docx", "manpower_decl": "Manpower_Declaration_template.docx", "methodology": "Methodology_template.docx", "jv_deed": "JV_DEED_template.docx", "jv_poa": "JV_POA_template.docx", } OUTPUT_FILENAMES = { "bg_hb": "2. BG_HB-{tender_id}.docx", "bg_credit_line": "3. BG_Credit_Line-{tender_id}.docx", "equipment_decl": "4. Equipment_Declaration-{tender_id}.docx", "manpower_decl": "5. Manpower_Declaration-{tender_id}.docx", "methodology": "6. Methodology-{tender_id}.docx", "jv_deed": "JV_DEED-{jv_name}-{tender_id}.docx", "jv_poa": "JV_POA-{jv_name}-{tender_id}.docx", } def run_pipeline(input_folder: str, template_folder: str, output_base: str, firm_config: dict | None = None) -> Path: input_path = Path(input_folder) template_path = Path(template_folder) firm_config = firm_config or {} notice_pdf = _find_pdf(input_path, ["notice", "ift", "1."]) tds1_pdf = _find_pdf(input_path, ["tds_1", "tds1", "2.tds", "2."]) tds2_pdf = _find_pdf(input_path, ["tds_2", "tds2", "3.tds", "3."]) boq_pdf = _find_pdf(input_path, ["boq", "4.boq", "4."]) print(f"\n[PARSING] {input_path}") print(f" Notice: {notice_pdf.name if notice_pdf else 'NOT FOUND'}") print(f" TDS 1 : {tds1_pdf.name if tds1_pdf else 'NOT FOUND'}") print(f" TDS 2 : {tds2_pdf.name if tds2_pdf else 'NOT FOUND'}") print(f" BOQ : {boq_pdf.name if boq_pdf else 'NOT FOUND'}") notice = parse_notice(str(notice_pdf)) if notice_pdf else {} tds = parse_tds(str(tds1_pdf or notice_pdf), str(tds2_pdf) if tds2_pdf else None) if (tds1_pdf or notice_pdf) else {} boq = parse_boq(str(boq_pdf)) if boq_pdf else {"boq_items": [], "departmental_estimate": 0} td = _build_tender_data(notice, tds, boq, firm_config) output_folder = ensure_output_folder(output_base, td.tender_id or "unknown") save_json(td, output_folder) generate_docs = firm_config.get("generate_docs", {}) or {} if _doc_enabled("boq_excel", generate_docs): generate_boq_excel(str(output_folder / f"BOQ-{td.tender_id}.xlsx"), td) if _doc_enabled("work_plan", generate_docs): generate_work_plan_excel(str(output_folder / f"7. Work_Plan-{td.tender_id}.xlsx"), td) if _doc_enabled("rate_check", generate_docs): _generate_rate_reports(td, output_folder) docx_dir = template_path / "docx" for key, template_name in DOCX_TEMPLATES.items(): if not _doc_enabled(key, generate_docs): continue if key in {"jv_deed", "jv_poa"} and not td.is_jv: continue template_file = docx_dir / template_name if not template_file.exists(): print(f" [SKIP] Template missing: {template_name}") continue output_name = OUTPUT_FILENAMES[key].format(tender_id=td.tender_id, jv_name=td.jv_name or "JV") fill_docx_template(str(template_file), str(output_folder / output_name), td) print_summary(output_folder) return output_folder def _doc_enabled(key: str, generate_docs: dict) -> bool: if not generate_docs: return True aliases = { "bg_hb": ["bg_hb", "bg_hb_sbac", "bg_hb-sinamm-ti"], "bg_credit_line": ["bg_credit_line", "bg_credit_line_hb", "bg_credit_line_HB"], "equipment_decl": ["equipment_decl", "equipment_declaration"], "manpower_decl": ["manpower_decl", "manpower_declaration"], "boq_excel": ["boq_excel", "boq"], "work_plan": ["work_plan"], "rate_check": ["rate_check"], "methodology": ["methodology"], "jv_deed": ["jv_deed"], "jv_poa": ["jv_poa"], } return any(bool(generate_docs.get(alias)) for alias in aliases.get(key, [key])) def _generate_rate_reports(td: TenderData, output_folder: Path) -> None: try: bwdb_pdf = active_sor_paths().get("BWDB", "") sor_items = parse_bwdb_sor(str(bwdb_pdf or "")) if not sor_items: print(" [SKIP] No BWDB SOR data found.") return zone = detect_bwdb_zone(td.location or td.pe_division or td.procuring_entity) summary = check_rates(td.boq_items, build_sor_lookup(sor_items), zone, td.tender_id) generate_rate_check_excel(summary, str(output_folder / f"Rate_Check-{td.tender_id}.xlsx")) generate_summary_txt(summary, str(output_folder / f"Rate_Check_Summary-{td.tender_id}.txt")) except Exception as exc: print(f" [SKIP] Rate check failed: {exc}") def _build_tender_data(notice: dict, tds: dict, boq: dict, firm: dict) -> TenderData: tender_id = _first(firm, notice, "tender_id") start_date = _first(firm, notice, "start_date") completion_date = _first(firm, notice, "completion_date") tender_security = float(_first(firm, notice, "tender_security_amount", 0) or 0) quoted_pct = float(_first(firm, boq, "quoted_rate_percent", 0) or 0) dept_estimate = float(_first(firm, boq, "departmental_estimate", 0) or 0) quoted_total = float(_first(firm, boq, "quoted_total", 0) or 0) or dept_estimate * (1 + quoted_pct) work_name = _first(firm, notice, "work_name") partners = _partners_from_context(firm) return TenderData( tender_id=tender_id, invitation_ref_no=_first(firm, notice, "invitation_ref_no"), package_no=_first(firm, notice, "package_no"), project_code=_first(firm, notice, "project_code"), procuring_entity=_first(firm, notice, "procuring_entity"), procuring_entity_short=firm.get("procuring_entity_short", "BWDB"), executive_engineer=_first(firm, notice, "executive_engineer"), pe_address=_first(firm, notice, "pe_address"), pe_division=firm.get("pe_division") or _first(firm, notice, "procuring_entity"), work_name=work_name, work_name_short=work_name[:80] + "..." if len(work_name) > 80 else work_name, location=_first(firm, notice, "location"), project_name=_first(firm, notice, "project_name"), publication_date=_first(firm, notice, "publication_date"), closing_date=_first(firm, notice, "closing_date"), start_date=start_date, completion_date=completion_date, completion_date_long=date_to_long(completion_date) if completion_date else "", bg_validity_date=firm.get("bg_validity_date") or "", document_date=firm.get("document_date") or date_to_document(_first(firm, notice, "closing_date")), tender_security_amount=tender_security, tender_security_amount_words=amount_to_words_bd(tender_security), tender_security_bdt=format_bdt(tender_security), liquid_assets_required_lakh=float(tds.get("liquid_assets_required_lakh", 0) or 0), annual_turnover_required_lakh=float(tds.get("annual_turnover_required_lakh", 0) or 0), tender_capacity_lakh=float(tds.get("tender_capacity_lakh", 0) or 0), document_fee_bdt=float(_first(firm, notice, "document_fee_bdt", 4000) or 4000), quoted_rate_percent=quoted_pct, departmental_estimate=dept_estimate, quoted_total=quoted_total, general_exp_years=int(tds.get("general_exp_years", 5) or 5), specific_exp_contracts=int(firm.get("specific_exp_contracts", 1) or 1), specific_exp_value_lakh=float(tds.get("specific_exp_value_lakh", 0) or 0), specific_exp_years=int(tds.get("specific_exp_years", 5) or 5), specific_exp_nature=tds.get("specific_exp_nature", ""), bank_name=firm.get("bank_name", ""), bank_branch=firm.get("bank_branch", ""), bank_guarantee_no=firm.get("bank_guarantee_no", ""), bg_date=firm.get("bg_date") or date_to_document(_first(firm, notice, "closing_date")), firm_name=firm.get("firm_name", ""), firm_address=firm.get("firm_address", ""), proprietor_name=firm.get("proprietor_name", ""), egp_email=firm.get("egp_email", ""), memo_no=firm.get("memo_no", ""), is_jv=bool(firm.get("is_jv", False)), jv_name=firm.get("jv_name", ""), jv_date=firm.get("jv_date", ""), jv_partner_count=int(firm.get("jv_partner_count", firm.get("jb_partner", 0)) or 0), jv_share_text=firm.get("jv_share_text", firm.get("jv_share", "")), jv_partners=partners, jv_office_address=firm.get("jv_office_address", firm.get("firm_address", "")), jv_phone=firm.get("jv_phone", ""), lead_partner=firm.get("lead_partner", firm.get("Lead Partner", "")), nominated_partner=firm.get("nominated_partner", firm.get("Nominated Partner", firm.get("Nminated Partner", ""))), partner_in_charge_name=firm.get("partner_in_charge_name", ""), partner_in_charge_firm=firm.get("partner_in_charge_firm", ""), equipment=tds.get("equipment", []), manpower=tds.get("manpower", []), boq_items=boq.get("boq_items", []), rate_schedule_ref=firm.get("rate_schedule_ref", "BWDB, 2019-20 Rate Schedule"), work_activities=firm.get("work_activities", []), work_start_year=_year(start_date, 2021), work_end_year=_year(completion_date, 2022), work_months=firm.get("work_months", [ "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun", ]), **_partner_kwargs(firm), ) def _partners_from_context(firm: dict) -> list[JVPartner]: partners = [] for idx in range(1, 4): name = firm.get(f"partner{idx}_firm_name", "") if not name: continue partners.append(JVPartner( code=firm.get(f"partner{idx}_code", ""), name=name, legal_type=firm.get(f"partner{idx}_legal_type", ""), address=firm.get(f"partner{idx}_address", ""), signatory_name=firm.get(f"partner{idx}_signatory_name", ""), position=firm.get(f"partner{idx}_position", ""), role="lead" if idx == 1 else "partner", share_percent=float(firm.get(f"partner{idx}_share_percent", 0) or 0), share_words=firm.get(f"partner{idx}_share_words", ""), )) return partners def _partner_kwargs(firm: dict) -> dict: data = {} for idx in range(1, 4): prefix = f"partner{idx}_" data[prefix + "code"] = firm.get(prefix + "code", "") data[prefix + "firm_name"] = firm.get(prefix + "firm_name", "") data[prefix + "legal_type"] = firm.get(prefix + "legal_type", "") data[prefix + "address"] = firm.get(prefix + "address", "") data[prefix + "signatory_name"] = firm.get(prefix + "signatory_name", "") data[prefix + "position"] = firm.get(prefix + "position", "") data[prefix + "share_percent"] = float(firm.get(prefix + "share_percent", 0) or 0) data[prefix + "share_words"] = firm.get(prefix + "share_words", "") return data def _first(primary: dict, secondary: dict, key: str, default=""): value = primary.get(key) if value not in (None, "", []): return value value = secondary.get(key) return default if value in (None, "", []) else value def _year(date_value: str, default: int) -> int: try: return int(str(date_value).split("-")[-1]) except Exception: return default def _find_pdf(folder: Path, keywords: list[str]) -> Path | None: for pdf in sorted(folder.glob("*.pdf")): name = pdf.name.lower().replace(" ", "") for key in keywords: key = key.lower() if key[0].isdigit(): if name.startswith(key): return pdf elif key in name: return pdf return None