| """Pydantic v2 mirror models for the JSON schemas. |
| |
| Purpose: runtime field validation in the extract_subgraph |
| (``InvoiceModel.model_validate(...)``) and type-strong downstream nodes (the |
| risk_subgraph receives Pydantic-typed data). |
| |
| JSON schema remains the source of truth for the LLM ``with_structured_output()`` |
| calls — the Pydantic mirror is for VALIDATION ONLY, it does not replace the |
| JSON schema. |
| |
| The ``_quotes`` and ``_confidence`` fields are aliased in the JSON |
| (``"alias_": ...``); we keep the aliases here too so the JSON parses cleanly. |
| """ |
|
|
| from __future__ import annotations |
|
|
| from pydantic import BaseModel, ConfigDict, Field |
|
|
| |
| |
| |
|
|
|
|
| class Party(BaseModel): |
| """A party (issuer, customer, contracting party).""" |
|
|
| name: str | None = None |
| tax_id: str | None = None |
| address: str | None = None |
| role: str | None = None |
| contact: str | None = None |
|
|
|
|
| class SourceRef(BaseModel): |
| file_name: str | None = None |
| page_number: int | None = None |
|
|
|
|
| |
| |
| |
|
|
|
|
| class InvoiceItem(BaseModel): |
| item_code: str | None = None |
| description: str | None = None |
| quantity: float | None = None |
| unit: str | None = None |
| unit_price_net: float | None = None |
| vat_rate: float | None = None |
| total_net: float | None = None |
| total_vat: float | None = None |
| total_gross: float | None = None |
|
|
|
|
| class InvoiceModel(BaseModel): |
| model_config = ConfigDict(populate_by_name=True, extra="ignore") |
|
|
| invoice_number: str | None = None |
| issue_date: str | None = None |
| fulfillment_date: str | None = None |
| payment_due_date: str | None = None |
| payment_method: str | None = None |
| currency: str = "USD" |
| issuer: Party | None = None |
| customer: Party | None = None |
| line_items: list[InvoiceItem] = Field(default_factory=list) |
| total_net: float | None = None |
| total_vat: float | None = None |
| total_gross: float | None = None |
| quotes: list[str] = Field(default_factory=list, alias="_quotes") |
| confidence: dict = Field(default_factory=dict, alias="_confidence") |
| source: SourceRef | None = Field(default=None, alias="_source") |
|
|
|
|
| |
| |
| |
|
|
|
|
| class ContractPenalty(BaseModel): |
| amount: float | None = None |
| condition: str | None = None |
|
|
|
|
| class AutoRenewal(BaseModel): |
| enabled: bool = False |
| condition: str | None = None |
|
|
|
|
| class KeyClause(BaseModel): |
| name: str |
| content: str |
| risk_level: str = "low" |
|
|
|
|
| class ContractModel(BaseModel): |
| model_config = ConfigDict(populate_by_name=True, extra="ignore") |
|
|
| contract_type: str | None = Field( |
| None, |
| description="The type of contract, e.g. 'NDA', 'service', 'works contract', " |
| "'lease', 'MSA', 'rental', 'IT framework agreement'. If the title " |
| "of the contract ('NON-DISCLOSURE AGREEMENT', 'LEASE AGREEMENT', etc.) " |
| "or the first paragraph contains it, fill it in.", |
| ) |
| parties: list[Party] = Field(default_factory=list) |
| effective_date: str | None = Field( |
| None, |
| description="Effective date of the contract. If 'Effective date', " |
| "'Vertragsbeginn', 'Hatály kezdete' appears in the text, " |
| "fill in ISO 8601 (YYYY-MM-DD) format.", |
| ) |
| expiry_date: str | None = Field( |
| None, |
| description="Expiration date of the contract. If 'Expiry date', " |
| "'Vertragsende', 'Lejárat' appears, fill it in.", |
| ) |
| total_value: float | None = None |
| currency: str = "USD" |
| monthly_fee: float | None = None |
| monthly_fee_currency: str = "USD" |
| termination_terms: str | None = Field( |
| None, |
| description="Textual summary of the termination conditions. MANDATORY to " |
| "fill in if the contract anywhere mentions 'Termination', " |
| "'Felmondás', 'Megszűnés', 'Kündigung' — whether 30/60/90 day " |
| "notice or immediate termination for material breach. ONLY null " |
| "if the contract has NO termination clause whatsoever.", |
| ) |
| termination_period_days: int | None = Field( |
| None, |
| description="Number of days for the termination notice period (e.g. 30, 60, 90). Numeric.", |
| ) |
| penalty: ContractPenalty | None = Field( |
| None, |
| description="Penalty / liquidated damages clause if mentioned. Fill in if " |
| "'Penalty', 'Liquidated damages', 'Kötbér', 'Vertragsstrafe' or a " |
| "concrete amount/condition is referenced.", |
| ) |
| confidentiality_clause: bool | None = Field( |
| None, |
| description="True if the contract contains a 'Confidentiality', 'NDA', " |
| "'Titoktartás' clause as a separate section or by reference.", |
| ) |
| governing_law: str | None = Field( |
| None, |
| description="Applicable law. MANDATORY to fill in if 'Governing law', " |
| "'Applicable law', 'Anwendbares Recht', 'Irányadó jog', " |
| "'Hungarian law', 'BGB' is referenced. E.g.: 'Hungarian Civil Code', " |
| "'Hungarian and German BGB'.", |
| ) |
| auto_renewal: AutoRenewal | None = Field( |
| None, |
| description="Auto-renewal clause. Fill in if 'auto-renewal', 'evergreen " |
| "clause', 'automatically renewed', 'automatische Verlängerung' is mentioned.", |
| ) |
| change_of_control: bool | None = Field( |
| None, |
| description="True if the contract contains a 'change-of-control', " |
| "'change of control', 'kontroll-változás', 'termination on " |
| "ownership change' clause.", |
| ) |
| non_compete: bool | None = Field( |
| None, |
| description="True if the contract contains a 'non-compete', " |
| "'versenytilalom', 'Wettbewerbsverbot' clause.", |
| ) |
| key_clauses: list[KeyClause] = Field(default_factory=list) |
| quotes: list[str] = Field(default_factory=list, alias="_quotes") |
| confidence: dict = Field(default_factory=dict, alias="_confidence") |
| source: SourceRef | None = Field(default=None, alias="_source") |
|
|
|
|
| |
| |
| |
|
|
|
|
| class DeliveryItem(BaseModel): |
| item_code: str | None = None |
| description: str | None = None |
| quantity: float | None = None |
| unit: str | None = None |
|
|
|
|
| class DeliveryNoteModel(BaseModel): |
| model_config = ConfigDict(populate_by_name=True, extra="ignore") |
|
|
| document_number: str | None = None |
| issue_date: str | None = None |
| delivery_date: str | None = None |
| purchase_order_reference: str | None = None |
| supplier: Party | None = None |
| customer: Party | None = None |
| line_items: list[DeliveryItem] = Field(default_factory=list) |
| notes: str | None = None |
| quotes: list[str] = Field(default_factory=list, alias="_quotes") |
| confidence: dict = Field(default_factory=dict, alias="_confidence") |
| source: SourceRef | None = Field(default=None, alias="_source") |
|
|
|
|
| |
| |
| |
|
|
|
|
| class PurchaseOrderItem(BaseModel): |
| item_code: str | None = None |
| description: str | None = None |
| quantity: float | None = None |
| unit: str | None = None |
| unit_price_net: float | None = None |
| total_net: float | None = None |
|
|
|
|
| class PurchaseOrderModel(BaseModel): |
| model_config = ConfigDict(populate_by_name=True, extra="ignore") |
|
|
| document_number: str | None = None |
| date: str | None = None |
| delivery_due_date: str | None = None |
| payment_due_date: str | None = None |
| supplier: Party | None = None |
| customer: Party | None = None |
| line_items: list[PurchaseOrderItem] = Field(default_factory=list) |
| total_net: float | None = None |
| total_vat: float | None = None |
| total_gross: float | None = None |
| quotes: list[str] = Field(default_factory=list, alias="_quotes") |
| confidence: dict = Field(default_factory=dict, alias="_confidence") |
| source: SourceRef | None = Field(default=None, alias="_source") |
|
|
|
|
| |
| |
| |
|
|
|
|
| class FinancialLineItem(BaseModel): |
| description: str |
| value: float | None = None |
| value_prior_period: float | None = None |
|
|
|
|
| class FinancialReportModel(BaseModel): |
| model_config = ConfigDict(populate_by_name=True, extra="ignore") |
|
|
| report_type: str | None = None |
| period_start: str | None = None |
| period_end: str | None = None |
| company_name: str | None = None |
| company_tax_id: str | None = None |
| currency: str = "USD" |
| accounting_standard: str | None = None |
| """One of: 'IFRS' | 'US-GAAP' | 'HU-GAAP' | 'DE-HGB' | None.""" |
| line_items: list[FinancialLineItem] = Field(default_factory=list) |
| revenue: float | None = None |
| operating_income: float | None = None |
| pretax_income: float | None = None |
| tax: float | None = None |
| net_income: float | None = None |
| quotes: list[str] = Field(default_factory=list, alias="_quotes") |
| confidence: dict = Field(default_factory=dict, alias="_confidence") |
| source: SourceRef | None = Field(default=None, alias="_source") |
|
|
|
|
| |
| |
| |
|
|
|
|
| class UniversalDates(BaseModel): |
| issue: str | None = None |
| fulfillment: str | None = None |
| payment_due: str | None = None |
| effective: str | None = None |
| expiry: str | None = None |
| signature: str | None = None |
| other_dates: list[dict] = Field(default_factory=list) |
|
|
|
|
| class UniversalAmounts(BaseModel): |
| total_net: float | None = None |
| total_vat: float | None = None |
| total_gross: float | None = None |
| currency: str = "USD" |
| vat_rate: float | None = None |
|
|
|
|
| class UniversalContractElements(BaseModel): |
| contract_type: str | None = None |
| termination_terms: str | None = None |
| penalty: dict | None = None |
| confidentiality_clause: bool | None = None |
| governing_law: str | None = None |
| key_clauses: list[KeyClause] = Field(default_factory=list) |
|
|
|
|
| class UniversalModel(BaseModel): |
| model_config = ConfigDict(populate_by_name=True, extra="ignore") |
|
|
| document_type: str | None = None |
| document_language: str = "en" |
| document_number: str | None = None |
| parties: list[Party] = Field(default_factory=list) |
| dates: UniversalDates | None = None |
| amounts: UniversalAmounts | None = None |
| line_items: list[InvoiceItem] = Field(default_factory=list) |
| contract_elements: UniversalContractElements | None = None |
| risk_elements: list[str] = Field(default_factory=list) |
| quotes: list[str] = Field(default_factory=list, alias="_quotes") |
| confidence: dict = Field(default_factory=dict, alias="_confidence") |
| source: SourceRef | None = Field(default=None, alias="_source") |
|
|
|
|
| |
| |
| |
|
|
|
|
| def pydantic_for(doc_type: str) -> type[BaseModel]: |
| """Return the Pydantic model class for the given doc_type.""" |
| mapping = { |
| "invoice": InvoiceModel, |
| "delivery_note": DeliveryNoteModel, |
| "purchase_order": PurchaseOrderModel, |
| "contract": ContractModel, |
| "financial_report": FinancialReportModel, |
| "other": UniversalModel, |
| } |
| return mapping.get(doc_type, UniversalModel) |
|
|