File size: 1,459 Bytes
7ff7119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""get_extraction tool — fetch a single document's extracted structured data."""

from __future__ import annotations

import json

from langchain_core.tools import tool

from tools.context import ChatToolContext


def build_get_extraction_tool(ctx: ChatToolContext):
    """Create the ``get_extraction`` tool bound to the given chat context.

    Args:
        ctx: context object used to look up a document by filename
            (``get_document``) and to enumerate known filenames
            (``list_filenames``).

    Returns:
        The ``@tool``-decorated ``get_extraction`` callable, closed over ``ctx``.
    """

    # NOTE(review): the docstring of the decorated function is kept verbatim;
    # `@tool` presumably exposes it as the tool description, so treat it as
    # runtime text rather than as ordinary documentation.
    @tool
    def get_extraction(filename: str) -> str:
        """Fetch the structured extraction for a document by filename.

        For an invoice: line items, amounts, dates.
        For a contract: clauses, terms, validity dates.

        Args:
            filename: the document filename (e.g. 'invoice_001.pdf')
        """
        doc = ctx.get_document(filename)

        # Guard 1: unknown filename -> report what is available instead.
        if doc is None:
            known = ctx.list_filenames()
            listing = known or 'no documents uploaded'
            return f"Document not found: '{filename}'. Available files: {listing}"

        # Guard 2: document exists but carries no extraction result.
        extracted = doc.extracted
        if extracted is None:
            return f"'{filename}' has not been extracted yet (extracted=null)."

        # Fall back to "other" when the document has no classification.
        if doc.classification:
            doc_type = doc.classification.doc_type
        else:
            doc_type = "other"

        # Serialize the whole extraction, including quotes and confidence.
        # `default=str` stringifies any value json cannot encode natively.
        payload = {
            "file": filename,
            "doc_type": doc_type,
            "data": extracted.raw,
            "_quotes": extracted.quotes,
            "_confidence": extracted.confidence,
        }
        return json.dumps(payload, ensure_ascii=False, indent=2, default=str)

    return get_extraction