File size: 1,459 Bytes
"""get_extraction tool — fetch a single document's extracted structured data."""
from __future__ import annotations
import json
from langchain_core.tools import tool
from tools.context import ChatToolContext
def build_get_extraction_tool(ctx: ChatToolContext):
    """Create the ``get_extraction`` tool bound to a chat context.

    The tool is defined as a closure so that every invocation looks
    documents up through the ``ctx`` passed here.

    Args:
        ctx: context object providing ``get_document(filename)`` and
            ``list_filenames()``.

    Returns:
        The ``get_extraction`` function as wrapped by the ``@tool`` decorator.
    """

    @tool
    def get_extraction(filename: str) -> str:
        """Fetch the structured extraction for a document by filename.

        For an invoice: line items, amounts, dates.
        For a contract: clauses, terms, validity dates.

        Args:
            filename: the document filename (e.g. 'invoice_001.pdf')
        """
        document = ctx.get_document(filename)

        # Guard 1: unknown filename — reply with the names that do exist so
        # the caller can retry with a valid one.
        if document is None:
            known_files = ctx.list_filenames()
            listing = known_files if known_files else "no documents uploaded"
            return f"Document not found: '{filename}'. Available files: {listing}"

        # Guard 2: the document exists but carries no extraction result.
        extraction = document.extracted
        if extraction is None:
            return f"'{filename}' has not been extracted yet (extracted=null)."

        # A missing (falsy) classification is reported as "other".
        classification = document.classification
        if classification:
            doc_type = classification.doc_type
        else:
            doc_type = "other"

        # Full payload: the raw extracted data plus its quotes and confidence.
        payload = {
            "file": filename,
            "doc_type": doc_type,
            "data": extraction.raw,
            "_quotes": extraction.quotes,
            "_confidence": extraction.confidence,
        }

        # default=str: any value json cannot encode natively is stringified
        # instead of raising TypeError.
        return json.dumps(payload, ensure_ascii=False, indent=2, default=str)

    return get_extraction
|