File size: 3,103 Bytes
7ff7119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
[
  {
    "id": "q01",
    "category": "list",
    "question": "How many documents are uploaded and what types are they?",
    "expected_tools": ["list_documents"],
    "expected_substrings": ["invoice", "contract"]
  },
  {
    "id": "q02",
    "category": "list",
    "question": "List the uploaded files",
    "expected_tools": ["list_documents"],
    "expected_substrings": [".pdf"]
  },
  {
    "id": "q03",
    "category": "extract",
    "question": "What is the gross total on as-2026-001.pdf?",
    "expected_tools": ["list_documents", "get_extraction"],
    "expected_substrings": ["24000", "24,000", "gross", "$"]
  },
  {
    "id": "q04",
    "category": "extract",
    "question": "Who issued as-2026-002.pdf?",
    "expected_tools": ["get_extraction"],
    "expected_substrings": ["AcmeSoft", "issuer"]
  },
  {
    "id": "q05",
    "category": "extract",
    "question": "What is the payment due date on bi-inv-2026-0418.pdf?",
    "expected_tools": ["get_extraction"],
    "expected_substrings": ["2026", "due"]
  },
  {
    "id": "q06",
    "category": "search",
    "question": "Which document has information on the delivery due date?",
    "expected_tools": ["search_documents"],
    "expected_substrings": ["purchase_order", "delivery"]
  },
  {
    "id": "q07",
    "category": "search",
    "question": "What is the penalty amount in the NDA? Find it.",
    "expected_tools": ["search_documents"],
    "expected_substrings": ["penalty", "50"]
  },
  {
    "id": "q08",
    "category": "search",
    "question": "What does the change of control clause contain?",
    "expected_tools": ["search_documents"],
    "expected_substrings": ["25", "ownership", "change"]
  },
  {
    "id": "q09",
    "category": "compare",
    "question": "Compare the prices of as-2026-001.pdf and as-2026-003.pdf.",
    "expected_tools": ["get_extraction", "compare_documents"],
    "expected_substrings": ["differ", "diff", "net"]
  },
  {
    "id": "q10",
    "category": "compare",
    "question": "How much more expensive is as-2026-003.pdf compared to as-2026-001.pdf?",
    "expected_tools": ["get_extraction", "compare_documents"],
    "expected_substrings": ["differ", "diff"]
  },
  {
    "id": "q11",
    "category": "compare",
    "question": "Is there a discrepancy between bi-po-2026-0412.pdf and bi-dn-2026-0415.pdf?",
    "expected_tools": ["compare_documents"],
    "expected_substrings": ["HI-100", "differ", "38", "40"]
  },
  {
    "id": "q12",
    "category": "validate",
    "question": "Validate the math on as-2026-001.pdf.",
    "expected_tools": ["validate_document"],
    "expected_substrings": ["ok", "error", "valid", "math"]
  },
  {
    "id": "q13",
    "category": "validate",
    "question": "Is there a math error in adv-inv-2026-0001.pdf?",
    "expected_tools": ["validate_document"],
    "expected_substrings": ["adv-inv-2026-0001", "error", "ok"]
  },
  {
    "id": "q14",
    "category": "validate",
    "question": "Is the tax ID valid on as-2026-002.pdf?",
    "expected_tools": ["validate_document"],
    "expected_substrings": ["tax", "ok", "12-3456789"]
  }
]