Spaces:
Restarting
Restarting
| [ | |
| { | |
| "id": "q01", | |
| "category": "list", | |
| "question": "How many documents are uploaded and what types are they?", | |
| "expected_tools": ["list_documents"], | |
| "expected_substrings": ["invoice", "contract"] | |
| }, | |
| { | |
| "id": "q02", | |
| "category": "list", | |
| "question": "List the uploaded files", | |
| "expected_tools": ["list_documents"], | |
| "expected_substrings": [".pdf"] | |
| }, | |
| { | |
| "id": "q03", | |
| "category": "extract", | |
| "question": "What is the gross total on as-2026-001.pdf?", | |
| "expected_tools": ["list_documents", "get_extraction"], | |
| "expected_substrings": ["24000", "24,000", "gross", "$"] | |
| }, | |
| { | |
| "id": "q04", | |
| "category": "extract", | |
| "question": "Who issued as-2026-002.pdf?", | |
| "expected_tools": ["get_extraction"], | |
| "expected_substrings": ["AcmeSoft", "issuer"] | |
| }, | |
| { | |
| "id": "q05", | |
| "category": "extract", | |
| "question": "What is the payment due date on bi-inv-2026-0418.pdf?", | |
| "expected_tools": ["get_extraction"], | |
| "expected_substrings": ["2026", "due"] | |
| }, | |
| { | |
| "id": "q06", | |
| "category": "search", | |
| "question": "Which document has information on the delivery due date?", | |
| "expected_tools": ["search_documents"], | |
| "expected_substrings": ["purchase_order", "delivery"] | |
| }, | |
| { | |
| "id": "q07", | |
| "category": "search", | |
| "question": "What is the penalty amount in the NDA? Find it.", | |
| "expected_tools": ["search_documents"], | |
| "expected_substrings": ["penalty", "50"] | |
| }, | |
| { | |
| "id": "q08", | |
| "category": "search", | |
| "question": "What does the change of control clause contain?", | |
| "expected_tools": ["search_documents"], | |
| "expected_substrings": ["25", "ownership", "change"] | |
| }, | |
| { | |
| "id": "q09", | |
| "category": "compare", | |
| "question": "Compare the prices of as-2026-001.pdf and as-2026-003.pdf.", | |
| "expected_tools": ["get_extraction", "compare_documents"], | |
| "expected_substrings": ["differ", "diff", "net"] | |
| }, | |
| { | |
| "id": "q10", | |
| "category": "compare", | |
| "question": "How much more expensive is as-2026-003.pdf compared to as-2026-001.pdf?", | |
| "expected_tools": ["get_extraction", "compare_documents"], | |
| "expected_substrings": ["differ", "diff"] | |
| }, | |
| { | |
| "id": "q11", | |
| "category": "compare", | |
| "question": "Is there a discrepancy between bi-po-2026-0412.pdf and bi-dn-2026-0415.pdf?", | |
| "expected_tools": ["compare_documents"], | |
| "expected_substrings": ["HI-100", "differ", "38", "40"] | |
| }, | |
| { | |
| "id": "q12", | |
| "category": "validate", | |
| "question": "Validate the math on as-2026-001.pdf.", | |
| "expected_tools": ["validate_document"], | |
| "expected_substrings": ["ok", "error", "valid", "math"] | |
| }, | |
| { | |
| "id": "q13", | |
| "category": "validate", | |
| "question": "Is there a math error in adv-inv-2026-0001.pdf?", | |
| "expected_tools": ["validate_document"], | |
| "expected_substrings": ["adv-inv-2026-0001", "error", "ok"] | |
| }, | |
| { | |
| "id": "q14", | |
| "category": "validate", | |
| "question": "Is the tax ID valid on as-2026-002.pdf?", | |
| "expected_tools": ["validate_document"], | |
| "expected_substrings": ["tax", "ok", "12-3456789"] | |
| } | |
| ] | |