{
"task_id": "objective_easy_02",
"version": "2.0.0",
"created_at": "2026-03-12",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "easy",
"task_number": 2,
"complexity_hint": {
"max_tokens": 4000,
"expected_output": "single card recommendation for a non-traveler with EV calculation"
},
"requires_human_review": false
},
"prompt": {
"system": "",
"user": "You are a financial advisor. Recommend the single best credit card for this user and calculate the expected value over 12 months.\n\nUser profile:\n- Monthly spending: $300 dining, $800 groceries, $0 travel/flights/hotels, $200 gas, $80 streaming, $0 transit, $1200 everything else\n- Does NOT travel (0 trips per year, no lounge use)\n- Does NOT use Uber, rideshare, or food delivery apps\n- Uses streaming services (Netflix, Spotify)\n- No existing credit cards\n- Time horizon: 12 months\n\nThis user wants the simplest, highest-value card with no annual fee. Provide a detailed EV breakdown.",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"Chase Freedom Unlimited",
"Chase Freedom Flex",
"Citi Double Cash",
"Amex Blue Cash Preferred",
"Capital One Savor",
"BofA Customized Cash Rewards",
"Wells Fargo Autograph"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"card_selection": {
"weight": 0.25,
"type": "automated",
"description": "F1 of recommended cards vs. optimal set",
"checks": {
"expected_cards": [
"Amex Blue Cash Preferred"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.3,
"type": "automated",
"description": "EV accuracy vs. computed ground truth",
"reference": {
"user_profile": {
"monthly_spend": {
"dining": 300,
"groceries": 800,
"travel": 0,
"flights": 0,
"hotels": 0,
"gas": 200,
"streaming": 80,
"transit": 0,
"everything_else": 1200
},
"uses_streaming": true,
"time_horizon_months": 12
},
"expected_card_ids": [
"amex_blue_cash_preferred"
],
"ev_tolerance_pct": 0.05
}
},
"factual_fidelity": {
"weight": 0.3,
"type": "automated",
"description": "Accuracy of factual claims about cards",
"reference": {
"extracted_claims": null,
"reference_ev_usd": null
}
},
"constraint_compliance": {
"weight": 0.15,
"type": "automated",
"description": "Respects user constraints (no annual fee preference, no travel cards)",
"checks": {
"expected_cards": [
"Amex Blue Cash Preferred"
],
"expected_housing_option": null
},
"hard_constraint": false
}
},
"passing_threshold": 0.6,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "COMPUTED",
"recommended_cards": [
"Amex Blue Cash Preferred"
],
"total_ev_usd": null,
"ev_breakdown": null,
"housing_option": null,
"key_constraints_flags": [
"no_annual_fee_preference",
"non_traveler"
],
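"expert_notes": "Ground truth EV is computed from card_database.json. Amex Blue Cash Preferred wins on category rewards: 6% on groceries at $800/mo ($576/yr, calculated on the full $9,600 of annual grocery spend, i.e. before any annual supermarket spending cap is applied), 6% on streaming at $80/mo ($57.60/yr), and 3% on gas at $200/mo ($72/yr). The annual fee is waived in the first year, so it does not reduce EV within the 12-month horizon. The Disney streaming credit is treated as usable."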
}
}