{
"task_id": "objective_hard_20",
"version": "2.0.0",
"created_at": "2026-03-18",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "hard",
"task_number": 20,
"complexity_hint": {
"max_tokens": 8000,
"expected_output": "multi-card portfolio with sequencing and full EV breakdown"
},
"requires_human_review": false
},
"prompt": {
"system": "",
"user": "You\u2019re an expert financial advisor. Tell the user how to change their credit card setup to maximize EV over the next 3 years.\n\n# **39 years old with 3 credit cards ever - recs for the next one?**\n\nCREDIT PROFILE\n\n* Current credit cards you are the primary account holder of:\n\n* United Explorer Mileage Plus (24.1 k limit)\n\n* American Express Gold\n\n* Bank of America Customized Cash Rewards (8k limit)\n\n* FICO scores with source: Bank of America (TransUnion FICO 8): 834, Wells Fargo (Experian FICO 9): 819\n\n* Oldest credit card account age: 15 years\n\n* Cards approved in the past 6 months: 0\n\n* Cards approved in the past 12 months: 0\n\n* Cards approved in the past 24 months: 1\n\n* Annual income $: 90000\n\nCATEGORIES\n\n* Ok with category-specific cards?: Yes\n\n* Ok with rotating category cards?: Yes\n\n* Estimate average monthly spend in the categories below.\n\n* Dining $: 300\n\n* Groceries $: 200 regular grocery stores\n\n200 Amazon\n\n100 Walmart\n\n* Gas $: 75\n\n* Travel $: 200\n\n* Using abroad?: No\n\n* Other categories or stores: Annually - Car insurance $2000, drugstore $500, Amazon $2000\n\n* Other spend: Music $11 per month\n\n* Pay rent by card? No\n\nMEMBERSHIPS & SUBSCRIPTIONS\n\n* Amazon Prime member: Yes (but I cancel periodically to discourage spending)\n\n* Big bank customer: Chase and Bank of America\n\n* Open to business cards: Yes (small side gig though, not big business)\n\nPURPOSE\n\n* Purpose of next card: Other\n\n* Cards being considered: American Airlines, Southwest, Chase Sapphire Preferred, Amex Blue Cash, Amex Platinum, Chase Ink, Capital One Venture X\n\nADDITIONAL INFO\n\nCashback and travel are what I'm most interested in. I live in Tampa and Southwest, AA, and United seem to cover most of my routes from TPA. I'd like to start getting cards for cash back on specific categories. I only do this with the BofA customized cash rewards for gas.\n\n# Tasks 61-160",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"American Express Gold",
"American Express Platinum",
"Amex Blue Business Plus",
"Amex Blue Cash Preferred",
"Amex Business Gold",
"Amex Business Platinum",
"Amex Green",
"Bilt Blue",
"Bilt Obsidian",
"Bilt Palladium",
"Capital One Venture",
"Capital One Venture X",
"Chase Freedom Flex",
"Chase Freedom Unlimited",
"Chase Ink Business Preferred",
"Chase Ink Business Unlimited",
"Chase Sapphire Preferred",
"Chase Sapphire Reserve",
"Chase Southwest Rapid Rewards Plus",
"Chase Southwest Rapid Rewards Priority",
"Chase United Club",
"Chase United Explorer",
"Chase United Quest",
"Citi AAdvantage Executive",
"Citi AAdvantage Platinum Select",
"Citi Custom Cash",
"Citi Double Cash",
"Citi Strata Premier",
"Wells Fargo Autograph"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"card_selection": {
"weight": 0.25,
"type": "automated",
"description": "F1 of recommended cards vs. optimal set",
"checks": {
"expected_cards": [
"Chase Sapphire Preferred",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"American Express Gold",
"Chase United Explorer"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.3,
"type": "automated",
"description": "EV accuracy vs. computed ground truth",
"reference": {
"user_profile": {
"monthly_spend": {
"dining": 300.0,
"groceries": 200.0,
"gas": 75.0,
"travel": 200.0,
"flights": 0,
"hotels": 0,
"streaming": 0,
"online_shopping": 0,
"everything_else": 0
},
"lounge_visits_per_year": 0,
"delta_flights_per_year": 0,
"united_flights_per_year": 6,
"uses_uber": false,
"uses_doordash": false,
"uses_instacart": false,
"uses_streaming": false,
"has_global_entry": false,
"uses_clear": false,
"has_amazon_prime": true,
"uses_resy_restaurants": false,
"uses_hyatt_hotels": false,
"uses_marriott_hotels": false,
"uses_hilton_hotels": false,
"monthly_rent": 0,
"pays_housing_with_bilt": false,
"time_horizon_months": 36,
"existing_cards": [
"Sapphire Preferred",
"American Express Gold",
"Capital One Venture X"
],
"cards_opened_last_24_months": 1
},
"expected_card_ids": [
"chase_sapphire_preferred",
"chase_ink_business_preferred",
"chase_sapphire_reserve",
"american_express_gold",
"chase_united_explorer"
],
"ev_tolerance_pct": 0.1
}
},
"factual_fidelity": {
"weight": 0.3,
"type": "automated",
"description": "Accuracy of factual claims about cards",
"reference": {
"extracted_claims": null,
"reference_ev_usd": null
}
},
"constraint_compliance": {
"weight": 0.15,
"type": "automated",
"description": "Respects user constraints and issuer rules",
"checks": {
"expected_cards": [
"Chase Sapphire Preferred",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"American Express Gold",
"Chase United Explorer"
],
"expected_housing_option": null
},
"hard_constraint": false
}
},
"passing_threshold": 0.5,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "COMPUTED",
"recommended_cards": [
"Chase Sapphire Preferred",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"American Express Gold",
"Chase United Explorer"
],
"total_ev_usd": null,
"ev_breakdown": null,
"housing_option": null,
"key_constraints_flags": [],
"expert_notes": "Ground truth EV computed from card_database.json + user_profile at evaluation time."
}
}