{
"task_id": "objective_hard_12",
"version": "2.0.0",
"created_at": "2026-03-18",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "hard",
"task_number": 12,
"complexity_hint": {
"max_tokens": 8000,
"expected_output": "multi-card portfolio with sequencing and full EV breakdown"
},
"requires_human_review": false
},
"prompt": {
"system": "",
"user": "You\u2019re an expert financial advisor. Tell the user how to change their credit card setup to maximize EV over the next 3 years.\nHi all,\n\nPretty new to the credit game (about a year and a half of researching and learning) and was wondering what you all could recommend or change. I am starting my real adult life with my soon-to-be wife, just started renting a new place, and am going to be paying for portions of my wedding/honeymoon. I also don't have any other obligations other than car insurance, and your typical expenses (phone service bill, wifi, etc).\n\n* Current cards: (list cards, limits, opening date) \n * Apple Card - $3,500 limit, 10/2020 \n * Capital One Savor One - $7,000 limit, 8/2023 \n * Citi Double Cash - $4,000 limit (Possible increase in a week), 11/2023 \n * Chase Sapphire Preferred - $18,000 limit, 7/2024 \n * Discover It - $0 limit (closed), 7/2019 (closed 02/2023) \n* FICO Score: Citi 675 (FICO 8\\) - Capital One 757 (VantageScore 3.0) - Chase 754 (VantageScore 3.0) \n* Oldest account age: 5 years 3 months \n* Chase 5/24 status: 1/24 \n* Income: $80,000 (individual), $130,000 (joint) \n* Average monthly spend and categories: \n * dining $500 \n * groceries: $600 \n * gas: $120 \n * travel: $100 \n * other: $800 \n* Open to Business Cards: Yes (Explain like I'm 5 how this can work if recommending a business card) \n* What's the purpose of your next card? Travel/Cashback/Upgrade - \n* Do you have any cards you've been looking at? No \n* Are you OK with category spending or do you want a general spending card? Category is fine",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"American Express Gold",
"American Express Platinum",
"Amex Blue Business Plus",
"Amex Blue Cash Preferred",
"Amex Business Gold",
"Amex Business Platinum",
"Amex Green",
"Bilt Blue",
"Bilt Obsidian",
"Bilt Palladium",
"Capital One Venture",
"Capital One Venture X",
"Chase Freedom Flex",
"Chase Freedom Unlimited",
"Chase Ink Business Preferred",
"Chase Ink Business Unlimited",
"Chase Sapphire Preferred",
"Chase Sapphire Reserve",
"Citi Custom Cash",
"Citi Double Cash",
"Citi Strata Premier",
"Wells Fargo Autograph"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"card_selection": {
"weight": 0.25,
"type": "automated",
"description": "F1 of recommended cards vs. optimal set",
"checks": {
"expected_cards": [
"American Express Gold",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"Capital One Venture",
"Amex Business Platinum"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.3,
"type": "automated",
"description": "EV accuracy vs. computed ground truth",
"reference": {
"user_profile": {
"monthly_spend": {
"dining": 500.0,
"groceries": 600.0,
"gas": 120.0,
"travel": 100.0,
"flights": 0,
"hotels": 0,
"streaming": 0,
"online_shopping": 0,
"everything_else": 800.0
},
"lounge_visits_per_year": 0,
"delta_flights_per_year": 0,
"united_flights_per_year": 0,
"uses_uber": false,
"uses_doordash": false,
"uses_instacart": false,
"uses_streaming": false,
"has_global_entry": false,
"uses_clear": false,
"has_amazon_prime": false,
"uses_resy_restaurants": false,
"uses_hyatt_hotels": false,
"uses_marriott_hotels": false,
"uses_hilton_hotels": false,
"monthly_rent": 0,
"pays_housing_with_bilt": false,
"time_horizon_months": 36,
"existing_cards": [
"Sapphire Preferred",
"Capital One Savor",
"Citi Double Cash",
"Discover It",
"Apple Card"
],
"cards_opened_last_24_months": 5
},
"expected_card_ids": [
"american_express_gold",
"chase_ink_business_preferred",
"chase_sapphire_reserve",
"capital_one_venture",
"amex_business_platinum"
],
"ev_tolerance_pct": 0.1
}
},
"factual_fidelity": {
"weight": 0.3,
"type": "automated",
"description": "Accuracy of factual claims about cards",
"reference": {
"extracted_claims": null,
"reference_ev_usd": null
}
},
"constraint_compliance": {
"weight": 0.15,
"type": "automated",
"description": "Respects user constraints and issuer rules",
"checks": {
"expected_cards": [
"American Express Gold",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"Capital One Venture",
"Amex Business Platinum"
],
"expected_housing_option": null
},
"hard_constraint": false
}
},
"passing_threshold": 0.5,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "COMPUTED",
"recommended_cards": [
"American Express Gold",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"Capital One Venture",
"Amex Business Platinum"
],
"total_ev_usd": null,
"ev_breakdown": null,
"housing_option": null,
"key_constraints_flags": [],
"expert_notes": "Ground truth EV computed from card_database.json + user_profile at evaluation time."
}
}