lexenvs-harbor / data /tasks /objective_hard_08.json
endishai's picture
Upload folder using huggingface_hub
2312199 verified
{
"task_id": "objective_hard_08",
"version": "2.0.0",
"created_at": "2026-03-18",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "hard",
"task_number": 8,
"complexity_hint": {
"max_tokens": 8000,
"expected_output": "multi-card portfolio with sequencing and full EV breakdown"
},
"requires_human_review": false
},
"prompt": {
"system": "",
"user": "You\u2019re an expert financial advisor. Tell the user how to change their credit card setup to maximize EV over the next 3 years.\nHi\\! I'm relatively new to the credit card game. When I initially started getting cards, it was solely with the intent of beefing up my credit file to increase my score and I did very minimal research into the actual reward distribution of my card selections. I'm hoping to look into changing that and maybe getting better bang for my buck. I'd appreciate some advice\\! :)\n\nCREDIT PROFILE\n\n* Current credit cards you are the primary account holder of: \n * Apple Card: $2,250, Nov 29, 2020 \n * Bob's Discount Furniture: $9.6k limit, Feb 28, 2021 (I keep this open only because I was advised to in order to keep my utilization down, I don't use this card at all) \n * Capital One Silver: $4.5k, May 12, 2021 \n * Amazon Prime: $3.2k, May 31, 2021 \n * Target RedCard: $2k, Sep 16, 2021 \n * Capital One SavorOne: $2k, Jan 23, 2022 \n* Experian FICO8: 747 \n* Annual income estimate: Around $35k before tax from W2 work. With S/O, Around $65k, although we both have additional supplemental income. I haven't added it here just because it varies a lot and we mostly keep it for savings/random spending.\n\nCATEGORIES\n\n* OK with category-specific cards?: I am okay with category specific cards. \n* OK with rotating category cards?: I would prefer not to do rotating categories. I'm really bad with keeping up with what's what this month, but if it's going to result in much higher savings and someone has advice on keeping track of the categories, I will definitely consider them\\! \n* Estimate average monthly spend in the categories below. Only include what you can pay by credit card. \n * Dining: \\~$350 \n * Groceries: I don't buy groceries frequently currently, but I'd like to change this. When I have, it's been about \\~$200 for the month and my preferred stores are Ralphs/Kroger and Target. \n * Gas: This varies heavily on if I'm doing Postmates or not in a month. I gas up at Arco mostly. \n * With Postmates: \\~$250/m \n * Without: \\~$50/m \n * * Travel $: $0. I travel very minimally, but would like to change this. If there is a card that would garnish enough points to make traveling worth it/more economical, I would be interested. \n * Do you plan on using this card abroad for a significant length of time (study abroad, digital nomad, expat, extended travel)?: No \n * Any other categories (examples: phone/internet, insurance) or stores (example: Amazon) with significant, regular credit card spend (the more you specify, the better): \\~$250 every other month for car maintenance. \n * Can you pay rent by credit card? If yes, list rent amount and if there's a fee for paying by credit card: $1450 with a 2.99% fee\n\nMEMBERSHIPS\n\n* Aside from lots of streaming services, I have an Amazon Prime Membership. \n* Current member of Chase, Wells Fargo, and BMTX, which I believe is under the Customer's Bank umbrella. \n* Business wise, I do Postmates as a side gig.\n\nPURPOSE\n\n* I'm looking to save money, be that through cash back or a rewards program that saves me money on things I would otherwise be purchasing out of pocket. \n* I've been looking into the Bilt with rent reward card. I don't have any points cards, although I'm not sure that's a good one to start with.",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"American Express Gold",
"American Express Platinum",
"Amex Blue Cash Preferred",
"Amex Green",
"Bilt Blue",
"Bilt Obsidian",
"Bilt Palladium",
"Capital One Venture",
"Capital One Venture X",
"Chase Freedom Flex",
"Chase Freedom Unlimited",
"Chase Sapphire Preferred",
"Chase Sapphire Reserve",
"Citi Custom Cash",
"Citi Double Cash",
"Citi Strata Premier",
"Wells Fargo Autograph"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"card_selection": {
"weight": 0.25,
"type": "automated",
"description": "F1 of recommended cards vs. optimal set",
"checks": {
"expected_cards": [
"American Express Gold",
"American Express Platinum",
"Capital One Venture",
"Chase Sapphire Preferred",
"Chase Sapphire Reserve"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.3,
"type": "automated",
"description": "EV accuracy vs. computed ground truth",
"reference": {
"user_profile": {
"monthly_spend": {
"dining": 300,
"groceries": 400,
"everything_else": 500
},
"time_horizon_months": 36
},
"expected_card_ids": [
"american_express_gold",
"american_express_platinum",
"capital_one_venture",
"chase_sapphire_preferred",
"chase_sapphire_reserve"
],
"ev_tolerance_pct": 0.1
}
},
"factual_fidelity": {
"weight": 0.3,
"type": "automated",
"description": "Accuracy of factual claims about cards",
"reference": {
"extracted_claims": null,
"reference_ev_usd": null
}
},
"constraint_compliance": {
"weight": 0.15,
"type": "automated",
"description": "Respects user constraints and issuer rules",
"checks": {
"expected_cards": [
"American Express Gold",
"American Express Platinum",
"Capital One Venture",
"Chase Sapphire Preferred",
"Chase Sapphire Reserve"
],
"expected_housing_option": null
},
"hard_constraint": false
}
},
"passing_threshold": 0.5,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "COMPUTED",
"recommended_cards": [
"American Express Gold",
"American Express Platinum",
"Capital One Venture",
"Chase Sapphire Preferred",
"Chase Sapphire Reserve"
],
"total_ev_usd": null,
"ev_breakdown": null,
"housing_option": null,
"key_constraints_flags": [],
"expert_notes": "Ground truth EV computed from card_database.json + user_profile at evaluation time."
}
}