lexenvs-harbor / data /tasks /objective_hard_12.json
endishai's picture
Upload folder using huggingface_hub
2312199 verified
{
"task_id": "objective_hard_12",
"version": "2.0.0",
"created_at": "2026-03-18",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "hard",
"task_number": 12,
"complexity_hint": {
"max_tokens": 8000,
"expected_output": "multi-card portfolio with sequencing and full EV breakdown"
},
"requires_human_review": false
},
"prompt": {
"system": "",
"user": "You\u2019re an expert financial advisor. Tell the user how to change their credit card setup to maximize EV over the next 3 years.\nHi all,\n\nPretty new to the credit game (about a year and a half of researching and learning) and was wondering what you all could recommend or change. I am starting my real adult life with my soon-to-be wife, just started renting a new place, and am going to be paying for portions of my wedding/honeymoon. I also don't have any other obligations other than car insurance, and your typical expenses (phone service bill, wifi, etc).\n\n* Current cards: (list cards, limits, opening date) \n * Apple Card - $3,500 limit, 10/2020 \n * Capital One Savor One - $7,000 limit, 8/2023 \n * Citi Double Cash - $4,000 limit (Possible increase in a week), 11/2023 \n * Chase Sapphire Preferred - $18,000 limit, 7/2024 \n * Discover It - $0 limit (closed), 7/2019 (closed 02/2023) \n* FICO Score: Citi 675 (FICO 8) - Capital One 757 (VantageScore 3.0) - Chase 754 (VantageScore 3.0) \n* Oldest account age: 5 years 3 months \n* Chase 5/24 status: 1/24 \n* Income: $80,000 (individual), $130,000 (joint) \n* Average monthly spend and categories: \n * dining $500 \n * groceries: $600 \n * gas: $120 \n * travel: $100 \n * other: $800 \n* Open to Business Cards: Yes (Explain like I'm 5 how this can work if recommending a business card) \n* What's the purpose of your next card? Travel/Cashback/Upgrade - \n* Do you have any cards you've been looking at? No \n* Are you OK with category spending or do you want a general spending card? Category is fine",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"American Express Gold",
"American Express Platinum",
"Amex Blue Business Plus",
"Amex Blue Cash Preferred",
"Amex Business Gold",
"Amex Business Platinum",
"Amex Green",
"Bilt Blue",
"Bilt Obsidian",
"Bilt Palladium",
"Capital One Venture",
"Capital One Venture X",
"Chase Freedom Flex",
"Chase Freedom Unlimited",
"Chase Ink Business Preferred",
"Chase Ink Business Unlimited",
"Chase Sapphire Preferred",
"Chase Sapphire Reserve",
"Citi Custom Cash",
"Citi Double Cash",
"Citi Strata Premier",
"Wells Fargo Autograph"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"card_selection": {
"weight": 0.25,
"type": "automated",
"description": "F1 of recommended cards vs. optimal set",
"checks": {
"expected_cards": [
"American Express Gold",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"Capital One Venture",
"Amex Business Platinum"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.3,
"type": "automated",
"description": "EV accuracy vs. computed ground truth",
"reference": {
"user_profile": {
"monthly_spend": {
"dining": 500.0,
"groceries": 600.0,
"gas": 120.0,
"travel": 100.0,
"flights": 0,
"hotels": 0,
"streaming": 0,
"online_shopping": 0,
"everything_else": 800.0
},
"lounge_visits_per_year": 0,
"delta_flights_per_year": 0,
"united_flights_per_year": 0,
"uses_uber": false,
"uses_doordash": false,
"uses_instacart": false,
"uses_streaming": false,
"has_global_entry": false,
"uses_clear": false,
"has_amazon_prime": false,
"uses_resy_restaurants": false,
"uses_hyatt_hotels": false,
"uses_marriott_hotels": false,
"uses_hilton_hotels": false,
"monthly_rent": 0,
"pays_housing_with_bilt": false,
"time_horizon_months": 36,
"existing_cards": [
"Sapphire Preferred",
"Capital One Savor",
"Citi Double Cash",
"Discover It",
"Apple Card"
],
"cards_opened_last_24_months": 1
},
"expected_card_ids": [
"american_express_gold",
"chase_ink_business_preferred",
"chase_sapphire_reserve",
"capital_one_venture",
"amex_business_platinum"
],
"ev_tolerance_pct": 0.1
}
},
"factual_fidelity": {
"weight": 0.3,
"type": "automated",
"description": "Accuracy of factual claims about cards",
"reference": {
"extracted_claims": null,
"reference_ev_usd": null
}
},
"constraint_compliance": {
"weight": 0.15,
"type": "automated",
"description": "Respects user constraints and issuer rules",
"checks": {
"expected_cards": [
"American Express Gold",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"Capital One Venture",
"Amex Business Platinum"
],
"expected_housing_option": null
},
"hard_constraint": false
}
},
"passing_threshold": 0.5,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "COMPUTED",
"recommended_cards": [
"American Express Gold",
"Chase Ink Business Preferred",
"Chase Sapphire Reserve",
"Capital One Venture",
"Amex Business Platinum"
],
"total_ev_usd": null,
"ev_breakdown": null,
"housing_option": null,
"key_constraints_flags": [],
"expert_notes": "Ground truth EV computed from card_database.json + user_profile at evaluation time."
}
}