lexenvs-harbor/data/tasks/objective_hard_06.json
endishai's picture
Upload folder using huggingface_hub
2312199 verified
{
"task_id": "objective_hard_06",
"version": "2.0.0",
"created_at": "2026-03-13",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "hard",
"task_number": 6,
"complexity_hint": {
"max_tokens": 8000,
"expected_output": "up to 3-card portfolio optimization with full EV breakdown over 24 months"
},
"requires_human_review": false
},
"prompt": {
"system": "",
"user": "You are a financial advisor. Build an optimal credit card portfolio for this user and calculate the total expected value over 24 months.\n\nUser profile:\n- Monthly spending: $800 dining, $200 groceries, $300 travel (flights/hotels), $400 other\n- Travels 1x/month at least, out of SFO/JFK/SAN\n- FICO: 715\n- Income: $150K\n- Currently has: Wells Fargo Cash Card ($22K limit), BOA Travel Card ($4.5K limit)\n- Interested in perks beyond cash back\n- Considering: Capital One Venture X, Chase Sapphire Reserve\n- No business cards\n- Time horizon: 24 months\n\nRecommend up to 3 cards. Show per-card EV breakdown.",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"American Express Gold",
"American Express Platinum",
"Chase Sapphire Reserve",
"Chase Sapphire Preferred",
"Capital One Venture X",
"Capital One Venture",
"Citi Strata Premier",
"Citi Strata Elite",
"Chase Freedom Unlimited",
"Amex Green",
"Wells Fargo Autograph Journey"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"card_selection": {
"weight": 0.25,
"type": "automated",
"description": "F1 of recommended cards vs. computed optimal set",
"checks": {
"expected_cards": [
"American Express Gold",
"American Express Platinum",
"Chase Sapphire Reserve"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.3,
"type": "automated",
"description": "EV accuracy vs. computed ground truth",
"reference": {
"user_profile": {
"monthly_spend": {
"dining": 800.0,
"groceries": 200.0,
"travel": 100.0,
"flights": 100.0,
"hotels": 100.0,
"everything_else": 400.0
},
"time_horizon_months": 24,
"lounge_visits_per_year": 12,
"uses_uber": true,
"uses_streaming": true,
"has_amazon_prime": true
},
"expected_card_ids": [
"american_express_gold",
"american_express_platinum",
"chase_sapphire_reserve"
],
"ev_tolerance_pct": 0.07
}
},
"factual_fidelity": {
"weight": 0.3,
"type": "automated",
"description": "Accuracy of factual claims about cards",
"reference": {
"extracted_claims": null,
"reference_ev_usd": null
}
},
"constraint_compliance": {
"weight": 0.15,
"type": "automated",
"description": "Respects user constraints",
"checks": {
"expected_cards": [
"American Express Gold",
"American Express Platinum",
"Chase Sapphire Reserve"
],
"expected_housing_option": null
},
"hard_constraint": false
}
},
"passing_threshold": 0.5,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "COMPUTED",
"recommended_cards": [
"American Express Gold",
"American Express Platinum",
"Chase Sapphire Reserve"
],
"total_ev_usd": null,
"ev_breakdown": null,
"housing_option": null,
"key_constraints_flags": [
"max_3_cards",
"no_business_cards"
],
"expert_notes": ""
}
}