lexenvs-harbor/data/tasks/objective_hard_18.json
endishai's picture
Upload folder using huggingface_hub
2312199 verified
{
"task_id": "objective_hard_18",
"version": "2.0.0",
"created_at": "2026-03-13",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "hard",
"task_number": 18,
"complexity_hint": {
"max_tokens": 8000,
"expected_output": "Optimized portfolio of up to 3 cards with a full per-card EV breakdown over 24 months"
},
"requires_human_review": false
},
"prompt": {
"system": "",
"user": "You are a financial advisor. Build an optimal credit card portfolio for this user and calculate the total expected value over 24 months.\n\nUser profile:\n- Monthly spending: $800 dining, $400 groceries (+ $500 Costco), $150 gas, $12000 travel (airlines + hotels via Expedia/Priceline), $300 phone bill, $2000 other shopping\n- Currently has: Chase Sapphire Preferred, Citi Costco card (both 10+ years old)\n- FICO: 840\n- Income: $300K\n- Has Amazon Prime, Costco membership\n- OK with category-specific cards, no rotating categories\n- No business cards\n- Purpose: maximize travel rewards/points\n- Time horizon: 24 months\n\nRecommend up to 3 cards. Show per-card EV breakdown.",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"American Express Platinum",
"American Express Gold",
"Chase Sapphire Reserve",
"Capital One Venture X",
"Citi Strata Elite",
"Citi Strata Premier",
"Chase Freedom Unlimited",
"Amex Green",
"Capital One Venture"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"card_selection": {
"weight": 0.25,
"type": "automated",
"description": "F1 of recommended cards vs. computed optimal set",
"checks": {
"expected_cards": [
"American Express Platinum",
"Citi Strata Premier",
"Capital One Venture"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.3,
"type": "automated",
"description": "EV accuracy vs. computed ground truth",
"reference": {
"user_profile": {
"monthly_spend": {
"dining": 800.0,
"groceries": 900.0,
"travel": 6000.0,
"flights": 4000.0,
"hotels": 2000.0,
"gas": 150.0,
"telecom": 300.0,
"everything_else": 2000.0
},
"time_horizon_months": 24,
"lounge_visits_per_year": 12,
"uses_uber": true,
"uses_streaming": true,
"has_amazon_prime": true
},
"expected_card_ids": [
"american_express_platinum",
"citi_strata_premier",
"capital_one_venture"
],
"ev_tolerance_pct": 0.07
}
},
"factual_fidelity": {
"weight": 0.3,
"type": "automated",
"description": "Accuracy of factual claims about cards",
"reference": {
"extracted_claims": null,
"reference_ev_usd": null
}
},
"constraint_compliance": {
"weight": 0.15,
"type": "automated",
"description": "Respects the user's stated constraints (at most 3 cards, no business cards, no rotating categories)",
"checks": {
"expected_cards": [
"American Express Platinum",
"Citi Strata Premier",
"Capital One Venture"
],
"expected_housing_option": null
},
"hard_constraint": false
}
},
"passing_threshold": 0.5,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "COMPUTED",
"recommended_cards": [
"American Express Platinum",
"Citi Strata Premier",
"Capital One Venture"
],
"total_ev_usd": null,
"ev_breakdown": null,
"housing_option": null,
"key_constraints_flags": [
"max_3_cards",
"no_business_cards",
"no_rotating_categories"
],
"expert_notes": ""
}
}