{
"task_id": "easy_03",
"version": "1.0.0",
"created_at": "2026-03-11",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "easy",
"task_number": 3,
"complexity_hint": {
"max_tokens": 4000,
"expected_output": "single card recommendation with EV calculation"
},
"requires_human_review": true
},
"prompt": {
"system": "",
"user": "You are an expert financial advisor tasked with giving the user the optimal credit card advice for their situation.\n\nUser profile:\n- Travels monthly for work from a major airport\n- $5,000 a month in personal expenses\n- Wants just 1 card for the next 2+ years\n- Values lounge access at $10 a visit\n- Currently pays for TSA PreCheck, but not CLEAR\n\nWhich travel card should the user get, and what is their expected value from the card over two years?\n\nCard context: Amex Platinum, Capital One Venture X, Chase Sapphire Preferred, Chase Sapphire Reserve, Bilt Palladium, Citi Strata Elite, Citi AAdvantage Executive, Atmos Rewards Summit, Capital One Venture",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"American Express Platinum",
"Capital One Venture X",
"Chase Sapphire Preferred",
"Chase Sapphire Reserve",
"Bilt Palladium",
"Citi Strata Elite",
"Citi AAdvantage Executive",
"Atmos Rewards Summit",
"Capital One Venture"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"constraint_compliance": {
"weight": 0.3,
"type": "automated",
"description": "Hard rule checks: velocity limits, eligibility, user constraints",
"checks": {
"velocity_rules": null,
"eligibility_rules": null,
"user_constraints": null,
"expected_cards": [
"Capital One Venture X"
],
"expected_housing_option": null,
"key_constraints_flags": [
"single_card_constraint",
"lounge_valuation"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.4,
"type": "automated",
"description": "EV calculation accuracy vs. reference solution",
"reference": {
"reference_ev_usd": 6711.5,
"ev_tolerance_pct": 0.05
}
},
"reasoning_quality": {
"weight": 0.2,
"type": "human",
"description": "Quality of tradeoff articulation and strategic reasoning (0-3 scale)",
"rubric": {
"0": "No reasoning or incorrect reasoning",
"1": "Surface-level reasoning, misses key tradeoffs",
"2": "Correct tradeoffs identified with clear justification",
"3": "Expert-level nuance including edge cases and constraint interactions"
},
"score": null
},
"constraint_prioritization": {
"weight": 0.1,
"type": "human",
"description": "Correct handling of ambiguity and conflicting constraints",
"score": null
}
},
"passing_threshold": 0.6,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "EXPERT_REVIEWED",
"recommended_cards": [
"Capital One Venture X"
],
"total_ev_usd": 6711.5,
"ev_breakdown": {
"signup_bonuses_usd": 1387.5,
"ongoing_rewards_usd": 4810.0,
"credits_usd": 514.0,
"annual_fees_usd": 0.0,
"other_usd": 0.0
},
"housing_option": null,
"key_constraints_flags": [
"single_card_constraint",
"lounge_valuation"
],
"expert_notes": "Capital One Venture X is optimal for a single-card traveler. Annual rewards: 130k miles (120k from $60k/yr spend at 2x + 10k anniversary bonus) * 1.85cpp = $2,405. Annual benefits: lounge $240/yr (2 visits/mo * 12 mo * $10) + TSA PreCheck $17/yr = $257/yr. Signup bonus: 75k miles * 1.85cpp = $1,387.50. Expert total: ($2,405 + $257) = $2,662/yr * 2 years + $1,387.50 = $6,711.50. Note: the expert calculation does not explicitly subtract the $395 annual fee or add the $300 travel credit \u2014 these may be treated as roughly offsetting."
}
}