Spaces:
Sleeping
Sleeping
File size: 4,277 Bytes
2312199 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 | {
"task_id": "objective_hard_15",
"version": "2.0.0",
"created_at": "2026-03-13",
"metadata": {
"domain": "credit_card_optimization",
"difficulty": "hard",
"task_number": 15,
"complexity_hint": {
"max_tokens": 8000,
"expected_output": "up to 3-card portfolio optimization with full EV breakdown over 24 months"
},
"requires_human_review": false
},
"prompt": {
"system": "",
"user": "You are a financial advisor. Recommend the best additions to optimize this user's credit card portfolio and calculate the expected value over 24 months.\n\nUser profile:\n- Monthly spending: $1000 dining, $600 groceries, $125 gas, $2000 travel, $8000 everything else (incl. rent, insurance, etc.)\n- Monthly rent: $3500 (pays with Bilt card)\n- Currently has: BoA Customized Cash, BoA Unlimited Cash, Chase Ink, Bilt Mastercard, Citi Double Cash, Chase Sapphire Reserve, Chase Freedom Unlimited, Amex Blue Cash Everyday\n- FICO: 780\n- Household income: $750K\n- Open to business cards\n- Both spouses are 1/24 for Chase\n- Likes Hyatt hotels, lives in United hub city\n- Time horizon: 24 months\n\nRecommend up to 3 new cards for the household. Show per-card EV breakdown.",
"knowledge_base_ref": "knowledge_base.md",
"kb_filter": [
"American Express Gold",
"American Express Platinum",
"Amex Business Platinum",
"Amex Business Gold",
"Chase Sapphire Reserve",
"Capital One Venture X",
"Citi Strata Elite",
"Citi Strata Premier",
"Chase World of Hyatt",
"Chase United Explorer",
"Chase United Club"
],
"system_prompt_ref": "system_prompt_template.md"
},
"scoring": {
"dimensions": {
"card_selection": {
"weight": 0.25,
"type": "automated",
"description": "F1 of recommended cards vs. computed optimal set",
"checks": {
"expected_cards": [
"American Express Gold",
"American Express Platinum",
"Chase United Club"
]
},
"hard_constraint": false
},
"ev_accuracy": {
"weight": 0.3,
"type": "automated",
"description": "EV accuracy vs. computed ground truth",
"reference": {
"user_profile": {
"monthly_spend": {
"dining": 1000.0,
"groceries": 600.0,
"travel": 1000.0,
"flights": 600.0,
"hotels": 400.0,
"gas": 125.0,
"everything_else": 8000.0
},
"time_horizon_months": 24,
"lounge_visits_per_year": 12,
"uses_uber": true,
"uses_streaming": true,
"has_global_entry": true,
"has_amazon_prime": true,
"monthly_rent": 3500.0,
"pays_housing_with_bilt": true,
"uses_hyatt_hotels": true
},
"expected_card_ids": [
"american_express_gold",
"american_express_platinum",
"chase_united_club"
],
"ev_tolerance_pct": 0.07
}
},
"factual_fidelity": {
"weight": 0.3,
"type": "automated",
"description": "Accuracy of factual claims about cards",
"reference": {
"extracted_claims": null,
"reference_ev_usd": null
}
},
"constraint_compliance": {
"weight": 0.15,
"type": "automated",
"description": "Respects user constraints",
"checks": {
"expected_cards": [
"American Express Gold",
"American Express Platinum",
"Chase United Club"
],
"expected_housing_option": null
},
"hard_constraint": false
}
},
"passing_threshold": 0.5,
"hard_constraint_failure_zeroes_dimension": true
},
"reference_solution": {
"_status": "COMPUTED",
"recommended_cards": [
"American Express Gold",
"American Express Platinum",
"Chase United Club"
],
"total_ev_usd": null,
"ev_breakdown": null,
"housing_option": null,
"key_constraints_flags": [
"max_3_new_cards",
"under_5_24",
"business_cards_ok"
],
"expert_notes": ""
}
}
|