Spaces:
Sleeping
Sleeping
| { | |
| "task_id": "objective_hard_15", | |
| "version": "2.0.0", | |
| "created_at": "2026-03-13", | |
| "metadata": { | |
| "domain": "credit_card_optimization", | |
| "difficulty": "hard", | |
| "task_number": 15, | |
| "complexity_hint": { | |
| "max_tokens": 8000, | |
| "expected_output": "portfolio optimization recommending up to 3 new cards, with a full per-card expected value (EV) breakdown over 24 months" | |
| }, | |
| "requires_human_review": false | |
| }, | |
| "prompt": { | |
| "system": "", | |
| "user": "You are a financial advisor. Recommend the best additions to optimize this user's credit card portfolio and calculate the expected value over 24 months.\n\nUser profile:\n- Monthly spending: $1000 dining, $600 groceries, $125 gas, $2000 travel, $8000 everything else (incl. rent, insurance, etc.)\n- Monthly rent: $3500 (pays with Bilt card)\n- Currently has: BoA Customized Cash, BoA Unlimited Cash, Chase Ink, Bilt Mastercard, Citi Double Cash, Chase Sapphire Reserve, Chase Freedom Unlimited, Amex Blue Cash Everyday\n- FICO: 780\n- Household income: $750K\n- Open to business cards\n- Both spouses are 1/24 for Chase\n- Likes Hyatt hotels, lives in United hub city\n- Time horizon: 24 months\n\nRecommend up to 3 new cards for the household. Show per-card EV breakdown.", | |
| "knowledge_base_ref": "knowledge_base.md", | |
| "kb_filter": [ | |
| "American Express Gold", | |
| "American Express Platinum", | |
| "Amex Business Platinum", | |
| "Amex Business Gold", | |
| "Chase Sapphire Reserve", | |
| "Capital One Venture X", | |
| "Citi Strata Elite", | |
| "Citi Strata Premier", | |
| "Chase World of Hyatt", | |
| "Chase United Explorer", | |
| "Chase United Club" | |
| ], | |
| "system_prompt_ref": "system_prompt_template.md" | |
| }, | |
| "scoring": { | |
| "dimensions": { | |
| "card_selection": { | |
| "weight": 0.25, | |
| "type": "automated", | |
| "description": "F1 score of the recommended cards against the computed optimal card set", | |
| "checks": { | |
| "expected_cards": [ | |
| "American Express Gold", | |
| "American Express Platinum", | |
| "Chase United Club" | |
| ] | |
| }, | |
| "hard_constraint": false | |
| }, | |
| "ev_accuracy": { | |
| "weight": 0.3, | |
| "type": "automated", | |
| "description": "Accuracy of the reported expected value (EV) relative to the computed ground truth", | |
| "reference": { | |
| "user_profile": { | |
| "monthly_spend": { | |
| "dining": 1000.0, | |
| "groceries": 600.0, | |
| "travel": 1000.0, | |
| "flights": 600.0, | |
| "hotels": 400.0, | |
| "gas": 125.0, | |
| "everything_else": 8000.0 | |
| }, | |
| "time_horizon_months": 24, | |
| "lounge_visits_per_year": 12, | |
| "uses_uber": true, | |
| "uses_streaming": true, | |
| "has_global_entry": true, | |
| "has_amazon_prime": true, | |
| "monthly_rent": 3500.0, | |
| "pays_housing_with_bilt": true, | |
| "uses_hyatt_hotels": true | |
| }, | |
| "expected_card_ids": [ | |
| "american_express_gold", | |
| "american_express_platinum", | |
| "chase_united_club" | |
| ], | |
| "ev_tolerance_pct": 0.07 | |
| } | |
| }, | |
| "factual_fidelity": { | |
| "weight": 0.3, | |
| "type": "automated", | |
| "description": "Accuracy of factual claims about cards", | |
| "reference": { | |
| "extracted_claims": null, | |
| "reference_ev_usd": null | |
| } | |
| }, | |
| "constraint_compliance": { | |
| "weight": 0.15, | |
| "type": "automated", | |
| "description": "Respects user constraints", | |
| "checks": { | |
| "expected_cards": [ | |
| "American Express Gold", | |
| "American Express Platinum", | |
| "Chase United Club" | |
| ], | |
| "expected_housing_option": null | |
| }, | |
| "hard_constraint": false | |
| } | |
| }, | |
| "passing_threshold": 0.5, | |
| "hard_constraint_failure_zeroes_dimension": true | |
| }, | |
| "reference_solution": { | |
| "_status": "COMPUTED", | |
| "recommended_cards": [ | |
| "American Express Gold", | |
| "American Express Platinum", | |
| "Chase United Club" | |
| ], | |
| "total_ev_usd": null, | |
| "ev_breakdown": null, | |
| "housing_option": null, | |
| "key_constraints_flags": [ | |
| "max_3_new_cards", | |
| "under_5_24", | |
| "business_cards_ok" | |
| ], | |
| "expert_notes": "" | |
| } | |
| } | |