Spaces:
Sleeping
Sleeping
| { | |
| "task_id": "objective_easy_02", | |
| "version": "2.0.0", | |
| "created_at": "2026-03-12", | |
| "metadata": { | |
| "domain": "credit_card_optimization", | |
| "difficulty": "easy", | |
| "task_number": 2, | |
| "complexity_hint": { | |
| "max_tokens": 4000, | |
| "expected_output": "single card recommendation for a non-traveler with EV calculation" | |
| }, | |
| "requires_human_review": false | |
| }, | |
| "prompt": { | |
| "system": "", | |
| "user": "You are a financial advisor. Recommend the single best credit card for this user and calculate the expected value over 12 months.\n\nUser profile:\n- Monthly spending: $300 dining, $800 groceries, $0 travel/flights/hotels, $200 gas, $80 streaming, $0 transit, $1200 everything else\n- Does NOT travel (0 trips per year, no lounge use)\n- Does NOT use Uber, rideshare, or food delivery apps\n- Uses streaming services (Netflix, Spotify)\n- No existing credit cards\n- Time horizon: 12 months\n\nThis user wants the simplest, highest-value card with no annual fee. Provide a detailed EV breakdown.", | |
| "knowledge_base_ref": "knowledge_base.md", | |
| "kb_filter": [ | |
| "Chase Freedom Unlimited", | |
| "Chase Freedom Flex", | |
| "Citi Double Cash", | |
| "Amex Blue Cash Preferred", | |
| "Capital One Savor", | |
| "BofA Customized Cash Rewards", | |
| "Wells Fargo Autograph" | |
| ], | |
| "system_prompt_ref": "system_prompt_template.md" | |
| }, | |
| "scoring": { | |
| "dimensions": { | |
| "card_selection": { | |
| "weight": 0.25, | |
| "type": "automated", | |
| "description": "F1 of recommended cards vs. optimal set", | |
| "checks": { | |
| "expected_cards": [ | |
| "Amex Blue Cash Preferred" | |
| ] | |
| }, | |
| "hard_constraint": false | |
| }, | |
| "ev_accuracy": { | |
| "weight": 0.3, | |
| "type": "automated", | |
| "description": "EV accuracy vs. computed ground truth", | |
| "reference": { | |
| "user_profile": { | |
| "monthly_spend": { | |
| "dining": 300, | |
| "groceries": 800, | |
| "travel": 0, | |
| "flights": 0, | |
| "hotels": 0, | |
| "gas": 200, | |
| "streaming": 80, | |
| "transit": 0, | |
| "everything_else": 1200 | |
| }, | |
| "uses_streaming": true, | |
| "time_horizon_months": 12 | |
| }, | |
| "expected_card_ids": [ | |
| "amex_blue_cash_preferred" | |
| ], | |
| "ev_tolerance_pct": 0.05 | |
| } | |
| }, | |
| "factual_fidelity": { | |
| "weight": 0.3, | |
| "type": "automated", | |
| "description": "Accuracy of factual claims about cards", | |
| "reference": { | |
| "extracted_claims": null, | |
| "reference_ev_usd": null | |
| } | |
| }, | |
| "constraint_compliance": { | |
| "weight": 0.15, | |
| "type": "automated", | |
| "description": "Respects user constraints (prefers a card with no annual fee; non-traveler, so no travel cards)", | |
| "checks": { | |
| "expected_cards": [ | |
| "Amex Blue Cash Preferred" | |
| ], | |
| "expected_housing_option": null | |
| }, | |
| "hard_constraint": false | |
| } | |
| }, | |
| "passing_threshold": 0.6, | |
| "hard_constraint_failure_zeroes_dimension": true | |
| }, | |
| "reference_solution": { | |
| "_status": "COMPUTED", | |
| "recommended_cards": [ | |
| "Amex Blue Cash Preferred" | |
| ], | |
| "total_ev_usd": null, | |
| "ev_breakdown": null, | |
| "housing_option": null, | |
| "key_constraints_flags": [ | |
| "no_annual_fee_preference", | |
| "non_traveler" | |
| ], | |
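| "expert_notes": "Ground truth EV computed from card_database.json. Blue Cash Preferred wins due to 6% on groceries ($800/mo = $9,600/yr, giving $576/yr), 6% on streaming ($80/mo, giving $57.60/yr), and 3% on gas ($200/mo, giving $72/yr). The annual fee is waived in the first year, so it does not reduce the 12-month EV. The Disney streaming credit is usable. NOTE(review): the $576 grocery figure applies 6% to the full $9,600; the card's published terms cap the 6% supermarket rate at $6,000/yr (1% after), which would give $396/yr - confirm how card_database.json models the cap." | |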
| } | |
| } | |