{
  "task_id": "objective_hard_01",
  "version": "2.0.0",
  "created_at": "2026-03-12",
  "metadata": {
    "domain": "credit_card_optimization",
    "difficulty": "hard",
    "task_number": 1,
    "complexity_hint": {
      "max_tokens": 8000,
      "expected_output": "5-card portfolio with sequencing, constraint analysis, and full EV breakdown over 24 months"
    },
    "requires_human_review": false
  },
  "prompt": {
    "system": "",
    "user": "You are a financial advisor. Build an optimal 5-card credit card portfolio for this user over a 24-month horizon. This user is sophisticated and wants to maximize total expected value.\n\nUser profile:\n- Monthly spending: $1200 dining, $800 groceries, $400 travel, $600 flights (books direct), $500 hotels (books direct), $250 gas, $80 streaming, $200 transit, $100 office supplies, $150 shipping, $1500 everything else\n- Takes 20 trips per year (frequent business traveler, highly values lounge access at $30/visit)\n- Uses Uber ($200/yr Uber Cash easily), Instacart, DoorDash\n- Uses streaming services heavily\n- Dines at Resy restaurants 2x/week\n- Has Global Entry, uses CLEAR\n- Currently holds: Chase Freedom Flex (opened 6 months ago). That's it — 1/24.\n- Time horizon: 24 months\n- Willing to pay high annual fees if math works out\n- Has a business (can apply for business cards)\n- Monthly rent: $2,500 (currently pays by check)\n\nConstraints:\n- Must stay under 5/24 for Chase cards\n- Must respect Amex card sequences (Platinum before Gold)\n- Provide card application ordering/timing\n- Calculate EV for the full portfolio including all signup bonuses, ongoing rewards, credits, perks, and fees\n\nRecommend exactly 5 cards with application order and detailed per-card EV.",
    "knowledge_base_ref": "knowledge_base.md",
    "kb_filter": [
      "American Express Platinum",
      "American Express Gold",
      "Amex Business Platinum",
      "Amex Business Gold",
      "Amex Blue Business Plus",
      "Chase Sapphire Reserve",
      "Chase Sapphire Preferred",
      "Chase Ink Business Preferred",
      "Chase Freedom Unlimited",
      "Capital One Venture X",
      "Bilt Palladium",
      "Bilt Obsidian",
      "Bilt Blue",
      "Citi Strata Elite",
      "Citi Strata Premier"
    ],
    "system_prompt_ref": "system_prompt_template.md"
  },
  "scoring": {
    "dimensions": {
      "card_selection": {
        "weight": 0.25,
        "type": "automated",
        "description": "F1 of recommended cards vs. computed optimal set",
        "checks": {
          "expected_cards": [
            "American Express Platinum",
            "American Express Gold",
            "Amex Business Platinum",
            "Chase Sapphire Reserve",
            "Bilt Palladium"
          ]
        },
        "hard_constraint": false
      },
      "ev_accuracy": {
        "weight": 0.30,
        "type": "automated",
        "description": "EV accuracy vs. computed ground truth",
        "reference": {
          "user_profile": {
            "monthly_spend": {
              "dining": 1200,
              "groceries": 800,
              "travel": 400,
              "flights": 600,
              "hotels": 500,
              "gas": 250,
              "streaming": 80,
              "transit": 200,
              "office_supply": 100,
              "shipping": 150,
              "everything_else": 1500
            },
            "lounge_visits_per_year": 20,
            "delta_flights_per_year": 0,
            "united_flights_per_year": 0,
            "uses_uber": true,
            "uses_instacart": true,
            "uses_doordash": true,
            "uses_streaming": true,
            "uses_resy_restaurants": true,
            "uses_rideshare": true,
            "uses_clear": true,
            "has_global_entry": true,
            "monthly_rent": 2500,
            "pays_housing_with_bilt": true,
            "time_horizon_months": 24,
            "existing_cards": ["Chase Freedom Flex"],
            "cards_opened_last_24_months": 1
          },
          "expected_card_ids": [
            "american_express_platinum",
            "american_express_gold",
            "amex_business_platinum",
            "chase_sapphire_reserve",
            "bilt_palladium"
          ],
          "ev_tolerance_pct": 0.10
        }
      },
      "factual_fidelity": {
        "weight": 0.30,
        "type": "automated",
        "description": "Accuracy of factual claims about cards",
        "reference": {
          "extracted_claims": null,
          "reference_ev_usd": null
        }
      },
      "constraint_compliance": {
        "weight": 0.15,
        "type": "automated",
        "description": "Respects the Chase 5/24 rule, Amex card sequencing (Platinum before Gold), the 5-card limit, and application ordering/timing",
        "checks": {
          "expected_cards": [
            "American Express Platinum",
            "American Express Gold",
            "Amex Business Platinum",
            "Chase Sapphire Reserve",
            "Bilt Palladium"
          ],
          "expected_housing_option": null
        },
        "hard_constraint": false
      }
    },
    "passing_threshold": 0.5,
    "hard_constraint_failure_zeroes_dimension": false
  },
  "reference_solution": {
    "_status": "COMPUTED",
    "recommended_cards": [
      "American Express Platinum",
      "American Express Gold",
      "Amex Business Platinum",
      "Chase Sapphire Reserve",
      "Bilt Palladium"
    ],
    "total_ev_usd": null,
    "ev_breakdown": null,
    "housing_option": null,
    "key_constraints_flags": [
      "5_24_rule",
      "amex_card_sequence",
      "5_card_limit",
      "business_cards",
      "housing_optimization"
    ],
    "expert_notes": "Ground-truth EV computed from card_database.json plus the solver. Optimal 5-card set: American Express Platinum (signup bonus $3500 + lounge access + credits), American Express Gold (4x on dining/groceries), Amex Business Platinum (signup bonus $4000 + credits), Chase Sapphire Reserve (signup bonus $2562 + hotels at 3x Ultimate Rewards points), Bilt Palladium (1x on $2500/mo rent + 2x catch-all at 2.2 cents per point + Priority Pass). Total portfolio EV ~$23,451 over 24 months."
  }
}