File size: 7,413 Bytes
2312199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
{
  "task_id": "objective_hard_11",
  "version": "2.0.0",
  "created_at": "2026-03-18",
  "metadata": {
    "domain": "credit_card_optimization",
    "difficulty": "hard",
    "task_number": 11,
    "complexity_hint": {
      "max_tokens": 8000,
      "expected_output": "multi-card portfolio with sequencing and full EV breakdown"
    },
    "requires_human_review": false
  },
  "prompt": {
    "system": "",
    "user": "You\u2019re an expert financial advisor. Tell the user how to change their credit card setup to maximize EV over the next 3 years.\n\n# **Card Recommendation Request**\n\nCREDIT PROFILE\n\n* Current credit cards you are the primary account holder of: (list cards, limits, opening date):   \n  * *Capital One Quicksilver $5000 limit, September 2019*  \n  * *Capital One Venture $14,000 limit, May 2022*  \n  * *TD Bank $8000 limit, April 2017*  \n* FICO Scores with source (see note on FICO score sources below): *Experian 776*  \n* * Oldest credit card account age with you as primary name on the account: *8 years 7 months*  \n* * Number of personal credit cards approved for in the past 6 months: 0  \n* * Number of personal credit cards approved for in the past 12 months: 1  \n* * Number of personal credit cards approved for in the past 24 months: 1  \n* * Annual income $75,000\n\nCATEGORIES\n\n* * OK with category-specific cards?: *Yes*  \n* * OK with rotating category cards?: *Yes*  \n* * Estimate average monthly spend in the categories below. Only include what you can pay by credit card.  \n  * * Dining $350  \n  * * Groceries $300  \n  * * Gas $0  \n  * * Travel:  \n    * Flights: $150  \n    * MTA: $100  \n    * Cabs: $15  \n  * * Do you plan on using this card abroad for a significant length of time (study abroad, digital nomad, expat, extended travel)?: No  \n  * * Any other categories (examples: phone/internet, insurance) or stores (example: Amazon) with significant, regular credit card spend (the more you specify, the better):  \n* * Any other significant, regular credit card spend you didn't include above?: * Hobbies: $40  \n  * * Can you pay rent by credit card? If yes, list rent amount and if there's a fee for paying by credit card: No\n\nMEMBERSHIPS & SUBSCRIPTIONS (delete lines that don't apply)\n\n* * Currently paying $13.99/month or more for Disney Bundle (Disney+ / Hulu / ESPN+) or other Hulu services?: $30/month  \n* * Current member of Chase, US Bank or any other big bank?: Capital One  \n* * Are you open to Business Cards?: *Yes, a small portion of my income is via 1099*\n\nPURPOSE\n\n* * What's the purpose of your next card (choose ONE)?: *Travel rewards*  \n  * * If you answered \"travel rewards\", do you have a preferred airline and/or hotel chain you regularly use? *Delta*  \n* * Do you have any cards you've been looking at?  \n  * I've been looking at the AmEx Delta - I am technically in pop-up jail, but since I opened and closed the account quickly because I got nervous about the fee, I think I might be able to call them and see if they will allow me to open a new card now.",
    "knowledge_base_ref": "knowledge_base.md",
    "kb_filter": [
      "American Express Gold",
      "American Express Platinum",
      "Amex Blue Business Plus",
      "Amex Blue Cash Preferred",
      "Amex Business Gold",
      "Amex Business Platinum",
      "Amex Delta SkyMiles Gold",
      "Amex Delta SkyMiles Platinum",
      "Amex Delta SkyMiles Reserve",
      "Amex Green",
      "Bilt Blue",
      "Bilt Obsidian",
      "Bilt Palladium",
      "Capital One Venture",
      "Capital One Venture X",
      "Chase Freedom Flex",
      "Chase Freedom Unlimited",
      "Chase Ink Business Preferred",
      "Chase Ink Business Unlimited",
      "Chase Sapphire Preferred",
      "Chase Sapphire Reserve",
      "Citi Custom Cash",
      "Citi Double Cash",
      "Citi Strata Premier",
      "Wells Fargo Autograph"
    ],
    "system_prompt_ref": "system_prompt_template.md"
  },
  "scoring": {
    "dimensions": {
      "card_selection": {
        "weight": 0.25,
        "type": "automated",
        "description": "F1 of recommended cards vs. optimal set",
        "checks": {
          "expected_cards": [
            "American Express Gold",
            "American Express Platinum",
            "Chase Sapphire Reserve",
            "Amex Delta SkyMiles Gold",
            "Chase Ink Business Preferred"
          ]
        },
        "hard_constraint": false
      },
      "ev_accuracy": {
        "weight": 0.3,
        "type": "automated",
        "description": "EV accuracy vs. computed ground truth",
        "reference": {
          "user_profile": {
            "monthly_spend": {
              "dining": 350.0,
              "groceries": 300.0,
              "gas": 0,
              "travel": 115,
              "flights": 150.0,
              "hotels": 0,
              "streaming": 30,
              "online_shopping": 0,
              "everything_else": 40
            },
            "lounge_visits_per_year": 0,
            "delta_flights_per_year": 6,
            "united_flights_per_year": 0,
            "uses_uber": false,
            "uses_doordash": false,
            "uses_instacart": false,
            "uses_streaming": true,
            "has_global_entry": false,
            "uses_clear": false,
            "has_amazon_prime": false,
            "uses_resy_restaurants": false,
            "uses_hyatt_hotels": false,
            "uses_marriott_hotels": false,
            "uses_hilton_hotels": false,
            "monthly_rent": 0,
            "pays_housing_with_bilt": false,
            "time_horizon_months": 36,
            "existing_cards": [
              "Capital One Quicksilver",
              "Capital One Venture"
            ],
            "cards_opened_last_24_months": 1
          },
          "expected_card_ids": [
            "american_express_gold",
            "american_express_platinum",
            "chase_sapphire_reserve",
            "amex_delta_skymiles_gold",
            "chase_ink_business_preferred"
          ],
          "ev_tolerance_pct": 0.1
        }
      },
      "factual_fidelity": {
        "weight": 0.3,
        "type": "automated",
        "description": "Accuracy of factual claims about cards",
        "reference": {
          "extracted_claims": null,
          "reference_ev_usd": null
        }
      },
      "constraint_compliance": {
        "weight": 0.15,
        "type": "automated",
        "description": "Respects user constraints and issuer rules",
        "checks": {
          "expected_cards": [
            "American Express Gold",
            "American Express Platinum",
            "Chase Sapphire Reserve",
            "Amex Delta SkyMiles Gold",
            "Chase Ink Business Preferred"
          ],
          "expected_housing_option": null
        },
        "hard_constraint": false
      }
    },
    "passing_threshold": 0.5,
    "hard_constraint_failure_zeroes_dimension": true
  },
  "reference_solution": {
    "_status": "COMPUTED",
    "recommended_cards": [
      "American Express Gold",
      "American Express Platinum",
      "Chase Sapphire Reserve",
      "Amex Delta SkyMiles Gold",
      "Chase Ink Business Preferred"
    ],
    "total_ev_usd": null,
    "ev_breakdown": null,
    "housing_option": null,
    "key_constraints_flags": [],
    "expert_notes": "Ground truth EV computed from card_database.json + user_profile at evaluation time."
  }
}