File size: 6,568 Bytes
2312199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
{
  "task_id": "objective_hard_17",
  "version": "2.0.0",
  "created_at": "2026-03-18",
  "metadata": {
    "domain": "credit_card_optimization",
    "difficulty": "hard",
    "task_number": 17,
    "complexity_hint": {
      "max_tokens": 8000,
      "expected_output": "multi-card portfolio with sequencing and full EV breakdown"
    },
    "requires_human_review": false
  },
  "prompt": {
    "system": "",
    "user": "You\u2019re an expert financial advisor. Tell the user how to change their credit card setup to maximize EV over the next 3 years.\n\n# **Credit card recommendation please**\n\nCREDIT PROFILE\n\n* Current credit cards you are the primary account holder of: Bank of America - Customized Cash Rewards Visa Signature, $2,000 limit, March 25th, 2022  \n* FICO Scores with source (see note on FICO score sources below): Experian 717  \n* Oldest credit card account age with you as primary name on the account: 3 years, 3 months  \n* Number of personal credit cards approved for in the past 6 months: 0  \n* Number of personal credit cards approved for in the past 12 months: 0  \n* Number of personal credit cards approved for in the past 24 months: 0  \n* Annual income $: \\~$78K annually\n\nCATEGORIES\n\n* OK with category-specific cards?: Yes  \n* OK with rotating category cards?: No  \n* Estimate average monthly spend in the categories below. Only include what you can pay by credit card.  \n* Dining $: \\~$500  \n* Groceries $: \\~$400 Costco only  \n* Gas $: \\~$280 (I only use Chevron)  \n* Travel $: \\~$4,000  \n* Do you plan on using this card abroad for a significant length of time (study abroad, digital nomad, expat, extended travel)?: No  \n* Any other categories (examples: phone/internet, insurance) or stores (example: Amazon) with significant, regular credit card spend (the more you specify, the better): No  \n* Any other significant, regular credit card spend you didn't include above?: $28 a month for gym?  \n* Can you pay rent by credit card? If yes, list rent amount and if there's a fee for paying by credit card: N/A\n\nMEMBERSHIPS & SUBSCRIPTIONS (delete lines that don't apply)\n\n* Current member of Amazon Prime?: No  \n* Current Verizon postpaid customer?: No  \n* Current member of Costco or Sam's Club? On my brother\u2019s Costco membership  \n* Currently paying $13.99/month or more for Disney Bundle (Disney+ / Hulu / EPSN+) or other Hulu services? Disney+, CrunchyRoll, Peacock  \n* Current member of Chase, US Bank or any other big bank?: Chase account  \n* Active US military?: No  \n* Are you open to Business Cards?: No PURPOSE  \n* What's the purpose of your next card (choose ONE)?: Higher Credit limit, hopefully use for traveling, no preferred airline or hotel  \n* Do you have any cards you've been looking at? A good friend recommended Chase Sapphire Reserve and/or Amex Platinum\n\nI am looking to travel a lot this summer so I am not sure what type of a new credit card and how high of a limit to ask for that will let me best take advantage of the spending I\u2019ll be doing on flight tickets. A few months ago I got my BofA credit card increased from $1,500 to $2,000. Thank you in advanced to whoever takes the time to help\\!\n\nEdit: I have no preference, I am really not sure what it best for me.",
    "knowledge_base_ref": "knowledge_base.md",
    "kb_filter": [
      "American Express Gold",
      "American Express Platinum",
      "Amex Blue Business Plus",
      "Amex Blue Cash Preferred",
      "Amex Business Gold",
      "Amex Business Platinum",
      "Amex Green",
      "Bilt Blue",
      "Bilt Obsidian",
      "Bilt Palladium",
      "Capital One Venture",
      "Capital One Venture X",
      "Chase Freedom Flex",
      "Chase Freedom Unlimited",
      "Chase Ink Business Preferred",
      "Chase Ink Business Unlimited",
      "Chase Sapphire Preferred",
      "Chase Sapphire Reserve",
      "Citi Custom Cash",
      "Citi Double Cash",
      "Citi Strata Premier",
      "Wells Fargo Autograph"
    ],
    "system_prompt_ref": "system_prompt_template.md"
  },
  "scoring": {
    "dimensions": {
      "card_selection": {
        "weight": 0.25,
        "type": "automated",
        "description": "F1 of recommended cards vs. optimal set",
        "checks": {
          "expected_cards": [
            "American Express Gold",
            "Chase Sapphire Reserve",
            "Chase Ink Business Preferred",
            "American Express Platinum",
            "Capital One Venture"
          ]
        },
        "hard_constraint": false
      },
      "ev_accuracy": {
        "weight": 0.3,
        "type": "automated",
        "description": "EV accuracy vs. computed ground truth",
        "reference": {
          "user_profile": {
            "monthly_spend": {
              "dining": 300,
              "groceries": 400,
              "everything_else": 500
            },
            "time_horizon_months": 36
          },
          "expected_card_ids": [
            "american_express_gold",
            "chase_sapphire_reserve",
            "chase_ink_business_preferred",
            "american_express_platinum",
            "capital_one_venture"
          ],
          "ev_tolerance_pct": 0.1
        }
      },
      "factual_fidelity": {
        "weight": 0.3,
        "type": "automated",
        "description": "Accuracy of factual claims about cards",
        "reference": {
          "extracted_claims": null,
          "reference_ev_usd": null
        }
      },
      "constraint_compliance": {
        "weight": 0.15,
        "type": "automated",
        "description": "Respects user constraints and issuer rules",
        "checks": {
          "expected_cards": [
            "American Express Gold",
            "Chase Sapphire Reserve",
            "Chase Ink Business Preferred",
            "American Express Platinum",
            "Capital One Venture"
          ],
          "expected_housing_option": null
        },
        "hard_constraint": false
      }
    },
    "passing_threshold": 0.5,
    "hard_constraint_failure_zeroes_dimension": true
  },
  "reference_solution": {
    "_status": "COMPUTED",
    "recommended_cards": [
      "American Express Gold",
      "Chase Sapphire Reserve",
      "Chase Ink Business Preferred",
      "American Express Platinum",
      "Capital One Venture"
    ],
    "total_ev_usd": null,
    "ev_breakdown": null,
    "housing_option": null,
    "key_constraints_flags": [],
    "expert_notes": "Ground truth EV computed from card_database.json + user_profile at evaluation time."
  }
}