File size: 5,386 Bytes
2312199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{
  "task_id": "medium_02",
  "version": "1.0.0",
  "created_at": "2026-03-11",
  "metadata": {
    "domain": "credit_card_optimization",
    "difficulty": "medium",
    "task_number": 2,
    "complexity_hint": {
      "max_tokens": 6000,
      "expected_output": "multi-card strategy with 2-year timeline and EV breakdown"
    },
    "requires_human_review": true
  },
  "prompt": {
    "system": "",
    "user": "You are a financial advisor tasked with building the following user\u2019s credit card strategy for the next 2 years in order to maximize the user\u2019s expected value while staying within their constraints, assuming all offers stay constant.\n\nUser profile:\n\n* A couple with a combined household income of $250k, one had a good credit score and one has an excellent credit score  \n* Monthly spending: $6,500 total \u2014 $2,000 groceries & dining (family of 4, prioritizes organic/healthy options), $1,000 gas & commuting, $500 online shopping, $400 utilities, $600 entertainment (movies, subscriptions), rest miscellaneous  \n* Already has 2 credit cards (one rotating 5% categories card, one 1.5% cash back on everything card); worried about \"too many inquiries hurting score for upcoming mortgage\"  \n* Prefers cards with simple cards, doesn\u2019t want to manage more than 1 monthly credit per card  \n* Wants to avoid foreign transaction fees for occasional vacations  \n* Pays in full every month  \n* Goal: Maximize rewards for family essentials over next 12\u201324 months, including sign-up bonuses, but prioritize family-friendly perks like purchase protection and perk simplicity  \n* Only willing to apply for up to 1 new card each per year.  \n* Do NOT recommend any card with AF \\> $50 unless the math clearly shows \\>$150 net value/year after fee. If no card fits well without major compromises, say so and explain why.\n\nOutline your strategy that complies with the user\u2019s requests and include a detailed expected value calculation.",
    "knowledge_base_ref": "knowledge_base.md",
    "kb_filter": [
      "Capital One Venture X",
      "American Express Gold",
      "Chase Sapphire Preferred",
      "Chase Sapphire Reserve",
      "Chase Freedom Flex",
      "Chase Freedom Unlimited",
      "Amex Blue Cash Preferred",
      "Citi Double Cash",
      "Amex Green",
      "Capital One Venture",
      "American Express Platinum",
      "Citi Strata Premier",
      "Wells Fargo Autograph",
      "BofA Premium Rewards",
      "Bilt Palladium"
    ],
    "system_prompt_ref": "system_prompt_template.md"
  },
  "scoring": {
    "dimensions": {
      "constraint_compliance": {
        "weight": 0.3,
        "type": "automated",
        "description": "Hard rule checks: velocity limits, eligibility, user constraints",
        "checks": {
          "velocity_rules": null,
          "eligibility_rules": null,
          "user_constraints": null,
          "expected_cards": [
            "Capital One Venture X",
            "American Express Gold",
            "Chase Sapphire Preferred"
          ],
          "expected_housing_option": null,
          "key_constraints_flags": [
            "couple_strategy",
            "1_card_per_person_per_year",
            "annual_fee_threshold_150_net",
            "mortgage_concerns"
          ]
        },
        "hard_constraint": false
      },
      "ev_accuracy": {
        "weight": 0.4,
        "type": "automated",
        "description": "EV calculation accuracy vs. reference solution",
        "reference": {
          "reference_ev_usd": 13566.5,
          "ev_tolerance_pct": 0.05
        }
      },
      "reasoning_quality": {
        "weight": 0.2,
        "type": "human",
        "description": "Quality of tradeoff articulation and strategic reasoning (0-3 scale)",
        "rubric": {
          "0": "No reasoning or incorrect reasoning",
          "1": "Surface-level reasoning, misses key tradeoffs",
          "2": "Correct tradeoffs identified with clear justification",
          "3": "Expert-level nuance including edge cases and constraint interactions"
        },
        "score": null
      },
      "constraint_prioritization": {
        "weight": 0.1,
        "type": "human",
        "description": "Correct handling of ambiguity and conflicting constraints",
        "score": null
      }
    },
    "passing_threshold": 0.6,
    "hard_constraint_failure_zeroes_dimension": true
  },
  "reference_solution": {
    "_status": "EXPERT_REVIEWED",
    "recommended_cards": [
      "Capital One Venture X",
      "American Express Gold",
      "Chase Sapphire Preferred"
    ],
    "total_ev_usd": 13566.5,
    "ev_breakdown": {
      "signup_bonuses_usd": 5662.5,
      "ongoing_rewards_usd": 7716.0,
      "credits_usd": 1818.0,
      "annual_fees_usd": -1630.0,
      "other_usd": 0.0
    },
    "housing_option": null,
    "key_constraints_flags": [
      "couple_strategy",
      "1_card_per_person_per_year",
      "annual_fee_threshold_150_net",
      "mortgage_concerns"
    ],
    "expert_notes": "Couple strategy: Partner 1 gets Venture X yr1, CSP yr2. Partner 2 gets Amex Gold yr1, CSP yr2. Amex Gold 4x on $2k dining/grocery. Venture X 2x on $4.5k other. Both CSPs month 13-14 for bonus hunting then back to Gold+VX. Amex credits: $848 (resy $100 + dining $120 + uber $120 + dunkin $84 * 2yr). Cap1 credits: $970. Welcome offers: $5,662.50."
  }
}