File size: 5,186 Bytes
2312199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
{
  "task_id": "medium_04",
  "version": "1.0.0",
  "created_at": "2026-03-11",
  "metadata": {
    "domain": "credit_card_optimization",
    "difficulty": "medium",
    "task_number": 4,
    "complexity_hint": {
      "max_tokens": 6000,
      "expected_output": "multi-card strategy with 2-year timeline and EV breakdown"
    },
    "requires_human_review": true
  },
  "prompt": {
    "system": "",
    "user": "You are a financial advisor tasked with building the following user\u2019s credit card strategy for the next 2 years in order to maximize the user\u2019s expected value while staying within their constraints, assuming all offers stay constant.\n\nUser profile:\n\n* Age 31, freelance income $70k\u2013$130k/year (highly variable, averages $95k), excellent credit score  \n* Monthly spending: $3,500, of which $1,200 is groceries/dining, $800 marketing/software (business), $500 travel (client trips), $400 utilities, $300 debt payments (currently $8k at 18% APR from old card), rest variable/misc  \n* Already has 3 cards (Amex Blue Business Plus, Chase Ink Preferred, Citi Double Cash). concerned about \"variable income making minimum spends risky\" regarding welcome offers  \n* Concerned about mixing business/personal spend for tax reasons  \n* Prefers no AF or \\< $95 offset easily  \n* Will pay in full on new spending and new cards, but still has that aforementioned debt  \n* Only willing to add 1\u20132 new cards. \n\nWhat should the user\u2019s credit card strategy be over the next 2 years given their entire situation in order to optimize EV without compromising on their concerns or constraints? Include a detailed expected value calculation for that strategy. Keep in mind credit card interest counts negatively toward EV.",
    "knowledge_base_ref": "knowledge_base.md",
    "kb_filter": [
      "Chase Ink Business Unlimited",
      "Capital One Venture X",
      "Amex Blue Business Plus",
      "Chase Ink Business Preferred",
      "Citi Double Cash",
      "Capital One Venture X Business",
      "Amex Business Gold",
      "Chase Sapphire Preferred",
      "Amex Blue Cash Preferred",
      "Chase Freedom Unlimited",
      "Capital One Venture",
      "Amex Blue Business Cash",
      "Citi Strata Premier",
      "Wells Fargo Autograph",
      "Chase Ink Business Premier"
    ],
    "system_prompt_ref": "system_prompt_template.md"
  },
  "scoring": {
    "dimensions": {
      "constraint_compliance": {
        "weight": 0.3,
        "type": "automated",
        "description": "Hard rule checks: velocity limits, eligibility, user constraints",
        "checks": {
          "velocity_rules": null,
          "eligibility_rules": null,
          "user_constraints": null,
          "expected_cards": [
            "Chase Ink Business Unlimited",
            "Capital One Venture X"
          ],
          "expected_housing_option": null,
          "key_constraints_flags": [
            "debt_payoff_priority",
            "variable_income",
            "business_personal_separation",
            "annual_fee_preference"
          ]
        },
        "hard_constraint": false
      },
      "ev_accuracy": {
        "weight": 0.4,
        "type": "automated",
        "description": "EV calculation accuracy vs. reference solution",
        "reference": {
          "reference_ev_usd": 8874.06,
          "ev_tolerance_pct": 0.05
        }
      },
      "reasoning_quality": {
        "weight": 0.2,
        "type": "human",
        "description": "Quality of tradeoff articulation and strategic reasoning (0-3 scale)",
        "rubric": {
          "0": "No reasoning or incorrect reasoning",
          "1": "Surface-level reasoning, misses key tradeoffs",
          "2": "Correct tradeoffs identified with clear justification",
          "3": "Expert-level nuance including edge cases and constraint interactions"
        },
        "score": null
      },
      "constraint_prioritization": {
        "weight": 0.1,
        "type": "human",
        "description": "Correct handling of ambiguity and conflicting constraints",
        "score": null
      }
    },
    "passing_threshold": 0.6,
    "hard_constraint_failure_zeroes_dimension": true
  },
  "reference_solution": {
    "_status": "EXPERT_REVIEWED",
    "recommended_cards": [
      "Chase Ink Business Unlimited",
      "Capital One Venture X"
    ],
    "total_ev_usd": 8874.06,
    "ev_breakdown": {
      "signup_bonuses_usd": 2925.0,
      "ongoing_rewards_usd": 3841.2,
      "credits_usd": 970.0,
      "annual_fees_usd": -980.0,
      "other_usd": 2117.86
    },
    "housing_option": null,
    "key_constraints_flags": [
      "debt_payoff_priority",
      "variable_income",
      "business_personal_separation",
      "annual_fee_preference"
    ],
    "expert_notes": "Pay off $8k debt ASAP (~2 months), saving $2,293.44 in future interest (only $175.58 accrued). Apply for Ink Business Unlimited ($1,700/mo biz spend, prepay to hit $6k bonus) and Venture X ($1,500/mo personal). Spending: VX for dining/grocery/misc, Ink Preferred 3x for marketing/travel/utilities. Other_usd includes $2,293.44 interest saved minus $175.58 interest paid = $2,117.86 net."
  }
}