File size: 4,892 Bytes
2312199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{
  "task_id": "easy_04",
  "version": "1.0.0",
  "created_at": "2026-03-11",
  "metadata": {
    "domain": "credit_card_optimization",
    "difficulty": "easy",
    "task_number": 4,
    "complexity_hint": {
      "max_tokens": 4000,
      "expected_output": "single card recommendation with EV calculation"
    },
    "requires_human_review": true
  },
  "prompt": {
    "system": "",
    "user": "You are a financial advisor suggesting a business credit card.\n\nUser profile:\n\n* Age 35, side hustle revenue $30k/year (freelance), personal income $70k  \n* Monthly business spending: $1,500 total \u2014 $800 office supplies/ads, $400 travel/meetings, $300 dining  \n* Has 1 personal card; wants to keep business spend separate  \n* Prefers cash back instead of transferable points  \n* Pays in full every month  \n* Will only bother with an annual fee if it\u2019s worth more than $100 to him in excess of the annual fee\n\nGoal: Maximize rewards over the next 2 years.\n\nCard context: All non-airline/hotel credit cards",
    "knowledge_base_ref": "knowledge_base.md",
    "kb_filter": [
      "American Express Platinum",
      "American Express Gold",
      "Amex Green",
      "Amex Blue Business Plus",
      "Amex Blue Cash Preferred",
      "Amex Blue Business Cash",
      "Amex Business Platinum",
      "Amex Business Gold",
      "Chase Sapphire Preferred",
      "Chase Sapphire Reserve",
      "Chase Freedom Unlimited",
      "Chase Freedom Flex",
      "Chase Ink Business Preferred",
      "Chase Ink Business Unlimited",
      "Chase Ink Business Premier",
      "Chase Business Sapphire Reserve",
      "Chase Prime Visa",
      "Chase Amazon Visa",
      "Citi Strata Elite",
      "Citi Strata Premier",
      "Citi Strata",
      "Citi Double Cash",
      "Citi Diamond Preferred",
      "Citi Custom Cash",
      "Capital One Venture X",
      "Capital One Venture",
      "Capital One Savor",
      "Capital One Venture X Business",
      "Capital One Business Spark 2X Miles",
      "Capital One Business Spark 2X Cash",
      "Bilt Blue",
      "Bilt Obsidian",
      "Bilt Palladium",
      "Wells Fargo Autograph",
      "Wells Fargo Autograph Journey",
      "BofA Customized Cash Rewards",
      "BofA Unlimited Cash Rewards",
      "BofA Premium Rewards",
      "BofA Premium Rewards Elite",
      "BofA Travel Rewards",
      "US Bank Smartly"
    ],
    "system_prompt_ref": "system_prompt_template.md"
  },
  "scoring": {
    "dimensions": {
      "constraint_compliance": {
        "weight": 0.3,
        "type": "automated",
        "description": "Hard rule checks: velocity limits, eligibility, user constraints",
        "checks": {
          "velocity_rules": null,
          "eligibility_rules": null,
          "user_constraints": null,
          "expected_cards": [
            "Amex Blue Business Cash"
          ],
          "expected_housing_option": null,
          "key_constraints_flags": [
            "cash_back_preference",
            "annual_fee_threshold",
            "business_card"
          ]
        },
        "hard_constraint": false
      },
      "ev_accuracy": {
        "weight": 0.4,
        "type": "automated",
        "description": "EV calculation accuracy vs. reference solution",
        "reference": {
          "reference_ev_usd": 970.0,
          "ev_tolerance_pct": 0.05
        }
      },
      "reasoning_quality": {
        "weight": 0.2,
        "type": "human",
        "description": "Quality of tradeoff articulation and strategic reasoning (0-3 scale)",
        "rubric": {
          "0": "No reasoning or incorrect reasoning",
          "1": "Surface-level reasoning, misses key tradeoffs",
          "2": "Correct tradeoffs identified with clear justification",
          "3": "Expert-level nuance including edge cases and constraint interactions"
        },
        "score": null
      },
      "constraint_prioritization": {
        "weight": 0.1,
        "type": "human",
        "description": "Correct handling of ambiguity and conflicting constraints",
        "score": null
      }
    },
    "passing_threshold": 0.6,
    "hard_constraint_failure_zeroes_dimension": true
  },
  "reference_solution": {
    "_status": "EXPERT_REVIEWED",
    "recommended_cards": [
      "Amex Blue Business Cash"
    ],
    "total_ev_usd": 970.0,
    "ev_breakdown": {
      "signup_bonuses_usd": 250.0,
      "ongoing_rewards_usd": 720.0,
      "credits_usd": 0.0,
      "annual_fees_usd": 0.0,
      "other_usd": 0.0
    },
    "housing_option": null,
    "key_constraints_flags": [
      "cash_back_preference",
      "annual_fee_threshold",
      "business_card"
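    ],
    "expert_notes": "Amex Blue Business Cash: no annual fee, 2% cash back on all purchases (spend is well under the cap), $250 signup bonus. Ongoing rewards = 2% * $1,500/month * 24 months = $720; EV = $720 + $250 = $970. Satisfies the cash back preference, keeps business spend separate, and, because it has no annual fee, needs no justification against the user's $100 annual-fee threshold."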
  }
}