File size: 7,201 Bytes
b0f51d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
// JSON CoT-aware Linter (v0.8.2 anti-bullshit pack #8)
//
// Pain (Solutions Hub `structured_outputs`): JSON schema engines fail
// silently and CoT models commit to the answer before reasoning when
// the schema places `answer` before `reasoning` — constrained decoding
// emits keys in property order, so the model has to commit a final
// answer first and only then writes the rationale to justify it,
// defeating Chain-of-Thought entirely.
//
// Source citations:
//   - https://collinwilkins.com/articles/structured-output (field
//     ordering anti-pattern explained)
//   - JSONSchemaBench (10K real schemas) — most are not CoT-aware
//   - llguidance / Outlines / SGLang grammars — all respect property order
//
// Pure logic — no human strings. Returns codes+params; main.js does
// the i18n lookup.

// Heuristic field classifiers. Tested against real schemas + examples
// in the smoke harness; conservative on `other` to avoid mislabeling
// ambiguous fields (e.g. a `score` could be either reasoning-side or
// answer-side, but lexically it patterns as answer-side and the
// false-anti-pattern cost is only "review the schema", which is fine).
// Name fragments that mark a field as reasoning-side. Plain substring
// patterns (e.g. /reason/) match anywhere in the field name. Short words
// that would otherwise hit unrelated names ("cot" in "scottish", "plan"
// in "planet") are matched as whole tokens instead.
//
// The token boundary is spelled out as "start/end of the name, or any
// non-alphanumeric character" rather than `\b`: `\b` counts `_` as a word
// character, so it never fires inside snake_case names and would miss
// `cot_steps`, `action_plan`, `why_chosen` or `short_explanation`.
// Lookbehind is deliberately avoided so the literals parse everywhere.
const REASONING_PATTERNS = [
  /reason/i,
  /think/i,
  /thought/i,
  /(?:^|[^a-z0-9])cot(?:$|[^a-z0-9])/i,
  /chain.of.thought/i,
  /analysis/i,
  /(?:^|[^a-z0-9])explanation(?:$|[^a-z0-9])/i,
  /rationale/i,
  /step.by.step/i,
  /scratchpad/i,
  /justif/i,
  /deliberat/i,
  /(?:^|[^a-z0-9])plan(?:$|[^a-z0-9])/i,
  /(?:^|[^a-z0-9])why(?:$|[^a-z0-9])/i,
];

// Answer-side field names, listed in matching order. Every pattern is
// anchored at the start of the name: sources ending in `$` must match the
// whole name, the others (`^final`, `^classif`, `^selected`) are prefix
// matches. All of them are compiled case-insensitively.
const ANSWER_PATTERNS = [
  "^answer$",
  "^result$",
  "^output$",
  "^response$",
  "^final",
  "^verdict$",
  "^decision$",
  "^prediction$",
  "^conclusion$",
  "^value$",
  "^score$",
  "^classif",
  "^label$",
  "^choice$",
  "^selected",
].map((source) => new RegExp(source, "i"));

// Label a field name as "reasoning", "answer" or "other".
//
// Anything that is not a non-empty string is "other". The reasoning
// patterns are consulted first, so a name that matches both tables
// (e.g. one starting with "final" that also contains "reason") is
// reported as "reasoning".
export function classifyFieldName(name) {
  const isUsableName = typeof name === "string" && name.length > 0;
  if (!isUsableName) return "other";

  const matchesAny = (patterns) => patterns.some((re) => re.test(name));

  if (matchesAny(REASONING_PATTERNS)) return "reasoning";
  return matchesAny(ANSWER_PATTERNS) ? "answer" : "other";
}

// Work out the ordered list of top-level field names in `parsed`.
//
// `parsed` is treated as a JSON Schema when it carries a `properties`
// member that is a plain (non-array) object; the fields are then the keys
// of `properties`. Any other object is treated as an example payload and
// its own keys are the fields. `null`, arrays and primitives yield
// `{ kind: "non_object", fields: [] }`.
//
// Field order is whatever Object.keys reports: insertion order for
// ordinary string keys. Integer-like keys ("0", "12") are always listed
// first in ascending order by the language, so their position in the
// pasted text is not preserved.
function extractFieldOrder(parsed) {
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return { kind: "non_object", fields: [] };
  }

  const { properties } = parsed;
  // An array is also `typeof "object"`, but its keys are indices, not
  // field names, so an array-valued `properties` is just example data.
  const hasPropertyMap =
    properties !== null &&
    typeof properties === "object" &&
    !Array.isArray(properties);

  if (hasPropertyMap) {
    return { kind: "schema", fields: Object.keys(properties) };
  }
  return { kind: "example", fields: Object.keys(parsed) };
}

// Turn an ordered list of field names into annotation records of the
// form { name, idx, type }, where `idx` is the position in `fields` and
// `type` is whatever classifyFieldName returns for that name.
function buildFieldAnnotations(fields) {
  const annotate = (fieldName, position) => {
    const type = classifyFieldName(fieldName);
    return { name: fieldName, idx: position, type };
  };
  return fields.map(annotate);
}

// Produce the recommended field order: every reasoning field, then every
// `other` field, then every answer field. Within each group the fields
// keep the relative order they had in `annotations`, so the rationale is
// generated first, context fields stay available in between, and the
// answer is committed last. Annotations with any other `type` are left
// out of the result.
function suggestReorder(annotations) {
  const reasoningNames = [];
  const neutralNames = [];
  const answerNames = [];

  annotations.forEach((entry) => {
    switch (entry.type) {
      case "reasoning":
        reasoningNames.push(entry.name);
        break;
      case "other":
        neutralNames.push(entry.name);
        break;
      case "answer":
        answerNames.push(entry.name);
        break;
      default:
        break;
    }
  });

  return reasoningNames.concat(neutralNames, answerNames);
}

// Public entry point. `text` is the user-pasted JSON Schema or example.
// Returns { code, params } where `code` is one of:
//   - invalid_json      (params.error: parser message, at most 200 chars)
//   - non_object        (params.kind: "null", "array" or the typeof name)
//   - empty_fields      (blank input, or an object/schema with no fields)
//   - good_order        (reasoning before answer — CoT honored)
//   - anti_pattern      (answer before reasoning — model commits early)
//   - missing_reasoning (answer-like fields present, no reasoning)
//   - missing_answer    (reasoning fields present, no answer-like field)
//   - no_cot_fields     (object has fields but none look reasoning/answer)
//
// For the last five codes `params` carries the detected kind, the
// annotated field list, the field count, the index of the first
// reasoning-like and first answer-like field (null when absent) and a
// suggested field order. The ordering verdict compares only those two
// first indices.
export function lintJsonCot(text) {
  if (typeof text !== "string" || !text.trim()) {
    return { code: "empty_fields", params: { reason: "empty_input" } };
  }

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (e) {
    return {
      code: "invalid_json",
      params: { error: String(e && e.message || e).slice(0, 200) },
    };
  }

  const { kind, fields } = extractFieldOrder(parsed);
  if (kind === "non_object") {
    // `typeof null` is "object", which would make the report read
    // "expected an object, got object"; name null explicitly instead.
    let actualKind;
    if (parsed === null) {
      actualKind = "null";
    } else if (Array.isArray(parsed)) {
      actualKind = "array";
    } else {
      actualKind = typeof parsed;
    }
    return { code: "non_object", params: { kind: actualKind } };
  }
  if (fields.length === 0) {
    return { code: "empty_fields", params: { kind } };
  }

  const annotations = buildFieldAnnotations(fields);
  const reasoningIdx = annotations.findIndex(a => a.type === "reasoning");
  const answerIdx    = annotations.findIndex(a => a.type === "answer");
  const hasReasoning = reasoningIdx !== -1;
  const hasAnswer    = answerIdx !== -1;

  const baseParams = {
    kind,
    fields: annotations,
    field_count: annotations.length,
    reasoning_idx: hasReasoning ? reasoningIdx : null,
    answer_idx: hasAnswer ? answerIdx : null,
    suggested_order: suggestReorder(annotations),
  };

  if (!hasReasoning && !hasAnswer) {
    return { code: "no_cot_fields", params: baseParams };
  }
  if (!hasAnswer) {
    return { code: "missing_answer", params: baseParams };
  }
  if (!hasReasoning) {
    return { code: "missing_reasoning", params: baseParams };
  }
  // Both present — order is decisive.
  const code = reasoningIdx < answerIdx ? "good_order" : "anti_pattern";
  return { code, params: baseParams };
}

// Build a properties-reordered JSON string preserving the original
// shape (schema vs example). Used by the UI to show "suggested fix".
//
// `text` is the pasted JSON; `suggestedOrder` is the desired list of
// field names (non-string entries are ignored, and a non-array value is
// treated as an empty list, i.e. nothing is moved). Returns the
// 2-space-indented JSON, or null when `text` is not valid JSON or does
// not parse to a plain object.
//
// When the input has a plain-object `properties` member, that member is
// reordered and an existing `required` array is re-sequenced to match;
// otherwise the top-level keys are reordered. Keys not mentioned in
// `suggestedOrder` are never dropped — they follow the mentioned ones in
// their original relative order.
//
// Limitation: integer-like keys ("0", "12") are always serialized first
// in ascending order by the language, whatever order is requested.
export function reorderJsonText(text, suggestedOrder) {
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null;

  const order = Array.isArray(suggestedOrder)
    ? suggestedOrder.filter((key) => typeof key === "string")
    : [];

  // Reorder the keys of a plain object, preserving values. The result is
  // built with Object.fromEntries so that a pasted "__proto__" key stays
  // an ordinary own property instead of hitting the prototype setter.
  const reorderObj = (obj) => {
    const placed = new Set();
    const entries = [];
    // First emit suggested keys that exist on the object.
    for (const key of order) {
      if (!placed.has(key) && Object.prototype.hasOwnProperty.call(obj, key)) {
        placed.add(key);
        entries.push([key, obj[key]]);
      }
    }
    // Then any keys not in the suggested order (keeps unknowns).
    for (const key of Object.keys(obj)) {
      if (!placed.has(key)) entries.push([key, obj[key]]);
    }
    return Object.fromEntries(entries);
  };

  const { properties } = parsed;
  // An array-valued `properties` is example data, not a schema property map.
  const isSchema =
    properties !== null &&
    typeof properties === "object" &&
    !Array.isArray(properties);

  if (!isSchema) {
    return JSON.stringify(reorderObj(parsed), null, 2);
  }

  parsed.properties = reorderObj(properties);

  // Mirror the suggested order in `required` so generators that emit
  // fields in `required[]` order also benefit. Only the sequence changes:
  // entries that the suggested order does not mention are kept at the end
  // rather than dropped, because removing them would alter the schema.
  if (Array.isArray(parsed.required)) {
    const wasRequired = new Set(parsed.required);
    const moved = new Set();
    const resequenced = [];
    for (const key of order) {
      if (wasRequired.has(key) && !moved.has(key)) {
        moved.add(key);
        resequenced.push(key);
      }
    }
    const untouched = parsed.required.filter((key) => !moved.has(key));
    parsed.required = [...resequenced, ...untouched];
  }
  return JSON.stringify(parsed, null, 2);
}