// taf-agent / js / json_cot_linter.js
// karlexmarin's picture
// v0.8.2 JSON CoT-aware Linter — anti-bullshit pack #8
// b0f51d4
// JSON CoT-aware Linter (v0.8.2 anti-bullshit pack #8)
//
// Pain (Solutions Hub `structured_outputs`): JSON schema engines fail
// silently and CoT models commit to the answer before reasoning when
// the schema places `answer` before `reasoning` — constrained decoding
// emits keys in property order, so the model has to commit a final
// answer first and only then writes the rationale to justify it,
// defeating Chain-of-Thought entirely.
//
// Source citations:
// - https://collinwilkins.com/articles/structured-output (field
// ordering anti-pattern explained)
// - JSONSchemaBench (10K real schemas) — most are not CoT-aware
// - llguidance / Outlines / SGLang grammars — all respect property order
//
// Pure logic — no human strings. Returns codes+params; main.js does
// the i18n lookup.
// Heuristic field classifiers. Tested against real schemas + examples
// in the smoke harness; conservative on `other` to avoid mislabeling
// ambiguous fields (e.g. a `score` could be either reasoning-side or
// answer-side, but lexically it patterns as answer-side and the
// false-anti-pattern cost is only "review the schema", which is fine).

// Build a case-insensitive pattern that matches `word` only as a whole
// token, i.e. not glued to another ASCII letter or digit. A plain `\b`
// is not used because `_` counts as a word character there, which made
// snake_case keys such as `cot_trace` or `action_plan` invisible.
// `word` must be a literal without regex metacharacters.
const wholeToken = (word) =>
  new RegExp(`(?:^|[^a-z0-9])${word}(?![a-z0-9])`, "i");

// A field is reasoning-side when ANY of these matches anywhere in its name.
const REASONING_PATTERNS = [
  /reason/i,
  /think/i,
  /thought/i,
  wholeToken("cot"),
  /chain.of.thought/i,
  /analysis/i,
  wholeToken("explanation"),
  /rationale/i,
  /step.by.step/i,
  /scratchpad/i,
  /justif/i,
  /deliberat/i,
  wholeToken("plan"),
  wholeToken("why"),
];

// Answer-side names are anchored at the start (and mostly at the end too)
// so that only names that clearly denote the final output are flagged.
const ANSWER_PATTERNS = [
  /^answer$/i,
  /^result$/i,
  /^output$/i,
  /^response$/i,
  /^final/i,
  /^verdict$/i,
  /^decision$/i,
  /^prediction$/i,
  /^conclusion$/i,
  /^value$/i,
  /^score$/i,
  /^classif/i,
  /^label$/i,
  /^choice$/i,
  /^selected/i,
];

// Classify a field name as "reasoning", "answer" or "other".
//
// Reasoning patterns are tried first, so a name matching both tables
// (e.g. `final_reasoning`) is reported as "reasoning". Anything that is
// not a non-empty string is "other".
export function classifyFieldName(name) {
  if (typeof name !== "string" || name === "") return "other";
  if (REASONING_PATTERNS.some((pattern) => pattern.test(name))) return "reasoning";
  if (ANSWER_PATTERNS.some((pattern) => pattern.test(name))) return "answer";
  return "other";
}
// Work out which keys a constrained decoder would emit, in order.
//
// `parsed` is treated as a JSON Schema when it carries a `properties`
// member that is a plain (non-array, non-null) object; the field list is
// then the keys of `properties`. Any other object is treated as a plain
// example and its own keys are the field list. Non-objects (including
// null and arrays) yield kind "non_object" with no fields.
//
// Key order relies on JS insertion-order preservation for non-integer
// string keys (ES2015+); integer-like keys are enumerated first in
// ascending order, so their position may differ from the pasted text.
//
// Returns { kind: "schema" | "example" | "non_object", fields: string[] }.
function extractFieldOrder(parsed) {
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return { kind: "non_object", fields: [] };
  }

  // Schema form. An array here cannot be a schema's property map; it is
  // just an example field that happens to be called `properties`.
  const props = parsed.properties;
  if (props !== null && typeof props === "object" && !Array.isArray(props)) {
    return { kind: "schema", fields: Object.keys(props) };
  }

  // Example object form
  return { kind: "example", fields: Object.keys(parsed) };
}
// Pair every field name with its position in the list and the category
// reported by classifyFieldName. Output order mirrors the input order.
function buildFieldAnnotations(fields) {
  const annotate = (fieldName, position) => {
    const category = classifyFieldName(fieldName);
    return { name: fieldName, idx: position, type: category };
  };
  return fields.map(annotate);
}
// Propose a CoT-friendly key order: every reasoning field, then every
// `other` field, then every answer field. Relative order inside each
// group is kept, so the rationale is generated first, context fields
// stay available in between, and the answer is committed last.
// Entries whose type is none of the three are left out.
function suggestReorder(annotations) {
  const reasoningNames = [];
  const otherNames = [];
  const answerNames = [];

  annotations.forEach((entry) => {
    const category = entry.type;
    if (category === "reasoning") {
      reasoningNames.push(entry.name);
    } else if (category === "other") {
      otherNames.push(entry.name);
    } else if (category === "answer") {
      answerNames.push(entry.name);
    }
  });

  return [...reasoningNames, ...otherNames, ...answerNames];
}
// Public entry point. `text` is the user-pasted JSON Schema or example.
// Returns { code, params } where `code` is one of:
// - invalid_json       (text does not parse; params.error holds the
//                       parser message, truncated to 200 chars)
// - non_object         (top level is not an object; params.kind is
//                       "null", "array" or the `typeof` of the value)
// - empty_fields       (blank input, or an object/schema with no fields)
// - good_order         (reasoning before answer — CoT honored)
// - anti_pattern       (answer before reasoning — model commits early)
// - missing_reasoning  (answer-like fields present, no reasoning)
// - missing_answer     (reasoning fields present, no answer-like field)
// - no_cot_fields      (object has fields but none look reasoning/answer)
//
// For the last five codes `params` carries the detected kind, the
// annotated fields, their count, the index of the first reasoning and
// first answer field (null when absent) and a suggested key order.
export function lintJsonCot(text) {
  if (typeof text !== "string" || !text.trim()) {
    return { code: "empty_fields", params: { reason: "empty_input" } };
  }

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (e) {
    return {
      code: "invalid_json",
      params: { error: String(e && e.message || e).slice(0, 200) },
    };
  }

  const { kind, fields } = extractFieldOrder(parsed);
  if (kind === "non_object") {
    // `typeof null` is "object", which would be a self-contradictory
    // thing to report alongside a non_object code — name it explicitly.
    let actualKind;
    if (parsed === null) {
      actualKind = "null";
    } else if (Array.isArray(parsed)) {
      actualKind = "array";
    } else {
      actualKind = typeof parsed;
    }
    return { code: "non_object", params: { kind: actualKind } };
  }
  if (fields.length === 0) {
    return { code: "empty_fields", params: { kind } };
  }

  const annotations = buildFieldAnnotations(fields);
  // Only the FIRST field of each category decides the verdict.
  const reasoningIdx = annotations.findIndex((a) => a.type === "reasoning");
  const answerIdx = annotations.findIndex((a) => a.type === "answer");
  const hasReasoning = reasoningIdx !== -1;
  const hasAnswer = answerIdx !== -1;

  const baseParams = {
    kind,
    fields: annotations,
    field_count: annotations.length,
    reasoning_idx: hasReasoning ? reasoningIdx : null,
    answer_idx: hasAnswer ? answerIdx : null,
    suggested_order: suggestReorder(annotations),
  };

  if (!hasReasoning && !hasAnswer) {
    return { code: "no_cot_fields", params: baseParams };
  }
  if (hasReasoning && !hasAnswer) {
    return { code: "missing_answer", params: baseParams };
  }
  if (!hasReasoning && hasAnswer) {
    return { code: "missing_reasoning", params: baseParams };
  }

  // Both present — order is decisive.
  if (reasoningIdx < answerIdx) {
    return { code: "good_order", params: baseParams };
  }
  return { code: "anti_pattern", params: baseParams };
}
// Build a properties-reordered JSON string preserving the original
// shape (schema vs example). Used by the UI to show "suggested fix".
//
// `text` is the pasted JSON; `suggestedOrder` is the desired key order
// (a nullish value means "no preference"). Returns the re-serialized
// JSON (2-space indent), or null when `text` does not parse or its top
// level is not a plain object.
//
// Schema form (a `properties` member that is a non-array object): the
// keys of `properties` are reordered, and an existing `required` array
// is re-sorted to match. Example form: the top-level keys are reordered.
// Keys missing from `suggestedOrder` are never dropped; they follow the
// ordered ones in their original relative order.
export function reorderJsonText(text, suggestedOrder) {
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null;

  const order = suggestedOrder == null ? [] : Array.from(suggestedOrder);

  // Reorder properties within a plain object preserving values.
  const reorderObj = (obj, keyOrder) => {
    const seen = new Set();
    const entries = [];
    // Suggested keys that exist on the object first, then whatever is
    // left (defensive: keeps unknowns).
    for (const k of [...keyOrder, ...Object.keys(obj)]) {
      if (seen.has(k) || !Object.prototype.hasOwnProperty.call(obj, k)) continue;
      seen.add(k);
      entries.push([k, obj[k]]);
    }
    // Object.fromEntries defines own data properties, so a field that is
    // literally named "__proto__" survives; `out[k] = v` would invoke the
    // prototype setter instead and the field would vanish from the output.
    return Object.fromEntries(entries);
  };

  const props = parsed.properties;
  const isSchema = props !== null && typeof props === "object" && !Array.isArray(props);
  if (!isSchema) {
    return JSON.stringify(reorderObj(parsed, order), null, 2);
  }

  parsed.properties = reorderObj(props, order);

  // If `required` array exists, mirror suggested order so generators
  // that emit fields in `required[]` order also benefit. Nothing is
  // added that was not required before, and nothing is removed: entries
  // the suggested order does not mention keep their place at the end,
  // otherwise the schema's validation contract would silently weaken.
  if (Array.isArray(parsed.required)) {
    const wasRequired = new Set(parsed.required);
    const placed = new Set(order.filter((k) => wasRequired.has(k)));
    const leftovers = parsed.required.filter((k) => !placed.has(k));
    parsed.required = [...placed, ...leftovers];
  }
  return JSON.stringify(parsed, null, 2);
}