/**
* JSON parsing with fallback strategies for AI-generated responses.
*/
import { jsonrepair } from 'jsonrepair';
import { createLogger } from '@/lib/logger';
const log = createLogger('Generation');
/**
 * Extract and parse a JSON value embedded in a free-form AI response.
 *
 * Candidates are handed to `tryParseJson` in this order; the first non-null result wins:
 *  1. The body of each fenced markdown code block, when it starts with `{` or `[`.
 *  2. The first bracket-balanced `{...}` / `[...]` span in the raw text.
 *  3. The whole trimmed response.
 *  4. Everything from the first `{` / `[` to the end of the response. Tried only when
 *     step 2 found no closing bracket (e.g. output cut off mid-JSON after some prose or
 *     inside an unclosed code fence), so that the truncation repair in `tryParseJson`,
 *     which requires the text to start with a bracket, gets a chance to run.
 *
 * Step 4 runs after step 3, so any response that parsed before it existed still
 * produces the same value.
 *
 * @param response - Raw model output, possibly wrapping the JSON in prose or code fences.
 * @returns The parsed value cast to `T` without runtime validation, or `null` if every
 *   candidate fails. A payload that is literally JSON `null` is indistinguishable from
 *   a failure and is reported as one.
 */
export function parseJsonResponse<T>(response: string): T | null {
  // Strategy 1: fenced code blocks (there may be several; take the first that parses).
  for (const match of response.matchAll(/```(?:json)?\s*([\s\S]*?)```/g)) {
    const extracted = match[1].trim();
    // Skip blocks that clearly are not JSON (shell snippets, prose, ...).
    if (!extracted.startsWith('{') && !extracted.startsWith('[')) continue;

    const fromBlock = tryParseJson<T>(extracted);
    if (fromBlock !== null) {
      log.debug('Successfully parsed JSON from code block');
      return fromBlock;
    }
  }

  // Strategy 2: first array/object opener in the raw text, up to its matching closer.
  const startIndex = response.search(/[\[{]/);
  const endIndex = startIndex === -1 ? -1 : findBalancedEnd(response, startIndex);
  if (endIndex !== -1) {
    const fromBody = tryParseJson<T>(response.substring(startIndex, endIndex + 1));
    if (fromBody !== null) {
      log.debug('Successfully parsed JSON from response body');
      return fromBody;
    }
  }

  // Strategy 3: the whole response.
  const whole = response.trim();
  const fromWhole = tryParseJson<T>(whole);
  if (fromWhole !== null) {
    log.debug('Successfully parsed raw response as JSON');
    return fromWhole;
  }

  // Strategy 4: an opener exists but was never closed — retry without the leading text.
  if (startIndex !== -1 && endIndex === -1) {
    const tail = response.substring(startIndex).trim();
    // When nothing precedes the opener, this is the same input strategy 3 just rejected.
    if (tail !== whole) {
      const fromTail = tryParseJson<T>(tail);
      if (fromTail !== null) {
        log.debug('Successfully parsed JSON from unterminated response body');
        return fromTail;
      }
    }
  }

  log.error('Failed to parse JSON from response');
  log.error('Raw response (first 500 chars):', response.substring(0, 500));
  return null;
}

/**
 * Find the bracket that closes the `[` / `{` located at `start`.
 *
 * Brackets inside double-quoted strings are ignored, and a backslash inside a string
 * escapes the following character. Bracket kinds are not matched against each other;
 * only nesting depth is tracked.
 *
 * @returns Index of the closing bracket, or -1 if the text ends before depth returns to 0.
 */
function findBalancedEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (escaped) {
      escaped = false;
      continue;
    }

    if (inString) {
      if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === '[' || ch === '{') {
      depth++;
    } else if (ch === ']' || ch === '}') {
      depth--;
      if (depth === 0) return i;
    }
  }

  return -1;
}
/**
 * Parse a JSON string, progressively repairing problems that are common in AI output.
 *
 * Attempts, in order; the first that parses wins:
 *  1. `JSON.parse` on the input as-is.
 *  2. Double-escape LaTeX-style backslashes (`\left` becomes `\\left`) and close a
 *     truncated top-level array or object.
 *  3. `jsonrepair` on the original input.
 *  4. Escape raw newline / carriage return / tab characters inside string literals and
 *     drop every other control character.
 *
 * @param jsonStr - Candidate JSON text.
 * @returns The parsed value cast to `T` without runtime validation, or `null` if every
 *   attempt fails. Input that is literally JSON `null` also yields `null`.
 */
export function tryParseJson<T>(jsonStr: string): T | null {
  // Attempt 1: parse as-is.
  try {
    return JSON.parse(jsonStr) as T;
  } catch {
    // Fall through to the repair attempts.
  }

  // Attempt 2: fix invalid escapes and truncation.
  try {
    let fixed = escapeInvalidBackslashes(jsonStr);
    let truncationWarning: string | null = null;

    const trimmed = fixed.trim();
    if (trimmed.startsWith('[') && !trimmed.endsWith(']')) {
      // Heuristic: cut after the last `}` (dropping any partial trailing element) and close.
      const lastCompleteObj = fixed.lastIndexOf('}');
      if (lastCompleteObj > 0) {
        fixed = fixed.substring(0, lastCompleteObj + 1) + ']';
        truncationWarning = 'Fixed truncated JSON array';
      }
    } else if (trimmed.startsWith('{') && !trimmed.endsWith('}')) {
      // Heuristic: append as many `}` as are missing. Braces inside strings are counted too.
      const openBraces = (fixed.match(/\{/g) ?? []).length;
      const closeBraces = (fixed.match(/\}/g) ?? []).length;
      if (openBraces > closeBraces) {
        fixed += '}'.repeat(openBraces - closeBraces);
        truncationWarning = 'Fixed truncated JSON object';
      }
    }

    const parsed = JSON.parse(fixed) as T;
    // Only report the repair once it is known to have produced parseable JSON.
    if (truncationWarning !== null) log.warn(truncationWarning);
    return parsed;
  } catch {
    // Fall through to the next attempt.
  }

  // Attempt 3: jsonrepair for malformed JSON (e.g. unescaped quotes in Chinese text).
  try {
    const repaired = jsonrepair(jsonStr);
    return JSON.parse(repaired) as T;
  } catch {
    // Fall through to the next attempt.
  }

  // Attempt 4: neutralise raw control characters.
  try {
    return JSON.parse(escapeControlCharsInStrings(jsonStr)) as T;
  } catch {
    return null;
  }
}

/**
 * Double the backslash of every `\<letter>` pair that is not a valid JSON escape.
 *
 * The text is consumed one escape pair at a time, so an already-escaped backslash
 * (`\\`) is treated as a unit and the character after it is never mistaken for the
 * start of a new escape. `\uXXXX` is kept only when followed by four hex digits; any
 * other `\u...` (e.g. `\underline`) is doubled like the rest. The single-letter
 * escapes `\b \f \n \r \t` are always kept as-is, so LaTeX commands that begin with
 * those letters are not repaired here. Backslashes followed by a non-letter are left
 * untouched.
 */
function escapeInvalidBackslashes(text: string): string {
  return text.replace(/\\(u[0-9a-fA-F]{4}|[\s\S])/g, (pair: string, body: string) => {
    // Well-formed unicode escape.
    if (body.length > 1) return pair;
    // Valid single-character JSON escapes (including the escaped backslash itself).
    if ('"\\/bfnrt'.includes(body)) return pair;
    // Backslash + letter: most likely a LaTeX command, so keep the backslash literally.
    return /[a-zA-Z]/.test(body) ? `\\${pair}` : pair;
  });
}

/**
 * Make raw control characters safe for `JSON.parse`.
 *
 * Inside double-quoted strings, newline / carriage return / tab are replaced by their
 * two-character JSON escapes. Outside strings they are legal whitespace and are kept.
 * All other characters in U+0000-U+001F, plus U+007F, are removed everywhere.
 */
function escapeControlCharsInStrings(text: string): string {
  let out = '';
  let inString = false;
  let escaped = false;

  for (const ch of text) {
    const code = ch.charCodeAt(0);

    if (code <= 0x1f || code === 0x7f) {
      const isWhitespace = ch === '\n' || ch === '\r' || ch === '\t';
      // Dropped characters leave the escape state untouched.
      if (!isWhitespace) continue;
      if (!inString) {
        out += ch;
        continue;
      }
      out += ch === '\n' ? '\\n' : ch === '\r' ? '\\r' : '\\t';
      // The emitted backslash either completes a pending escape or starts and ends its own.
      escaped = false;
      continue;
    }

    out += ch;
    if (escaped) escaped = false;
    else if (inString && ch === '\\') escaped = true;
    else if (ch === '"') inString = !inString;
  }

  return out;
}
|