gaurv007 committed on
Commit
74929b6
·
verified ·
1 Parent(s): d339e38

v4.0: Fix analyze route — proper SSE parsing + immediate JSON file download

Browse files
Files changed (1) hide show
  1. web/app/api/analyze/route.ts +103 -89
web/app/api/analyze/route.ts CHANGED
@@ -14,7 +14,7 @@ export async function POST(req: NextRequest) {
14
  );
15
  }
16
 
17
- // Step 1: Submit analysis to Gradio Space API
18
  const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/_analysis_and_index`, {
19
  method: "POST",
20
  headers: { "Content-Type": "application/json" },
@@ -28,129 +28,143 @@ export async function POST(req: NextRequest) {
28
  const { event_id } = await submitRes.json();
29
  if (!event_id) throw new Error("No event_id from Gradio");
30
 
31
- // Step 2: Poll for result (SSE endpoint)
32
- const resultRes = await fetch(
33
- `${GRADIO_URL}/gradio_api/call/_analysis_and_index/${event_id}`,
34
- { headers: { Accept: "text/event-stream" } }
35
- );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- if (!resultRes.ok) {
38
- throw new Error(`Gradio result failed: ${resultRes.status}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
 
41
- const resultText = await resultRes.text();
42
-
43
- // Parse SSE — find the "data:" line after "event: complete"
44
- const dataMatch = resultText.match(/event:\s*complete\s*\ndata:\s*(.+)/);
45
- if (!dataMatch) throw new Error("No complete event from Gradio");
46
-
47
- const gradioData = JSON.parse(dataMatch[1]);
48
- // gradioData is an array:
49
- // [0] = summary HTML
50
- // [1] = clauses HTML
51
- // [2] = entities HTML
52
- // [3] = contradictions HTML
53
- // [4] = document HTML
54
- // [5] = obligations HTML
55
- // [6] = compliance HTML
56
- // [7] = redlining HTML
57
- // [8] = JSON file object {path, url, ...}
58
- // [9] = CSV file object
59
- // [10] = status string
60
- // [11] = analysis result dict (or null — it's gr.State, not directly in API output)
61
- // ... (chunks, embeddings, chatbot status — also gr.State, not in output)
62
-
63
- // Step 3: Download the JSON report file which has structured data
64
  const jsonFileObj = gradioData[8];
65
  if (!jsonFileObj?.url) {
66
- throw new Error("No JSON report in response");
67
  }
68
 
 
69
  const jsonRes = await fetch(jsonFileObj.url);
70
- if (!jsonRes.ok) throw new Error("Failed to download JSON report");
71
  const analysisData = await jsonRes.json();
72
 
73
- // Step 4: Transform to the format the frontend expects
74
- const clauseResults: any[] = [];
 
 
 
 
 
 
75
  for (const cr of (analysisData.clauses || [])) {
76
- // Find or create the clause entry
77
- let existing = clauseResults.find(c => c.text === cr.text);
78
- if (!existing) {
79
- existing = { text: cr.text, categories: [] };
80
- clauseResults.push(existing);
81
  }
82
- existing.categories.push({
83
  name: cr.label,
84
  severity: cr.risk,
85
  confidence: cr.confidence,
86
  description: cr.description,
87
  });
88
  }
 
89
 
90
- // Extract redlines from the analysis (parse from HTML if needed)
91
- // Since redlines aren't in the JSON report, parse from the HTML
92
  const redlines: any[] = [];
93
- const redlineHtml = gradioData[7] || "";
94
- // Simple regex extraction from redlining HTML
95
- const redlineBlocks = redlineHtml.split('border-left:4px solid');
96
- for (let i = 1; i < redlineBlocks.length; i++) {
97
- const block = redlineBlocks[i];
98
- const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*>([^<]+)/);
99
- const originalMatch = block.match(/<del>([^<]*)<\/del>/);
100
- const safeMatch = block.match(/Suggested Alternative<\/div>\s*<div[^>]*>([^<]*(?:<[^/][^>]*>[^<]*)*)/);
101
- const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
102
- const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
103
- const tierMatch = block.match(/(LLM Refined|Template)/);
104
-
105
- if (labelMatch) {
106
- // Clean HTML from safe alternative
107
- let safeText = safeMatch ? safeMatch[1].replace(/<[^>]+>/g, '').trim() : '';
108
- redlines.push({
109
- clause_label: labelMatch[1].trim(),
110
- risk_level: labelMatch[2].trim(),
111
- original_text: originalMatch ? originalMatch[1].trim() : '',
112
- safe_alternative: safeText,
113
- legal_basis: legalMatch ? legalMatch[1].trim() : '',
114
- consumer_standard: consumerMatch ? consumerMatch[1].trim() : '',
115
- tier: tierMatch ? (tierMatch[1] === 'LLM Refined' ? 'llm_refined' : 'template') : 'template',
116
- });
117
  }
118
  }
119
 
120
- // Parse risk score and grade from summary HTML
121
- const scoreMatch = (gradioData[0] || '').match(/font-size:48px[^>]*>(\d+)/);
122
- const gradeMatch = (gradioData[0] || '').match(/Grade\s+([A-F])/);
123
- const totalMatch = (gradioData[0] || '').match(/(\d+)\s+clauses\s+analyzed/);
124
- const flaggedMatch = (gradioData[0] || '').match(/(\d+)\s+flagged/);
125
-
126
- // Parse severity counts from summary HTML
127
- const critMatch = (gradioData[0] || '').match(/color:#dc2626[^>]*>(\d+)<\/div>\s*<div[^>]*>Critical/);
128
- const highMatch2 = (gradioData[0] || '').match(/color:#ea580c[^>]*>(\d+)<\/div>\s*<div[^>]*>High/);
129
- const medMatch = (gradioData[0] || '').match(/color:#ca8a04[^>]*>(\d+)<\/div>\s*<div[^>]*>Medium/);
130
- const lowMatch = (gradioData[0] || '').match(/color:#16a34a[^>]*>(\d+)<\/div>\s*<div[^>]*>Low/);
131
-
132
- const riskScore = scoreMatch ? parseInt(scoreMatch[1]) : 0;
133
- const grade = gradeMatch ? gradeMatch[1] : 'A';
134
 
135
  return NextResponse.json({
136
  risk_score: riskScore,
137
- grade: grade,
138
- total_clauses: totalMatch ? parseInt(totalMatch[1]) : clauseResults.length,
139
- flagged_count: flaggedMatch ? parseInt(flaggedMatch[1]) : clauseResults.length,
140
- results: clauseResults,
141
  entities: analysisData.entities || [],
142
  contradictions: analysisData.contradictions || [],
143
  obligations: analysisData.obligations || [],
144
  compliance: analysisData.compliance || {},
145
- redlines: redlines,
146
- model: analysisData.metadata?.model?.includes("loaded") ? "ml" : "regex",
147
  latency_ms: 0,
148
- session_id: null, // Gradio API doesn't expose gr.State
149
  });
150
  } catch (error: any) {
151
  console.error("Analyze error:", error.message);
152
  return NextResponse.json(
153
- { error: "Analysis failed: " + (error.message || "Unknown error. The backend may be starting up — try again in 30 seconds.") },
154
  { status: 500 }
155
  );
156
  }
 
14
  );
15
  }
16
 
17
+ // Step 1: Submit to Gradio Space
18
  const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/_analysis_and_index`, {
19
  method: "POST",
20
  headers: { "Content-Type": "application/json" },
 
28
  const { event_id } = await submitRes.json();
29
  if (!event_id) throw new Error("No event_id from Gradio");
30
 
31
+ // Step 2: Poll for result (SSE)
32
+ // The Gradio API streams but we need the full response
33
+ let resultText = "";
34
+ let attempts = 0;
35
+ const maxAttempts = 60; // 60 seconds max
36
+
37
+ while (attempts < maxAttempts) {
38
+ const resultRes = await fetch(
39
+ `${GRADIO_URL}/gradio_api/call/_analysis_and_index/${event_id}`,
40
+ { headers: { Accept: "text/event-stream" } }
41
+ );
42
+
43
+ resultText = await resultRes.text();
44
+
45
+ if (resultText.includes("event: complete")) break;
46
+ if (resultText.includes("event: error")) {
47
+ const errMatch = resultText.match(/data:\s*(.+)/);
48
+ throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
49
+ }
50
+
51
+ // Wait 1 second and retry
52
+ await new Promise(r => setTimeout(r, 1000));
53
+ attempts++;
54
+ }
55
+
56
+ if (!resultText.includes("event: complete")) {
57
+ throw new Error("Analysis timed out");
58
+ }
59
 
60
+ // Step 3: Parse the SSE data
61
+ // Format: "event: complete\ndata: [...]"
62
+ // The data contains HTML with literal newlines, so we need to find 'data: ' after 'event: complete'
63
+ const completeIdx = resultText.indexOf("event: complete");
64
+ const dataIdx = resultText.indexOf("data: ", completeIdx);
65
+ if (dataIdx === -1) throw new Error("No data in response");
66
+
67
+ const dataStr = resultText.substring(dataIdx + 6).trim();
68
+
69
+ // Parse JSON — the HTML strings contain control characters so we need to handle that
70
+ // Note: JSON.parse rejects unescaped control chars inside strings (just like Python's strict mode), hence the fallback below
71
+ let gradioData: any[];
72
+ try {
73
+ gradioData = JSON.parse(dataStr);
74
+ } catch {
75
+ // If direct parse fails, try replacing problematic control characters
76
+ const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
77
+ if (ch === "\n") return "\\n";
78
+ if (ch === "\r") return "\\r";
79
+ if (ch === "\t") return "\\t";
80
+ return "";
81
+ });
82
+ gradioData = JSON.parse(cleaned);
83
  }
84
 
85
+ // Step 4: Download the JSON report file (structured data)
86
+ // gradioData[8] is the JSON file object with { url, path, ... }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  const jsonFileObj = gradioData[8];
88
  if (!jsonFileObj?.url) {
89
+ throw new Error("No JSON report generated");
90
  }
91
 
92
+ // Download immediately (temp files expire quickly)
93
  const jsonRes = await fetch(jsonFileObj.url);
94
+ if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
95
  const analysisData = await jsonRes.json();
96
 
97
+ // Step 5: Transform to frontend format
98
+ const riskScore = analysisData.risk?.score ?? 0;
99
+ const grade = analysisData.risk?.grade ?? "A";
100
+ const totalClauses = analysisData.metadata?.total_clauses ?? 0;
101
+ const flaggedCount = analysisData.metadata?.flagged_clauses ?? 0;
102
+
103
+ // Group clauses by text (multiple labels per clause)
104
+ const clauseMap = new Map<string, any>();
105
  for (const cr of (analysisData.clauses || [])) {
106
+ if (!clauseMap.has(cr.text)) {
107
+ clauseMap.set(cr.text, { text: cr.text, categories: [] });
 
 
 
108
  }
109
+ clauseMap.get(cr.text)!.categories.push({
110
  name: cr.label,
111
  severity: cr.risk,
112
  confidence: cr.confidence,
113
  description: cr.description,
114
  });
115
  }
116
+ const results = Array.from(clauseMap.values());
117
 
118
+ // Parse redlines from HTML (gradioData[7])
 
119
  const redlines: any[] = [];
120
+ const redlineHtml = typeof gradioData[7] === "string" ? gradioData[7] : "";
121
+ if (redlineHtml.includes("Clause Redlining")) {
122
+ // Split by redline card borders
123
+ const blocks = redlineHtml.split(/border-left:4px solid #/);
124
+ for (let i = 1; i < blocks.length; i++) {
125
+ const block = blocks[i];
126
+ const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/);
127
+ const origMatch = block.match(/<del>([^<]*)<\/del>/);
128
+ const safeBlock = block.match(/Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
129
+ const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
130
+ const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
131
+ const isLLM = block.includes("LLM Refined");
132
+
133
+ if (labelMatch) {
134
+ redlines.push({
135
+ clause_label: labelMatch[1].trim(),
136
+ risk_level: labelMatch[2].trim(),
137
+ original_text: origMatch ? origMatch[1].trim() : "",
138
+ safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
139
+ legal_basis: legalMatch ? legalMatch[1].trim() : "",
140
+ consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
141
+ tier: isLLM ? "llm_refined" : "template",
142
+ });
143
+ }
144
  }
145
  }
146
 
147
+ const modelStatus = analysisData.metadata?.model || "";
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  return NextResponse.json({
150
  risk_score: riskScore,
151
+ grade,
152
+ total_clauses: totalClauses,
153
+ flagged_count: flaggedCount,
154
+ results,
155
  entities: analysisData.entities || [],
156
  contradictions: analysisData.contradictions || [],
157
  obligations: analysisData.obligations || [],
158
  compliance: analysisData.compliance || {},
159
+ redlines,
160
+ model: modelStatus.includes("loaded") ? "ml" : "regex",
161
  latency_ms: 0,
162
+ session_id: null,
163
  });
164
  } catch (error: any) {
165
  console.error("Analyze error:", error.message);
166
  return NextResponse.json(
167
+ { error: "Analysis failed: " + (error.message || "Try again in 30 seconds.") },
168
  { status: 500 }
169
  );
170
  }