ClauseGuard / web /app /api /analyze /route.ts
gaurv007's picture
v4.0: Fix analyze route — proper SSE parsing + immediate JSON file download
74929b6 verified
raw
history blame
6.47 kB
import { NextRequest, NextResponse } from "next/server";
/** Base URL of the Gradio Space used when no override is configured. */
const DEFAULT_GRADIO_URL = "https://gaurv007-clauseguard.hf.space";

/**
 * Chooses the Gradio base URL.
 *
 * A missing, empty or whitespace-only override falls back to the default.
 * Trailing slashes are removed because request paths are appended with a
 * leading "/" and would otherwise contain "//".
 *
 * @param configured - Raw override value, typically read from the environment.
 * @returns The base URL without a trailing slash.
 */
function resolveGradioUrl(configured: string | undefined): string {
  const base = configured?.trim() || DEFAULT_GRADIO_URL;
  return base.replace(/\/+$/, "");
}

/** Base URL every Gradio request in this route is built from. */
const GRADIO_URL = resolveGradioUrl(process.env.CLAUSEGUARD_GRADIO_URL);
/** Minimum trimmed length the submitted text must have before it is analyzed. */
const MIN_TEXT_LENGTH = 50;
/** How many times the result endpoint is fetched before giving up. */
const MAX_POLL_ATTEMPTS = 60;
/** Pause between two fetches of the result endpoint, in milliseconds. */
const POLL_INTERVAL_MS = 1000;

/** JSON escape sequences for the control characters that are kept when repairing a payload. */
const CONTROL_ESCAPES: Record<string, string> = { "\n": "\\n", "\r": "\\r", "\t": "\\t" };

/** One clause together with every category that was reported for its text. */
interface GroupedClause {
  text: unknown;
  categories: Array<{
    name: unknown;
    severity: unknown;
    confidence: unknown;
    description: unknown;
  }>;
}

/** One redline suggestion scraped from the redlining HTML. */
interface Redline {
  clause_label: string;
  risk_level: string;
  original_text: string;
  safe_alternative: string;
  legal_basis: string;
  consumer_standard: string;
  tier: "llm_refined" | "template";
}

/** Narrows a value to a non-null object so its properties can be read safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/**
 * Returns everything that follows the first `data:` field found after the
 * first `event: <eventName>` marker (trimmed), or null when either is absent.
 */
function extractEventData(stream: string, eventName: string): string | null {
  const eventIdx = stream.indexOf(`event: ${eventName}`);
  if (eventIdx === -1) return null;
  const dataIdx = stream.indexOf("data:", eventIdx);
  if (dataIdx === -1) return null;
  return stream.substring(dataIdx + "data:".length).trim();
}

/**
 * Reads the message of the error event from a stream.
 *
 * Only the data that belongs to the error event is considered, so `data:`
 * lines of earlier events are never mistaken for the error message.
 */
function readErrorMessage(stream: string): string {
  const data = extractEventData(stream, "error") ?? "";
  const firstLine = data.split(/\r?\n/, 1)[0]?.trim() ?? "";
  // A literal null payload carries no message, so the generic text is used.
  return firstLine !== "" && firstLine !== "null" ? firstLine : "Analysis failed in backend";
}

/**
 * Repairs JSON text whose string literals contain raw control characters.
 *
 * Inside string literals, newline, carriage return and tab are replaced by
 * their escape sequences and any other control character is dropped. Outside
 * string literals, newline, carriage return and tab are left alone because
 * they are valid JSON whitespace; other control characters are dropped.
 */
function escapeControlCharsInStrings(json: string): string {
  const out: string[] = [];
  let inString = false;
  let afterBackslash = false;
  for (let i = 0; i < json.length; i++) {
    const ch = json.charAt(i);
    const isControl = json.charCodeAt(i) < 0x20;
    if (!inString) {
      if (ch === '"') inString = true;
      if (!isControl || ch in CONTROL_ESCAPES) out.push(ch);
      continue;
    }
    if (isControl) {
      out.push(CONTROL_ESCAPES[ch] ?? "");
      afterBackslash = false;
      continue;
    }
    out.push(ch);
    if (afterBackslash) {
      afterBackslash = false;
    } else if (ch === "\\") {
      afterBackslash = true;
    } else if (ch === '"') {
      inString = false;
    }
  }
  return out.join("");
}

/**
 * Parses the payload of the complete event into an array.
 *
 * JSON.parse rejects raw control characters inside string literals, so when
 * the direct parse fails the payload is repaired and parsed once more.
 *
 * @throws When the payload is not valid JSON even after the repair, or when
 *   it is not an array.
 */
function parseGradioPayload(raw: string): unknown[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    parsed = JSON.parse(escapeControlCharsInStrings(raw));
  }
  if (!Array.isArray(parsed)) {
    throw new Error("Unexpected response format from backend");
  }
  return parsed;
}

/**
 * Fetches the result endpoint until the body contains a complete event.
 *
 * @returns The full body of the response that contains the complete event.
 * @throws When the body contains an error event, or when no complete event
 *   was seen after MAX_POLL_ATTEMPTS fetches.
 */
async function fetchCompletedStream(eventId: string): Promise<string> {
  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
    const res = await fetch(
      `${GRADIO_URL}/gradio_api/call/_analysis_and_index/${eventId}`,
      { headers: { Accept: "text/event-stream" } }
    );
    const stream = await res.text();
    if (stream.includes("event: complete")) return stream;
    if (stream.includes("event: error")) {
      throw new Error(readErrorMessage(stream));
    }
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  }
  throw new Error("Analysis timed out");
}

/**
 * Merges report entries that share the same text into one clause carrying a
 * list of categories, in order of first appearance. Anything that is not an
 * array yields an empty list; entries that are not objects are skipped.
 */
function groupClauses(clauses: unknown): GroupedClause[] {
  if (!Array.isArray(clauses)) return [];
  const byText = new Map<unknown, GroupedClause>();
  for (const entry of clauses as unknown[]) {
    if (!isRecord(entry)) continue;
    let group = byText.get(entry.text);
    if (!group) {
      group = { text: entry.text, categories: [] };
      byText.set(entry.text, group);
    }
    group.categories.push({
      name: entry.label,
      severity: entry.risk,
      confidence: entry.confidence,
      description: entry.description,
    });
  }
  return Array.from(byText.values());
}

/** Returns the first capture group of `pattern` in `text`, if it matched. */
function firstCapture(text: string, pattern: RegExp): string | undefined {
  return text.match(pattern)?.[1];
}

/**
 * Scrapes redline cards out of the redlining HTML.
 *
 * The HTML is split on the card border style; each piece after the first is
 * treated as one card. Cards without a recognizable label/risk pair are
 * skipped. Non-string input or HTML without the "Clause Redlining" heading
 * yields an empty list.
 */
function parseRedlines(html: unknown): Redline[] {
  if (typeof html !== "string" || !html.includes("Clause Redlining")) return [];
  const redlines: Redline[] = [];
  // The piece before the first border marker precedes every card.
  const cards = html.split(/border-left:4px solid #/).slice(1);
  for (const card of cards) {
    const labelMatch = card.match(
      /font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/
    );
    if (!labelMatch) continue;
    const [, label = "", risk = ""] = labelMatch;
    const safeHtml = firstCapture(
      card,
      /Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/
    );
    redlines.push({
      clause_label: label.trim(),
      risk_level: risk.trim(),
      original_text: firstCapture(card, /<del>([^<]*)<\/del>/)?.trim() ?? "",
      // Nested markup inside the suggestion is stripped, leaving plain text.
      safe_alternative: safeHtml?.replace(/<[^>]+>/g, "").trim() ?? "",
      legal_basis: firstCapture(card, /Legal Basis<\/div>\s*<div[^>]*>([^<]+)/)?.trim() ?? "",
      consumer_standard:
        firstCapture(card, /Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/)?.trim() ?? "",
      tier: card.includes("LLM Refined") ? "llm_refined" : "template",
    });
  }
  return redlines;
}

/**
 * POST handler: sends the submitted text to the Gradio backend and returns
 * the analysis in the shape the frontend consumes.
 *
 * Steps: validate the body, submit the text, fetch the result stream until it
 * completes, parse the complete event, download the JSON report referenced at
 * index 8 of the payload, and combine it with the redlines scraped from the
 * HTML at index 7.
 *
 * Responses:
 * - 400 when the body is not valid JSON or `text` is not a string of at
 *   least 50 characters after trimming.
 * - 500 with an "Analysis failed: ..." message when any backend step fails.
 * - 200 with the analysis otherwise.
 */
export async function POST(req: NextRequest) {
  try {
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      // A body that cannot be parsed is the caller's mistake, not a server failure.
      return NextResponse.json(
        { error: "Request body must be valid JSON." },
        { status: 400 }
      );
    }
    const text = isRecord(body) ? body.text : undefined;
    if (typeof text !== "string" || text.trim().length < MIN_TEXT_LENGTH) {
      return NextResponse.json(
        { error: "Please provide at least 50 characters of text to analyze." },
        { status: 400 }
      );
    }

    // Step 1: Submit to Gradio Space
    const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/_analysis_and_index`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ data: [text] }),
    });
    if (!submitRes.ok) {
      throw new Error(`Gradio submit failed: ${submitRes.status}`);
    }
    const submitted: unknown = await submitRes.json();
    const eventId = isRecord(submitted) ? submitted.event_id : undefined;
    if (!eventId) throw new Error("No event_id from Gradio");

    // Step 2: Fetch the result stream until it reports completion
    const stream = await fetchCompletedStream(String(eventId));

    // Step 3: Parse the payload of the complete event
    const dataStr = extractEventData(stream, "complete");
    if (dataStr === null) throw new Error("No data in response");
    const gradioData = parseGradioPayload(dataStr);

    // Step 4: Download the JSON report file (structured data)
    // Index 8 is expected to hold the file object with a `url` field.
    const reportFile = gradioData[8];
    const reportUrl = isRecord(reportFile) ? reportFile.url : undefined;
    if (typeof reportUrl !== "string" || reportUrl === "") {
      throw new Error("No JSON report generated");
    }
    // Download immediately (temp files expire quickly)
    const jsonRes = await fetch(reportUrl);
    if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
    const analysis: unknown = await jsonRes.json();
    if (!isRecord(analysis)) throw new Error("Analysis JSON has an unexpected shape");

    // Step 5: Transform to frontend format
    const risk: Record<string, unknown> = isRecord(analysis.risk) ? analysis.risk : {};
    const metadata: Record<string, unknown> = isRecord(analysis.metadata)
      ? analysis.metadata
      : {};
    const modelStatus = typeof metadata.model === "string" ? metadata.model : "";

    return NextResponse.json({
      risk_score: risk.score ?? 0,
      grade: risk.grade ?? "A",
      total_clauses: metadata.total_clauses ?? 0,
      flagged_count: metadata.flagged_clauses ?? 0,
      results: groupClauses(analysis.clauses),
      entities: analysis.entities || [],
      contradictions: analysis.contradictions || [],
      obligations: analysis.obligations || [],
      compliance: analysis.compliance || {},
      redlines: parseRedlines(gradioData[7]),
      model: modelStatus.includes("loaded") ? "ml" : "regex",
      latency_ms: 0,
      session_id: null,
    });
  } catch (error: unknown) {
    // Anything can be thrown, so the message is only read from real Error objects.
    const message = error instanceof Error ? error.message : "";
    console.error("Analyze error:", error instanceof Error ? error.message : error);
    return NextResponse.json(
      { error: "Analysis failed: " + (message || "Try again in 30 seconds.") },
      { status: 500 }
    );
  }
}