Spaces:
Sleeping
Sleeping
File size: 6,469 Bytes
9548e93 510d822 9548e93 510d822 9548e93 74929b6 510d822 9548e93 510d822 9548e93 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 9548e93 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 74929b6 510d822 4148ffc 9548e93 74929b6 9548e93 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 | import { NextRequest, NextResponse } from "next/server";
/**
 * Normalizes the configured Gradio base URL.
 *
 * Surrounding whitespace and trailing slashes are removed so that paths
 * appended as `${GRADIO_URL}/gradio_api/...` never contain a double slash.
 * A missing or blank value falls back to the default public Space.
 *
 * @param raw - Raw value of the CLAUSEGUARD_GRADIO_URL environment variable.
 * @returns Base URL without a trailing slash.
 */
function resolveGradioUrl(raw: string | undefined): string {
  const candidate = (raw ?? "").trim().replace(/\/+$/, "");
  return candidate || "https://gaurv007-clauseguard.hf.space";
}

/** Base URL of the Gradio Space that performs the analysis (no trailing slash). */
const GRADIO_URL = resolveGradioUrl(process.env.CLAUSEGUARD_GRADIO_URL);
/** One label reported for a clause, as exposed to the frontend. */
interface ClauseCategory {
  name: unknown;
  severity: unknown;
  confidence: unknown;
  description: unknown;
}

/** A clause text together with every category reported for it. */
interface GroupedClause {
  text: unknown;
  categories: ClauseCategory[];
}

/** A redline suggestion scraped from the backend's HTML output. */
interface Redline {
  clause_label: string;
  risk_level: string;
  original_text: string;
  safe_alternative: string;
  legal_basis: string;
  consumer_standard: string;
  tier: "llm_refined" | "template";
}

/** Type guard for non-null objects, used to narrow untyped JSON. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/** Returns `value` when it is a non-null object, otherwise an empty record. */
function toRecord(value: unknown): Record<string, unknown> {
  return isRecord(value) ? value : {};
}

/** Returns capture group `group` of a regex match, or "" when there is none. */
function captured(match: RegExpMatchArray | null, group = 1): string {
  return match?.[group] ?? "";
}

/** Extracts a human-readable message from an arbitrary thrown value. */
function describeError(error: unknown): string {
  if (error instanceof Error) return error.message;
  return typeof error === "string" ? error : "";
}

/**
 * Returns everything following the first `data: ` marker that appears after
 * `event: <eventName>` in an SSE response body, or null when either marker is
 * missing.
 */
function extractEventData(stream: string, eventName: string): string | null {
  const eventIdx = stream.indexOf(`event: ${eventName}`);
  if (eventIdx === -1) return null;
  const dataIdx = stream.indexOf("data: ", eventIdx);
  if (dataIdx === -1) return null;
  return stream.substring(dataIdx + "data: ".length).trim();
}

/**
 * Parses the JSON array carried by the `complete` event.
 *
 * If the direct parse fails, raw control characters are escaped (newline,
 * carriage return, tab) or dropped (everything else below 0x20) and the parse
 * is retried once.
 *
 * @throws SyntaxError when both parse attempts fail.
 * @throws Error when the parsed value is not an array.
 */
function parseGradioPayload(dataStr: string): unknown[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(dataStr);
  } catch {
    const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
      if (ch === "\n") return "\\n";
      if (ch === "\r") return "\\r";
      if (ch === "\t") return "\\t";
      return "";
    });
    parsed = JSON.parse(cleaned);
  }
  if (!Array.isArray(parsed)) {
    throw new Error("Unexpected response shape from Gradio");
  }
  return parsed;
}

/**
 * Groups report entries by their `text`, so a clause that received several
 * labels appears once with all of its categories. Non-object entries are
 * skipped; a non-array input yields an empty list.
 */
function groupClausesByText(clauses: unknown): GroupedClause[] {
  if (!Array.isArray(clauses)) return [];
  const byText = new Map<unknown, GroupedClause>();
  for (const entry of clauses) {
    if (!isRecord(entry)) continue;
    let group = byText.get(entry.text);
    if (!group) {
      group = { text: entry.text, categories: [] };
      byText.set(entry.text, group);
    }
    group.categories.push({
      name: entry.label,
      severity: entry.risk,
      confidence: entry.confidence,
      description: entry.description,
    });
  }
  return Array.from(byText.values());
}

/**
 * Scrapes redline cards out of the backend's HTML fragment.
 *
 * The HTML is split on the card border style; a card is kept only when its
 * label/risk header can be matched. Missing optional parts become "".
 */
function parseRedlines(redlineHtml: string): Redline[] {
  if (!redlineHtml.includes("Clause Redlining")) return [];

  const labelRe = /font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/;
  const originalRe = /<del>([^<]*)<\/del>/;
  const alternativeRe = /Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/;
  const legalRe = /Legal Basis<\/div>\s*<div[^>]*>([^<]+)/;
  const consumerRe = /Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/;

  const redlines: Redline[] = [];
  // The chunk before the first border marker is the section header, not a card.
  const cards = redlineHtml.split(/border-left:4px solid #/).slice(1);
  for (const card of cards) {
    const labelMatch = card.match(labelRe);
    if (!labelMatch) continue;
    redlines.push({
      clause_label: captured(labelMatch, 1).trim(),
      risk_level: captured(labelMatch, 2).trim(),
      original_text: captured(card.match(originalRe)).trim(),
      // Strip any nested tags from the suggested alternative.
      safe_alternative: captured(card.match(alternativeRe)).replace(/<[^>]+>/g, "").trim(),
      legal_basis: captured(card.match(legalRe)).trim(),
      consumer_standard: captured(card.match(consumerRe)).trim(),
      tier: card.includes("LLM Refined") ? "llm_refined" : "template",
    });
  }
  return redlines;
}

/**
 * Route handler: analyzes contract text through the ClauseGuard Gradio Space.
 *
 * Flow: validate the request body, submit the text to the Space, poll the SSE
 * result endpoint until a `complete` event arrives, download the JSON report
 * referenced by the result, and reshape it for the frontend.
 *
 * @param req - Request whose JSON body carries `text` (at least 50 characters
 *   after trimming).
 * @returns 200 with the analysis payload; 400 when the body is not valid JSON
 *   or `text` is missing/too short; 500 for any backend or parsing failure.
 */
export async function POST(req: NextRequest) {
  try {
    // A body that is not valid JSON is a client error, not a server failure.
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      return NextResponse.json(
        { error: "Request body must be valid JSON." },
        { status: 400 }
      );
    }

    const text = toRecord(body).text;
    if (typeof text !== "string" || text.trim().length < 50) {
      return NextResponse.json(
        { error: "Please provide at least 50 characters of text to analyze." },
        { status: 400 }
      );
    }

    // Step 1: Submit to Gradio Space
    const callUrl = `${GRADIO_URL}/gradio_api/call/_analysis_and_index`;
    const submitRes = await fetch(callUrl, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ data: [text] }),
    });
    if (!submitRes.ok) {
      throw new Error(`Gradio submit failed: ${submitRes.status}`);
    }
    const submitted: unknown = await submitRes.json();
    const eventId = toRecord(submitted).event_id;
    if ((typeof eventId !== "string" && typeof eventId !== "number") || !eventId) {
      throw new Error("No event_id from Gradio");
    }

    // Step 2: Poll the SSE result endpoint.
    // NOTE: each attempt reads the whole response body before inspecting it, so
    // maxAttempts bounds the number of retries rather than total wall time.
    const maxAttempts = 60;
    const pollIntervalMs = 1000;
    let resultText = "";
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const resultRes = await fetch(`${callUrl}/${eventId}`, {
        headers: { Accept: "text/event-stream" },
      });

      if (resultRes.ok) {
        resultText = await resultRes.text();
        if (resultText.includes("event: complete")) break;

        const errorIdx = resultText.indexOf("event: error");
        if (errorIdx !== -1) {
          // Read the data line that follows the error event, not an earlier one.
          const detail = captured(resultText.slice(errorIdx).match(/data:\s*(.+)/)).trim();
          // A literal `null` payload carries no message; use the generic text.
          throw new Error(detail && detail !== "null" ? detail : "Analysis failed in backend");
        }
      } else if (resultRes.status >= 400 && resultRes.status < 500 && resultRes.status !== 429) {
        // Retrying will not fix a client error (429 excepted), so fail fast.
        throw new Error(`Gradio result fetch failed: ${resultRes.status}`);
      }

      // Wait before the next attempt.
      await new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs));
    }
    if (!resultText.includes("event: complete")) {
      throw new Error("Analysis timed out");
    }

    // Step 3: Parse the SSE data ("event: complete\ndata: [...]").
    const dataStr = extractEventData(resultText, "complete");
    if (dataStr === null) throw new Error("No data in response");
    const gradioData = parseGradioPayload(dataStr);

    // Step 4: Download the JSON report file (structured data).
    // gradioData[8] is expected to be a file object exposing `url`.
    const reportUrl = toRecord(gradioData[8]).url;
    if (typeof reportUrl !== "string" || !reportUrl) {
      throw new Error("No JSON report generated");
    }
    // Download immediately (temp files expire quickly)
    const jsonRes = await fetch(reportUrl);
    if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
    const analysisData: unknown = await jsonRes.json();
    if (!isRecord(analysisData)) {
      throw new Error("Analysis JSON has an unexpected shape");
    }

    // Step 5: Transform to frontend format
    const risk = toRecord(analysisData.risk);
    const metadata = toRecord(analysisData.metadata);
    const results = groupClausesByText(analysisData.clauses);

    // Redlines are only available as HTML (gradioData[7]).
    const rawRedlineHtml = gradioData[7];
    const redlines = parseRedlines(typeof rawRedlineHtml === "string" ? rawRedlineHtml : "");

    const rawModel = metadata.model;
    const modelStatus = typeof rawModel === "string" ? rawModel : "";

    return NextResponse.json({
      risk_score: risk.score ?? 0,
      grade: risk.grade ?? "A",
      total_clauses: metadata.total_clauses ?? 0,
      flagged_count: metadata.flagged_clauses ?? 0,
      results,
      entities: analysisData.entities || [],
      contradictions: analysisData.contradictions || [],
      obligations: analysisData.obligations || [],
      compliance: analysisData.compliance || {},
      redlines,
      model: modelStatus.includes("loaded") ? "ml" : "regex",
      latency_ms: 0,
      session_id: null,
    });
  } catch (error: unknown) {
    const message = describeError(error);
    console.error("Analyze error:", message);
    return NextResponse.json(
      { error: "Analysis failed: " + (message || "Try again in 30 seconds.") },
      { status: 500 }
    );
  }
}
|