import { NextRequest, NextResponse } from "next/server";
import { createClient } from "@/lib/supabase/server";

const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";

// FIX v4.1: Max text size validation (prevent oversized payloads).
// Compared against `text.length`; reported to the user in "KB" (length / 1000).
const MAX_TEXT_LENGTH = 200_000;

// Monthly scan allowances. Admins and non-free plans get the "unlimited" sentinel.
const FREE_PLAN_MONTHLY_SCANS = 10;
const UNLIMITED_SCANS = 999999;

// FIX v4.1: polling uses exponential backoff instead of fixed 1s intervals.
// Up to 90 polls; the sleep between polls starts at 500ms, grows by 1.2x and is
// capped at 2s, so the worst-case wait is longer than 90 seconds.
const MAX_POLL_ATTEMPTS = 90;
const INITIAL_POLL_DELAY_MS = 500;
const POLL_BACKOFF_FACTOR = 1.2;
const MAX_POLL_DELAY_MS = 2000;

/** One label attached to a clause. Values are passed through from the backend report unvalidated. */
interface ClauseCategory {
  name: unknown;
  severity: unknown;
  confidence: unknown;
  description: unknown;
}

/** A clause together with every label the backend reported for the same clause text. */
interface GroupedClause {
  text: unknown;
  categories: ClauseCategory[];
}

/** A redline suggestion as returned to the frontend (structured values are not validated). */
type Redline = Record<string, unknown>;

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Best-effort extraction of a human-readable message from a thrown value. */
function errorMessage(error: unknown): string {
  if (error instanceof Error) return error.message;
  if (typeof error === "string") return error;
  return "";
}

/**
 * Polls the Gradio result endpoint for `eventId` until the response text contains
 * `event: complete`, and returns that response text.
 *
 * @throws Error with the backend's message when the stream contains `event: error`.
 * @throws Error when no completion is seen after MAX_POLL_ATTEMPTS polls.
 */
async function pollForCompletion(eventId: string): Promise<string> {
  let delay = INITIAL_POLL_DELAY_MS;

  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
    const res = await fetch(`${GRADIO_URL}/gradio_api/call/analyze/${eventId}`, {
      headers: { Accept: "text/event-stream" },
    });
    const streamText = await res.text();

    if (streamText.includes("event: complete")) return streamText;

    const errorIdx = streamText.indexOf("event: error");
    if (errorIdx !== -1) {
      // Only look at data that follows the error event; an earlier `data:` line
      // may belong to a different event in the same response.
      const errMatch = streamText.slice(errorIdx).match(/data:\s*(.+)/);
      throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
    }

    await sleep(delay);
    delay = Math.min(delay * POLL_BACKOFF_FACTOR, MAX_POLL_DELAY_MS);
  }

  throw new Error("Analysis timed out. The backend may be loading models. Please try again in 30 seconds.");
}

/**
 * Extracts and JSON-parses the `data: ` payload that follows `event: complete`.
 *
 * If the first parse fails, control characters are escaped (\n, \r, \t) or
 * dropped (everything else in \x00-\x1f) and the parse is retried once.
 *
 * @throws Error when there is no data line or the payload is not a JSON array.
 */
function parseCompletePayload(streamText: string): unknown[] {
  const completeIdx = streamText.indexOf("event: complete");
  const dataIdx = streamText.indexOf("data: ", completeIdx);
  if (dataIdx === -1) throw new Error("No data in response");

  const dataStr = streamText.substring(dataIdx + 6).trim();

  let parsed: unknown;
  try {
    parsed = JSON.parse(dataStr);
  } catch {
    const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
      if (ch === "\n") return "\\n";
      if (ch === "\r") return "\\r";
      if (ch === "\t") return "\\t";
      return "";
    });
    parsed = JSON.parse(cleaned);
  }

  // The handler indexes into this value; anything but an array cannot be used.
  if (!Array.isArray(parsed)) throw new Error("No data in response");
  return parsed;
}

/** Groups report clauses by their text so each clause carries all of its labels. */
function groupClausesByText(clauses: unknown): GroupedClause[] {
  if (!Array.isArray(clauses)) return [];

  const grouped = new Map<unknown, GroupedClause>();
  for (const cr of clauses) {
    if (cr == null) continue;

    let entry = grouped.get(cr.text);
    if (!entry) {
      entry = { text: cr.text, categories: [] };
      grouped.set(cr.text, entry);
    }
    entry.categories.push({
      name: cr.label,
      severity: cr.risk,
      confidence: cr.confidence,
      description: cr.description,
    });
  }
  return Array.from(grouped.values());
}

/** FIX v4.1: reads redlines from the structured JSON report, filling defaults for missing fields. */
function redlinesFromReport(rawRedlines: unknown): Redline[] {
  if (!Array.isArray(rawRedlines)) return [];

  return rawRedlines.map((rl) => ({
    clause_label: rl?.clause_label || "",
    risk_level: rl?.risk_level || "MEDIUM",
    original_text: rl?.original_text || "",
    safe_alternative: rl?.safe_alternative || "",
    template_alternative: rl?.template_alternative || "",
    legal_basis: rl?.legal_basis || "",
    consumer_standard: rl?.consumer_standard || "",
    tier: rl?.tier || "template",
  }));
}

/**
 * Fallback: scrapes redlines out of the rendered HTML panel. Used only when the
 * structured report carries none.
 *
 * NOTE(review): several patterns below (e.g. `\s*]*font-weight`, `/([^<]*)<\/del>/`)
 * look like they lost an opening-tag fragment at some point — confirm against the
 * markup the backend actually emits before relying on this path.
 */
function redlinesFromHtml(redlineHtml: string): Redline[] {
  const found: Redline[] = [];
  if (!redlineHtml.includes("Clause Redlining")) return found;

  const blocks = redlineHtml.split(/border-left:4px solid #/);
  // blocks[0] is whatever precedes the first redline card, so start at 1.
  for (const block of blocks.slice(1)) {
    const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*]*font-weight:600[^>]*>([^<]+)/);
    if (!labelMatch) continue;

    const origMatch = block.match(/([^<]*)<\/del>/);
    const safeBlock = block.match(/Suggested Alternative[\s\S]*?]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
    const legalMatch = block.match(/Legal Basis<\/div>\s*]*>([^<]+)/);
    const consumerMatch = block.match(/Consumer Standard<\/div>\s*]*>([^<]+)/);
    const isLLM = block.includes("LLM Refined");

    found.push({
      clause_label: labelMatch[1].trim(),
      risk_level: labelMatch[2].trim(),
      original_text: origMatch ? origMatch[1].trim() : "",
      safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
      legal_basis: legalMatch ? legalMatch[1].trim() : "",
      consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
      tier: isLLM ? "llm_refined" : "template",
    });
  }
  return found;
}

/**
 * POST handler: analyzes a contract text for the logged-in user.
 *
 * Flow: authenticate -> validate `text` -> enforce the monthly scan limit ->
 * submit to the Gradio Space -> poll for completion -> download the JSON report
 * -> reshape it for the frontend -> bump the user's scan counter and store the
 * analysis in history.
 *
 * Responses:
 * - 401 when no user is logged in.
 * - 400 when `text` is missing, not a string, shorter than 50 characters after
 *   trimming, or longer than MAX_TEXT_LENGTH (also used for malformed JSON bodies).
 * - 403 when the monthly scan limit has been reached.
 * - 500 with `Analysis failed: <message>` for any other failure.
 * - Otherwise the analysis payload as JSON.
 */
export async function POST(req: NextRequest) {
  try {
    const supabase = await createClient();
    const { data: { user } } = await supabase.auth.getUser();
    if (!user) {
      return NextResponse.json({ error: "Unauthorized. Please log in to analyze texts." }, { status: 401 });
    }

    // A malformed or non-object body is a client error, not a server failure:
    // let it fall through to the validation response below instead of throwing.
    let body: unknown = null;
    try {
      body = await req.json();
    } catch {
      body = null;
    }
    const text = typeof body === "object" && body !== null ? (body as { text?: unknown }).text : undefined;

    if (typeof text !== "string" || text.trim().length < 50) {
      return NextResponse.json(
        { error: "Please provide at least 50 characters of text to analyze." },
        { status: 400 }
      );
    }

    // FIX v4.1: Input size validation
    if (text.length > MAX_TEXT_LENGTH) {
      return NextResponse.json(
        { error: `Text too long (${(text.length / 1000).toFixed(0)}KB). Maximum is ${MAX_TEXT_LENGTH / 1000}KB.` },
        { status: 400 }
      );
    }

    // FIX v4.1: REMOVED the XSS sanitization that corrupted contract text.
    // The old code HTML-escaped angle brackets in `text` before analysis, which
    // PERMANENTLY MUTATED contracts that contain < or > characters
    // (e.g., "shall not exceed >$10,000").
    // Sanitization should happen at RENDER TIME in the frontend, not at analysis time.
    // The frontend already uses React which auto-escapes HTML in JSX.

    // Check scan limits — FIX v4.1: query the CORRECT table name
    const { data: profile, error: profileError } = await supabase
      .from("profiles")
      .select("plan, role, analyses_this_month")
      .eq("id", user.id)
      .single();
    if (profileError) {
      console.error("Profile lookup failed:", profileError.message);
    }

    const isAdmin = profile?.role === "admin";
    const plan = profile?.plan || "free";
    // FIX v4.1: Use analyses_this_month from profiles (already tracked), not a separate count query
    const scanCount = profile?.analyses_this_month ?? 0;
    const limit = !isAdmin && plan === "free" ? FREE_PLAN_MONTHLY_SCANS : UNLIMITED_SCANS;

    if (scanCount >= limit) {
      return NextResponse.json({ error: "Monthly scan limit reached. Please upgrade to Pro." }, { status: 403 });
    }

    // Step 1: Submit to Gradio Space
    // FIX v4.3: Use the explicit api_name="analyze" set in app.py scan_btn.click()
    const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/analyze`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ data: [text] }),
    });
    if (!submitRes.ok) {
      throw new Error(`Gradio submit failed: ${submitRes.status}`);
    }

    const { event_id } = await submitRes.json();
    if (!event_id) throw new Error("No event_id from Gradio");

    // Step 2: Poll the SSE endpoint until the analysis completes
    const resultText = await pollForCompletion(event_id);

    // Step 3: Parse the SSE data
    const gradioData = parseCompletePayload(resultText);

    // Step 4: Download the JSON report file (structured data)
    const reportUrl = (gradioData[8] as { url?: unknown } | null | undefined)?.url;
    if (typeof reportUrl !== "string" || !reportUrl) {
      throw new Error("No JSON report generated");
    }

    const jsonRes = await fetch(reportUrl);
    if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
    // A literal `null` report would crash every property read below.
    const analysisData = (await jsonRes.json()) ?? {};

    // Step 5: Transform to frontend format
    const riskScore = analysisData.risk?.score ?? 0;
    const grade = analysisData.risk?.grade ?? "A";
    const totalClauses = analysisData.metadata?.total_clauses ?? 0;
    const flaggedCount = analysisData.metadata?.flagged_clauses ?? 0;

    // Group clauses by text (multiple labels per clause)
    const results = groupClausesByText(analysisData.clauses);

    // FIX v4.1: Parse redlines from structured JSON data instead of fragile HTML regex;
    // fall back to the HTML panel only if no structured data is present.
    let redlines = redlinesFromReport(analysisData.redlines);
    if (redlines.length === 0) {
      const redlineHtml = typeof gradioData[7] === "string" ? gradioData[7] : "";
      redlines = redlinesFromHtml(redlineHtml);
    }

    const modelStatus = analysisData.metadata?.model || "";
    const model = typeof modelStatus === "string" && modelStatus.includes("loaded") ? "ml" : "regex";

    const entities = analysisData.entities || [];
    const contradictions = analysisData.contradictions || [];
    const obligations = analysisData.obligations || [];
    const compliance = analysisData.compliance || {};

    // FIX v4.1: Increment scan count in profiles table.
    // Skipped when the profile could not be read: `scanCount` is then only the
    // default 0, and writing 0 + 1 would clobber the user's real counter.
    if (profile) {
      const { error: countError } = await supabase
        .from("profiles")
        .update({ analyses_this_month: scanCount + 1 })
        .eq("id", user.id);
      if (countError) {
        console.error("Failed to update scan count:", countError.message);
      }
    }

    // FIX v4.3: Save analysis to DB so it shows in history
    // Wrapped in Promise.resolve() because Supabase returns PromiseLike (no .catch).
    // Fire-and-forget: the response is not blocked, but failures are logged.
    void Promise.resolve(
      supabase.from("analyses").insert({
        user_id: user.id,
        total_clauses: totalClauses,
        flagged_count: flaggedCount,
        risk_score: riskScore,
        grade,
        clauses: results,
        entities,
        contradictions,
        obligations,
        compliance,
        model,
      })
    )
      .then((insertResult) => {
        if (insertResult?.error) {
          console.error("Failed to save analysis:", insertResult.error.message);
        }
      })
      .catch((err: unknown) => {
        console.error("Failed to save analysis:", errorMessage(err));
      });

    return NextResponse.json({
      risk_score: riskScore,
      grade,
      total_clauses: totalClauses,
      flagged_count: flaggedCount,
      results,
      entities,
      contradictions,
      obligations,
      compliance,
      redlines,
      model,
      latency_ms: 0,
      session_id: null,
    });
  } catch (error: unknown) {
    const message = errorMessage(error);
    console.error("Analyze error:", message);
    return NextResponse.json(
      { error: "Analysis failed: " + (message || "Try again in 30 seconds.") },
      { status: 500 }
    );
  }
}