Spaces:
Sleeping
Sleeping
| import { NextRequest, NextResponse } from "next/server"; | |
| import { createClient } from "@/lib/supabase/server"; | |
/**
 * Base URL of the ClauseGuard Gradio Space, taken from the
 * CLAUSEGUARD_GRADIO_URL environment variable when set (non-empty), otherwise
 * the public Space URL. Trailing slashes are stripped so that callers can
 * append paths such as `/gradio_api/...` without producing `//`.
 */
const GRADIO_URL = (
  process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space"
).replace(/\/+$/, "");

/**
 * Maximum accepted length of the submitted text (prevents oversized payloads).
 * This is compared against `String.length`, i.e. it counts UTF-16 code units
 * (roughly characters), not bytes.
 */
const MAX_TEXT_LENGTH = 200_000;
/** Monthly analysis allowance for non-admin users on the "free" plan. */
const FREE_PLAN_MONTHLY_LIMIT = 10;

/** Effectively-unlimited allowance applied to admins and every non-free plan. */
const UNLIMITED_MONTHLY_LIMIT = 999999;

/** Path of the Gradio call endpoint, appended to GRADIO_URL. */
const GRADIO_CALL_PATH = "/gradio_api/call/_analysis_and_index";

/** Number of times the result endpoint is polled before giving up. */
const MAX_POLL_ATTEMPTS = 90;

/** Redline suggestion as returned to the frontend. */
interface Redline {
  clause_label: string;
  risk_level: string;
  original_text: string;
  safe_alternative: string;
  template_alternative: string;
  legal_basis: string;
  consumer_standard: string;
  tier: string;
}

/** One entry of `clauses` in the downloaded report (only the fields read here). */
interface ClauseFinding {
  text: string;
  label?: unknown;
  risk?: unknown;
  confidence?: unknown;
  description?: unknown;
}

/** A clause together with every category that was reported for its text. */
interface GroupedClause {
  text: string;
  categories: {
    name: unknown;
    severity: unknown;
    confidence: unknown;
    description: unknown;
  }[];
}

/**
 * The parts of the downloaded JSON report that this route reads.
 * NOTE(review): this is not a validated schema — the report is trusted as-is.
 */
interface AnalysisReport {
  risk?: { score?: number; grade?: string };
  metadata?: { total_clauses?: number; flagged_clauses?: number; model?: string };
  clauses?: ClauseFinding[];
  redlines?: unknown;
  entities?: unknown;
  contradictions?: unknown;
  obligations?: unknown;
  compliance?: unknown;
}

/**
 * POST handler: analyzes a contract text for the logged-in user.
 *
 * Flow: authenticate via Supabase -> validate `text` from the JSON body ->
 * enforce the monthly scan limit from `profiles` -> submit the text to the
 * Gradio Space and poll for the result -> download the JSON report ->
 * reshape it for the frontend -> increment `analyses_this_month`.
 *
 * Responses:
 * - 401 when no user is logged in.
 * - 400 when the body is not valid JSON, or `text` is missing, not a string,
 *   shorter than 50 characters (after trimming) or longer than MAX_TEXT_LENGTH.
 * - 403 when the monthly scan limit has been reached.
 * - 500 (with the error message) for any failure while talking to the backend.
 * - 200 with the analysis payload otherwise.
 */
export async function POST(req: NextRequest) {
  try {
    const supabase = await createClient();
    const { data: { user } } = await supabase.auth.getUser();
    if (!user) {
      return NextResponse.json({ error: "Unauthorized. Please log in to analyze texts." }, { status: 401 });
    }

    // A malformed body is a client error, not a server failure.
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      return NextResponse.json({ error: "Request body must be valid JSON." }, { status: 400 });
    }
    const text =
      typeof body === "object" && body !== null ? (body as { text?: unknown }).text : undefined;

    if (typeof text !== "string" || text.trim().length < 50) {
      return NextResponse.json(
        { error: "Please provide at least 50 characters of text to analyze." },
        { status: 400 }
      );
    }
    if (text.length > MAX_TEXT_LENGTH) {
      return NextResponse.json(
        { error: `Text too long (${(text.length / 1000).toFixed(0)}KB). Maximum is ${MAX_TEXT_LENGTH / 1000}KB.` },
        { status: 400 }
      );
    }

    // The text is deliberately NOT HTML-escaped here: rewriting "<" / ">" would
    // corrupt contracts that contain them (e.g. "shall not exceed >$10,000")
    // before analysis. Escaping belongs at render time in the frontend.

    // Scan limit: the monthly counter is read from the user's profile row.
    // NOTE(review): when the profile cannot be read, `profile` is null and the
    // user is treated as a free-plan user with 0 scans — confirm this is intended.
    const { data: profile } = await supabase
      .from("profiles")
      .select("plan, role, analyses_this_month")
      .eq("id", user.id)
      .single();

    const isAdmin = profile?.role === "admin";
    const plan = profile?.plan || "free";
    const scanCount = profile?.analyses_this_month ?? 0;
    const limit = !isAdmin && plan === "free" ? FREE_PLAN_MONTHLY_LIMIT : UNLIMITED_MONTHLY_LIMIT;
    if (scanCount >= limit) {
      return NextResponse.json({ error: "Monthly scan limit reached. Please upgrade to Pro." }, { status: 403 });
    }

    // Step 1: submit the text to the Gradio Space.
    const submitRes = await fetch(`${GRADIO_URL}${GRADIO_CALL_PATH}`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ data: [text] }),
    });
    if (!submitRes.ok) {
      throw new Error(`Gradio submit failed: ${submitRes.status}`);
    }
    const submitPayload = (await submitRes.json()) as { event_id?: string } | null;
    const eventId = submitPayload?.event_id;
    if (!eventId) throw new Error("No event_id from Gradio");

    // Step 2: poll until the backend reports completion, then decode its payload.
    const sseText = await pollForCompletion(eventId);
    const gradioData = parseCompleteEventData(sseText);

    // Step 3: download the structured JSON report (output slot 8).
    const reportUrl = (gradioData[8] as { url?: string } | null | undefined)?.url;
    if (!reportUrl) {
      throw new Error("No JSON report generated");
    }
    const jsonRes = await fetch(reportUrl);
    if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
    const analysisData = (await jsonRes.json()) as AnalysisReport;

    // Step 4: transform to the frontend format.
    const results = groupClausesByText(analysisData.clauses || []);
    // Structured redlines are preferred; the HTML in output slot 7 is a fallback.
    const redlines = collectRedlines(analysisData.redlines, gradioData[7]);
    const modelStatus = analysisData.metadata?.model || "";

    // Step 5: record the scan. A failed update is logged but does not fail the
    // request, since the analysis itself succeeded.
    // NOTE(review): this is a read-then-write increment, so concurrent requests
    // from the same user can under-count.
    const { error: updateError } = await supabase
      .from("profiles")
      .update({ analyses_this_month: scanCount + 1 })
      .eq("id", user.id);
    if (updateError) {
      console.error("Failed to update analyses_this_month:", updateError.message);
    }

    return NextResponse.json({
      risk_score: analysisData.risk?.score ?? 0,
      grade: analysisData.risk?.grade ?? "A",
      total_clauses: analysisData.metadata?.total_clauses ?? 0,
      flagged_count: analysisData.metadata?.flagged_clauses ?? 0,
      results,
      entities: analysisData.entities || [],
      contradictions: analysisData.contradictions || [],
      obligations: analysisData.obligations || [],
      compliance: analysisData.compliance || {},
      redlines,
      model: modelStatus.includes("loaded") ? "ml" : "regex",
      latency_ms: 0, // not measured; always reported as 0
      session_id: null,
    });
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : "";
    console.error("Analyze error:", message);
    return NextResponse.json(
      { error: "Analysis failed: " + (message || "Try again in 30 seconds.") },
      { status: 500 }
    );
  }
}

/**
 * Polls the Gradio result endpoint for `eventId` until the response contains
 * `event: complete`, and returns the raw SSE text of that response.
 *
 * Between polls it sleeps with a growing delay (500 ms, x1.2 per attempt,
 * capped at 2 s). With MAX_POLL_ATTEMPTS polls that is roughly 170 s of
 * sleeping in the worst case, in addition to the time the requests take.
 *
 * @throws Error with the backend's message when the stream contains
 *   `event: error`, or a timeout error when no poll reports completion.
 */
async function pollForCompletion(eventId: string): Promise<string> {
  let delayMs = 500;
  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
    const res = await fetch(`${GRADIO_URL}${GRADIO_CALL_PATH}/${eventId}`, {
      headers: { Accept: "text/event-stream" },
    });
    const sseText = await res.text();
    if (sseText.includes("event: complete")) return sseText;

    const errorIdx = sseText.indexOf("event: error");
    if (errorIdx !== -1) {
      // Look for the data line that follows the error event, not an earlier one.
      const errMatch = sseText.slice(errorIdx).match(/data:\s*(.+)/);
      throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
    }

    await new Promise((resolve) => setTimeout(resolve, delayMs));
    delayMs = Math.min(delayMs * 1.2, 2000);
  }
  throw new Error("Analysis timed out. The backend may be loading models. Please try again in 30 seconds.");
}

/**
 * Extracts and JSON-decodes the `data: ` payload that follows `event: complete`.
 *
 * Three candidates are tried in order: the payload up to the blank line that
 * ends the event (so trailing events do not break parsing), the whole
 * remainder of the stream, and the remainder with raw control characters
 * escaped or dropped.
 *
 * @throws Error "No data in response" when there is no data line, the last
 *   JSON error when no candidate parses, and "No JSON report generated" when
 *   the decoded value is not an array.
 */
function parseCompleteEventData(sseText: string): unknown[] {
  const completeIdx = sseText.indexOf("event: complete");
  const dataIdx = sseText.indexOf("data: ", completeIdx);
  if (dataIdx === -1) throw new Error("No data in response");

  const remainder = sseText.substring(dataIdx + 6);
  const eventEnd = remainder.search(/\r?\n\r?\n/);
  const bounded = (eventEnd === -1 ? remainder : remainder.slice(0, eventEnd)).trim();
  const whole = remainder.trim();
  const escaped = whole.replace(/[\x00-\x1f]/g, (ch: string) => {
    if (ch === "\n") return "\\n";
    if (ch === "\r") return "\\r";
    if (ch === "\t") return "\\t";
    return "";
  });

  let parsed: unknown;
  let parseError: unknown;
  let didParse = false;
  // The Set removes duplicate candidates so the same string is not parsed twice.
  for (const candidate of new Set([bounded, whole, escaped])) {
    try {
      parsed = JSON.parse(candidate);
      didParse = true;
      break;
    } catch (err) {
      parseError = err;
    }
  }
  if (!didParse) {
    throw parseError instanceof Error ? parseError : new Error("No data in response");
  }
  // Output slots are read by index, so anything but an array has no report.
  if (!Array.isArray(parsed)) throw new Error("No JSON report generated");
  return parsed;
}

/**
 * Groups report findings by clause text: each distinct text appears once, in
 * first-seen order, with one category entry per finding for that text.
 */
function groupClausesByText(findings: ClauseFinding[]): GroupedClause[] {
  const byText = new Map<string, GroupedClause>();
  for (const finding of findings) {
    let group = byText.get(finding.text);
    if (!group) {
      group = { text: finding.text, categories: [] };
      byText.set(finding.text, group);
    }
    group.categories.push({
      name: finding.label,
      severity: finding.risk,
      confidence: finding.confidence,
      description: finding.description,
    });
  }
  return Array.from(byText.values());
}

/**
 * Returns the redlines from the structured report when it has any; otherwise
 * falls back to scraping them from the redline HTML (when that is a string).
 */
function collectRedlines(structured: unknown, redlineHtml: unknown): Redline[] {
  const fromReport = Array.isArray(structured)
    ? structured.map((rl: Partial<Redline>) => normalizeRedline(rl))
    : [];
  if (fromReport.length > 0) return fromReport;
  return typeof redlineHtml === "string" ? parseRedlinesFromHtml(redlineHtml) : [];
}

/** Fills every missing or falsy redline field with its default value. */
function normalizeRedline(rl: Partial<Redline>): Redline {
  return {
    clause_label: rl.clause_label || "",
    risk_level: rl.risk_level || "MEDIUM",
    original_text: rl.original_text || "",
    safe_alternative: rl.safe_alternative || "",
    template_alternative: rl.template_alternative || "",
    legal_basis: rl.legal_basis || "",
    consumer_standard: rl.consumer_standard || "",
    tier: rl.tier || "template",
  };
}

/**
 * Best-effort extraction of redlines from the rendered "Clause Redlining"
 * HTML. The patterns depend on the exact inline styles of that markup, so a
 * block whose label cannot be matched is skipped.
 */
function parseRedlinesFromHtml(redlineHtml: string): Redline[] {
  const parsed: Redline[] = [];
  if (!redlineHtml.includes("Clause Redlining")) return parsed;

  // Each redline card starts with a coloured left border; the text before the
  // first border is preamble and is dropped.
  const cards = redlineHtml.split(/border-left:4px solid #/).slice(1);
  for (const card of cards) {
    const labelMatch = card.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/);
    if (!labelMatch) continue;

    const origMatch = card.match(/<del>([^<]*)<\/del>/);
    const safeBlock = card.match(/Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
    const legalMatch = card.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
    const consumerMatch = card.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);

    parsed.push({
      clause_label: labelMatch[1].trim(),
      risk_level: labelMatch[2].trim(),
      original_text: origMatch ? origMatch[1].trim() : "",
      // Strip any nested tags from the suggested text.
      safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
      // Not present in the HTML; included so both sources yield the same shape.
      template_alternative: "",
      legal_basis: legalMatch ? legalMatch[1].trim() : "",
      consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
      tier: card.includes("LLM Refined") ? "llm_refined" : "template",
    });
  }
  return parsed;
}