gaurv007 committed on
Commit
e050c6f
·
verified ·
1 Parent(s): 2093a96

fix(web): remove XSS text corruption, fix scan count, add input validation, improve SSE polling

Browse files
Files changed (1) hide show
  1. web/app/api/analyze/route.ts +1 -203
web/app/api/analyze/route.ts CHANGED
@@ -1,203 +1 @@
1
- import { NextRequest, NextResponse } from "next/server";
2
- import { createClient } from "@/lib/supabase/server";
3
-
4
- const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
5
-
6
- export async function POST(req: NextRequest) {
7
- try {
8
- const supabase = await createClient();
9
- const { data: { user } } = await supabase.auth.getUser();
10
-
11
- if (!user) {
12
- return NextResponse.json({ error: "Unauthorized. Please log in to analyze texts." }, { status: 401 });
13
- }
14
-
15
- const body = await req.json();
16
- let { text } = body;
17
-
18
- if (!text || typeof text !== "string" || text.trim().length < 50) {
19
- return NextResponse.json(
20
- { error: "Please provide at least 50 characters of text to analyze." },
21
- { status: 400 }
22
- );
23
- }
24
-
25
- // Check scan limits
26
- const { data: profile } = await supabase
27
- .from("profiles")
28
- .select("plan, role")
29
- .eq("id", user.id)
30
- .single();
31
-
32
- const isAdmin = profile?.role === "admin";
33
- const plan = profile?.plan || "free";
34
-
35
- const { count: scanCount } = await supabase
36
- .from("analysis_history")
37
- .select("*", { count: "exact", head: true })
38
- .gte("created_at", new Date(new Date().getFullYear(), new Date().getMonth(), 1).toISOString())
39
- .eq("user_id", user.id);
40
-
41
- const limit = isAdmin ? 999999 : plan === "free" ? 10 : 999999;
42
- if ((scanCount ?? 0) >= limit) {
43
- return NextResponse.json({ error: "Monthly scan limit reached. Please upgrade to premium." }, { status: 403 });
44
- }
45
-
46
- // Sanitize basic HTML tags if any to prevent XSS down the line
47
- text = text.replace(/</g, "&lt;").replace(/>/g, "&gt;");
48
-
49
- // Step 1: Submit to Gradio Space
50
- const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/_analysis_and_index`, {
51
- method: "POST",
52
- headers: { "Content-Type": "application/json" },
53
- body: JSON.stringify({ data: [text] }),
54
- });
55
-
56
- if (!submitRes.ok) {
57
- throw new Error(`Gradio submit failed: ${submitRes.status}`);
58
- }
59
-
60
- const { event_id } = await submitRes.json();
61
- if (!event_id) throw new Error("No event_id from Gradio");
62
-
63
- // Step 2: Poll for result (SSE)
64
- // The Gradio API streams but we need the full response
65
- let resultText = "";
66
- let attempts = 0;
67
- const maxAttempts = 60; // 60 seconds max
68
-
69
- while (attempts < maxAttempts) {
70
- const resultRes = await fetch(
71
- `${GRADIO_URL}/gradio_api/call/_analysis_and_index/${event_id}`,
72
- { headers: { Accept: "text/event-stream" } }
73
- );
74
-
75
- resultText = await resultRes.text();
76
-
77
- if (resultText.includes("event: complete")) break;
78
- if (resultText.includes("event: error")) {
79
- const errMatch = resultText.match(/data:\s*(.+)/);
80
- throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
81
- }
82
-
83
- // Wait 1 second and retry
84
- await new Promise(r => setTimeout(r, 1000));
85
- attempts++;
86
- }
87
-
88
- if (!resultText.includes("event: complete")) {
89
- throw new Error("Analysis timed out");
90
- }
91
-
92
- // Step 3: Parse the SSE data
93
- // Format: "event: complete\ndata: [...]"
94
- // The data contains HTML with literal newlines, so we need to find 'data: ' after 'event: complete'
95
- const completeIdx = resultText.indexOf("event: complete");
96
- const dataIdx = resultText.indexOf("data: ", completeIdx);
97
- if (dataIdx === -1) throw new Error("No data in response");
98
-
99
- const dataStr = resultText.substring(dataIdx + 6).trim();
100
-
101
- // Parse JSON — the HTML strings contain control characters so we need to handle that
102
- // In JS, JSON.parse is more lenient with control chars in strings than Python's strict mode
103
- let gradioData: any[];
104
- try {
105
- gradioData = JSON.parse(dataStr);
106
- } catch {
107
- // If direct parse fails, try replacing problematic control characters
108
- const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
109
- if (ch === "\n") return "\\n";
110
- if (ch === "\r") return "\\r";
111
- if (ch === "\t") return "\\t";
112
- return "";
113
- });
114
- gradioData = JSON.parse(cleaned);
115
- }
116
-
117
- // Step 4: Download the JSON report file (structured data)
118
- // gradioData[8] is the JSON file object with { url, path, ... }
119
- const jsonFileObj = gradioData[8];
120
- if (!jsonFileObj?.url) {
121
- throw new Error("No JSON report generated");
122
- }
123
-
124
- // Download immediately (temp files expire quickly)
125
- const jsonRes = await fetch(jsonFileObj.url);
126
- if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
127
- const analysisData = await jsonRes.json();
128
-
129
- // Step 5: Transform to frontend format
130
- const riskScore = analysisData.risk?.score ?? 0;
131
- const grade = analysisData.risk?.grade ?? "A";
132
- const totalClauses = analysisData.metadata?.total_clauses ?? 0;
133
- const flaggedCount = analysisData.metadata?.flagged_clauses ?? 0;
134
-
135
- // Group clauses by text (multiple labels per clause)
136
- const clauseMap = new Map<string, any>();
137
- for (const cr of (analysisData.clauses || [])) {
138
- if (!clauseMap.has(cr.text)) {
139
- clauseMap.set(cr.text, { text: cr.text, categories: [] });
140
- }
141
- clauseMap.get(cr.text)!.categories.push({
142
- name: cr.label,
143
- severity: cr.risk,
144
- confidence: cr.confidence,
145
- description: cr.description,
146
- });
147
- }
148
- const results = Array.from(clauseMap.values());
149
-
150
- // Parse redlines from HTML (gradioData[7])
151
- const redlines: any[] = [];
152
- const redlineHtml = typeof gradioData[7] === "string" ? gradioData[7] : "";
153
- if (redlineHtml.includes("Clause Redlining")) {
154
- // Split by redline card borders
155
- const blocks = redlineHtml.split(/border-left:4px solid #/);
156
- for (let i = 1; i < blocks.length; i++) {
157
- const block = blocks[i];
158
- const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/);
159
- const origMatch = block.match(/<del>([^<]*)<\/del>/);
160
- const safeBlock = block.match(/Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
161
- const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
162
- const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
163
- const isLLM = block.includes("LLM Refined");
164
-
165
- if (labelMatch) {
166
- redlines.push({
167
- clause_label: labelMatch[1].trim(),
168
- risk_level: labelMatch[2].trim(),
169
- original_text: origMatch ? origMatch[1].trim() : "",
170
- safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
171
- legal_basis: legalMatch ? legalMatch[1].trim() : "",
172
- consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
173
- tier: isLLM ? "llm_refined" : "template",
174
- });
175
- }
176
- }
177
- }
178
-
179
- const modelStatus = analysisData.metadata?.model || "";
180
-
181
- return NextResponse.json({
182
- risk_score: riskScore,
183
- grade,
184
- total_clauses: totalClauses,
185
- flagged_count: flaggedCount,
186
- results,
187
- entities: analysisData.entities || [],
188
- contradictions: analysisData.contradictions || [],
189
- obligations: analysisData.obligations || [],
190
- compliance: analysisData.compliance || {},
191
- redlines,
192
- model: modelStatus.includes("loaded") ? "ml" : "regex",
193
- latency_ms: 0,
194
- session_id: null,
195
- });
196
- } catch (error: any) {
197
- console.error("Analyze error:", error.message);
198
- return NextResponse.json(
199
- { error: "Analysis failed: " + (error.message || "Try again in 30 seconds.") },
200
- { status: 500 }
201
- );
202
- }
203
- }
 
1
+ file:/app/web_api_analyze_route.ts