Spaces:

gaurv007
/

ClauseGuard

Sleeping

App Files Files Community

gaurv007 committed on 15 days ago

Commit

74929b6

verified ·

1 Parent(s): d339e38

v4.0: Fix analyze route — proper SSE parsing + immediate JSON file download

Browse files

Files changed (1) hide show

web/app/api/analyze/route.ts +103 -89

web/app/api/analyze/route.ts CHANGED Viewed

@@ -14,7 +14,7 @@ export async function POST(req: NextRequest) {
       );
     }
-    // Step 1: Submit analysis to Gradio Space API
     const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/_analysis_and_index`, {
       method: "POST",
       headers: { "Content-Type": "application/json" },
@@ -28,129 +28,143 @@ export async function POST(req: NextRequest) {
     const { event_id } = await submitRes.json();
     if (!event_id) throw new Error("No event_id from Gradio");
-    // Step 2: Poll for result (SSE endpoint)
-    const resultRes = await fetch(
-      `${GRADIO_URL}/gradio_api/call/_analysis_and_index/${event_id}`,
-      { headers: { Accept: "text/event-stream" } }
-    );
-    if (!resultRes.ok) {
-      throw new Error(`Gradio result failed: ${resultRes.status}`);
     }
-    const resultText = await resultRes.text();
-    // Parse SSE — find the "data:" line after "event: complete"
-    const dataMatch = resultText.match(/event:\s*complete\s*\ndata:\s*(.+)/);
-    if (!dataMatch) throw new Error("No complete event from Gradio");
-    const gradioData = JSON.parse(dataMatch[1]);
-    // gradioData is an array:
-    // [0] = summary HTML
-    // [1] = clauses HTML
-    // [2] = entities HTML
-    // [3] = contradictions HTML
-    // [4] = document HTML
-    // [5] = obligations HTML
-    // [6] = compliance HTML
-    // [7] = redlining HTML
-    // [8] = JSON file object {path, url, ...}
-    // [9] = CSV file object
-    // [10] = status string
-    // [11] = analysis result dict (or null — it's gr.State, not directly in API output)
-    // ... (chunks, embeddings, chatbot status — also gr.State, not in output)
-    // Step 3: Download the JSON report file which has structured data
     const jsonFileObj = gradioData[8];
     if (!jsonFileObj?.url) {
-      throw new Error("No JSON report in response");
     }
     const jsonRes = await fetch(jsonFileObj.url);
-    if (!jsonRes.ok) throw new Error("Failed to download JSON report");
     const analysisData = await jsonRes.json();
-    // Step 4: Transform to the format the frontend expects
-    const clauseResults: any[] = [];
     for (const cr of (analysisData.clauses || [])) {
-      // Find or create the clause entry
-      let existing = clauseResults.find(c => c.text === cr.text);
-      if (!existing) {
-        existing = { text: cr.text, categories: [] };
-        clauseResults.push(existing);
       }
-      existing.categories.push({
         name: cr.label,
         severity: cr.risk,
         confidence: cr.confidence,
         description: cr.description,
       });
     }
-    // Extract redlines from the analysis (parse from HTML if needed)
-    // Since redlines aren't in the JSON report, parse from the HTML
     const redlines: any[] = [];
-    const redlineHtml = gradioData[7] || "";
-    // Simple regex extraction from redlining HTML
-    const redlineBlocks = redlineHtml.split('border-left:4px solid');
-    for (let i = 1; i < redlineBlocks.length; i++) {
-      const block = redlineBlocks[i];
-      const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*>([^<]+)/);
-      const originalMatch = block.match(/<del>([^<]*)<\/del>/);
-      const safeMatch = block.match(/Suggested Alternative<\/div>\s*<div[^>]*>([^<]*(?:<[^/][^>]*>[^<]*)*)/);
-      const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
-      const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
-      const tierMatch = block.match(/(LLM Refined|Template)/);
-      if (labelMatch) {
-        // Clean HTML from safe alternative
-        let safeText = safeMatch ? safeMatch[1].replace(/<[^>]+>/g, '').trim() : '';
-        redlines.push({
-          clause_label: labelMatch[1].trim(),
-          risk_level: labelMatch[2].trim(),
-          original_text: originalMatch ? originalMatch[1].trim() : '',
-          safe_alternative: safeText,
-          legal_basis: legalMatch ? legalMatch[1].trim() : '',
-          consumer_standard: consumerMatch ? consumerMatch[1].trim() : '',
-          tier: tierMatch ? (tierMatch[1] === 'LLM Refined' ? 'llm_refined' : 'template') : 'template',
-        });
       }
     }
-    // Parse risk score and grade from summary HTML
-    const scoreMatch = (gradioData[0] || '').match(/font-size:48px[^>]*>(\d+)/);
-    const gradeMatch = (gradioData[0] || '').match(/Grade\s+([A-F])/);
-    const totalMatch = (gradioData[0] || '').match(/(\d+)\s+clauses\s+analyzed/);
-    const flaggedMatch = (gradioData[0] || '').match(/(\d+)\s+flagged/);
-    // Parse severity counts from summary HTML
-    const critMatch = (gradioData[0] || '').match(/color:#dc2626[^>]*>(\d+)<\/div>\s*<div[^>]*>Critical/);
-    const highMatch2 = (gradioData[0] || '').match(/color:#ea580c[^>]*>(\d+)<\/div>\s*<div[^>]*>High/);
-    const medMatch = (gradioData[0] || '').match(/color:#ca8a04[^>]*>(\d+)<\/div>\s*<div[^>]*>Medium/);
-    const lowMatch = (gradioData[0] || '').match(/color:#16a34a[^>]*>(\d+)<\/div>\s*<div[^>]*>Low/);
-    const riskScore = scoreMatch ? parseInt(scoreMatch[1]) : 0;
-    const grade = gradeMatch ? gradeMatch[1] : 'A';
     return NextResponse.json({
       risk_score: riskScore,
-      grade: grade,
-      total_clauses: totalMatch ? parseInt(totalMatch[1]) : clauseResults.length,
-      flagged_count: flaggedMatch ? parseInt(flaggedMatch[1]) : clauseResults.length,
-      results: clauseResults,
       entities: analysisData.entities || [],
       contradictions: analysisData.contradictions || [],
       obligations: analysisData.obligations || [],
       compliance: analysisData.compliance || {},
-      redlines: redlines,
-      model: analysisData.metadata?.model?.includes("loaded") ? "ml" : "regex",
       latency_ms: 0,
-      session_id: null, // Gradio API doesn't expose gr.State
     });
   } catch (error: any) {
     console.error("Analyze error:", error.message);
     return NextResponse.json(
-      { error: "Analysis failed: " + (error.message || "Unknown error. The backend may be starting up — try again in 30 seconds.") },
       { status: 500 }
     );
   }

       );
     }
+    // Step 1: Submit to Gradio Space
     const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/_analysis_and_index`, {
       method: "POST",
       headers: { "Content-Type": "application/json" },
     const { event_id } = await submitRes.json();
     if (!event_id) throw new Error("No event_id from Gradio");
+    // Step 2: Poll for result (SSE)
+    // The Gradio API streams but we need the full response
+    let resultText = "";
+    let attempts = 0;
+    const maxAttempts = 60; // 60 seconds max
+    while (attempts < maxAttempts) {
+      const resultRes = await fetch(
+        `${GRADIO_URL}/gradio_api/call/_analysis_and_index/${event_id}`,
+        { headers: { Accept: "text/event-stream" } }
+      );
+      resultText = await resultRes.text();
+      if (resultText.includes("event: complete")) break;
+      if (resultText.includes("event: error")) {
+        const errMatch = resultText.match(/data:\s*(.+)/);
+        throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
+      }
+      // Wait 1 second and retry
+      await new Promise(r => setTimeout(r, 1000));
+      attempts++;
+    }
+    if (!resultText.includes("event: complete")) {
+      throw new Error("Analysis timed out");
+    }
+    // Step 3: Parse the SSE data
+    // Format: "event: complete\ndata: [...]"
+    // The data contains HTML with literal newlines, so we need to find 'data: ' after 'event: complete'
+    const completeIdx = resultText.indexOf("event: complete");
+    const dataIdx = resultText.indexOf("data: ", completeIdx);
+    if (dataIdx === -1) throw new Error("No data in response");
+    const dataStr = resultText.substring(dataIdx + 6).trim();
+    // Parse JSON — the HTML strings may contain raw control characters (newlines, tabs, ...)
+    // JSON.parse rejects unescaped control chars inside string literals, so if the direct parse throws we escape them and retry
+    let gradioData: any[];
+    try {
+      gradioData = JSON.parse(dataStr);
+    } catch {
+      // If direct parse fails, try replacing problematic control characters
+      const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
+        if (ch === "\n") return "\\n";
+        if (ch === "\r") return "\\r";
+        if (ch === "\t") return "\\t";
+        return "";
+      });
+      gradioData = JSON.parse(cleaned);
     }
+    // Step 4: Download the JSON report file (structured data)
+    // gradioData[8] is the JSON file object with { url, path, ... }
     const jsonFileObj = gradioData[8];
     if (!jsonFileObj?.url) {
+      throw new Error("No JSON report generated");
     }
+    // Download immediately (temp files expire quickly)
     const jsonRes = await fetch(jsonFileObj.url);
+    if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
     const analysisData = await jsonRes.json();
+    // Step 5: Transform to frontend format
+    const riskScore = analysisData.risk?.score ?? 0;
+    const grade = analysisData.risk?.grade ?? "A";
+    const totalClauses = analysisData.metadata?.total_clauses ?? 0;
+    const flaggedCount = analysisData.metadata?.flagged_clauses ?? 0;
+    // Group clauses by text (multiple labels per clause)
+    const clauseMap = new Map<string, any>();
     for (const cr of (analysisData.clauses || [])) {
+      if (!clauseMap.has(cr.text)) {
+        clauseMap.set(cr.text, { text: cr.text, categories: [] });
       }
+      clauseMap.get(cr.text)!.categories.push({
         name: cr.label,
         severity: cr.risk,
         confidence: cr.confidence,
         description: cr.description,
       });
     }
+    const results = Array.from(clauseMap.values());
+    // Parse redlines from HTML (gradioData[7])
     const redlines: any[] = [];
+    const redlineHtml = typeof gradioData[7] === "string" ? gradioData[7] : "";
+    if (redlineHtml.includes("Clause Redlining")) {
+      // Split by redline card borders
+      const blocks = redlineHtml.split(/border-left:4px solid #/);
+      for (let i = 1; i < blocks.length; i++) {
+        const block = blocks[i];
+        const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/);
+        const origMatch = block.match(/<del>([^<]*)<\/del>/);
+        const safeBlock = block.match(/Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
+        const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
+        const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
+        const isLLM = block.includes("LLM Refined");
+        if (labelMatch) {
+          redlines.push({
+            clause_label: labelMatch[1].trim(),
+            risk_level: labelMatch[2].trim(),
+            original_text: origMatch ? origMatch[1].trim() : "",
+            safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
+            legal_basis: legalMatch ? legalMatch[1].trim() : "",
+            consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
+            tier: isLLM ? "llm_refined" : "template",
+          });
+        }
       }
     }
+    const modelStatus = analysisData.metadata?.model || "";
     return NextResponse.json({
       risk_score: riskScore,
+      grade,
+      total_clauses: totalClauses,
+      flagged_count: flaggedCount,
+      results,
       entities: analysisData.entities || [],
       contradictions: analysisData.contradictions || [],
       obligations: analysisData.obligations || [],
       compliance: analysisData.compliance || {},
+      redlines,
+      model: modelStatus.includes("loaded") ? "ml" : "regex",
       latency_ms: 0,
+      session_id: null,
     });
   } catch (error: any) {
     console.error("Analyze error:", error.message);
     return NextResponse.json(
+      { error: "Analysis failed: " + (error.message || "Try again in 30 seconds.") },
       { status: 500 }
     );
   }