File size: 6,469 Bytes
9548e93
 
510d822
9548e93
 
 
 
510d822
9548e93
 
 
 
 
 
 
 
74929b6
510d822
9548e93
510d822
 
9548e93
 
510d822
 
 
 
 
 
 
74929b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510d822
74929b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510d822
 
74929b6
 
510d822
 
74929b6
9548e93
 
74929b6
510d822
74929b6
510d822
 
74929b6
 
 
 
 
 
 
 
510d822
74929b6
 
510d822
74929b6
510d822
 
 
 
 
 
74929b6
510d822
74929b6
510d822
74929b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510d822
 
 
74929b6
510d822
 
 
74929b6
 
 
 
510d822
 
 
 
74929b6
 
510d822
74929b6
510d822
4148ffc
 
9548e93
74929b6
9548e93
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import { NextRequest, NextResponse } from "next/server";

// Base URL of the Gradio Space that the analysis requests in this file are sent to.
// Read from the CLAUSEGUARD_GRADIO_URL environment variable; because `||` is used
// (not `??`), an unset OR empty variable falls back to the hard-coded default.
const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";

/** Name of the Gradio API endpoint that runs the analysis. */
const ANALYSIS_ENDPOINT = "_analysis_and_index";

/** Minimum trimmed length of `text` accepted for analysis. */
const MIN_TEXT_LENGTH = 50;

/** Maximum number of times the result endpoint is requested before giving up. */
const MAX_POLL_ATTEMPTS = 60;

/** Pause between two result requests, in milliseconds. */
const POLL_INTERVAL_MS = 1000;

/** Markers used to locate events and their payload in the result stream text. */
const COMPLETE_MARKER = "event: complete";
const ERROR_MARKER = "event: error";
const DATA_PREFIX = "data: ";

/** Patterns used to scrape individual fields out of one redline card. */
const REDLINE_SPLIT_RE = /border-left:4px solid #/;
const REDLINE_LABEL_RE =
  /font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/;
const REDLINE_ORIGINAL_RE = /<del>([^<]*)<\/del>/;
const REDLINE_SAFE_RE =
  /Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/;
const REDLINE_LEGAL_RE = /Legal Basis<\/div>\s*<div[^>]*>([^<]+)/;
const REDLINE_CONSUMER_RE = /Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/;

type JsonObject = Record<string, unknown>;

interface ClauseCategory {
  name: unknown;
  severity: unknown;
  confidence: unknown;
  description: unknown;
}

interface ClauseResult {
  text: unknown;
  categories: ClauseCategory[];
}

interface Redline {
  clause_label: string;
  risk_level: string;
  original_text: string;
  safe_alternative: string;
  legal_basis: string;
  consumer_standard: string;
  tier: "llm_refined" | "template";
}

/** Returns `value` when it is a non-null object, otherwise an empty object. */
function asObject(value: unknown): JsonObject {
  return typeof value === "object" && value !== null ? (value as JsonObject) : {};
}

/**
 * Extracts the message of the error event from the result stream text.
 * Only text at or after the error marker is searched, so a `data:` line that
 * belongs to an earlier event in the same stream is never mistaken for it.
 */
function extractErrorMessage(stream: string): string {
  const start = Math.max(stream.indexOf(ERROR_MARKER), 0);
  const found = stream.slice(start).match(/data:\s*(.+)/);
  return found?.[1] ?? "Analysis failed in backend";
}

/**
 * Parses the JSON array carried by the `complete` event.
 * @throws Error when there is no data after the marker, the data is not valid
 *   JSON even after escaping control characters, or it is not an array.
 */
function parseCompletePayload(stream: string): unknown[] {
  const completeIdx = stream.indexOf(COMPLETE_MARKER);
  const dataIdx = stream.indexOf(DATA_PREFIX, completeIdx);
  if (dataIdx === -1) throw new Error("No data in response");

  const raw = stream.substring(dataIdx + DATA_PREFIX.length).trim();

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // JSON.parse rejects raw control characters inside string literals, so
    // retry with newline / carriage return / tab escaped and the rest removed.
    const cleaned = raw.replace(/[\x00-\x1f]/g, (ch: string) => {
      if (ch === "\n") return "\\n";
      if (ch === "\r") return "\\r";
      if (ch === "\t") return "\\t";
      return "";
    });
    parsed = JSON.parse(cleaned);
  }

  if (!Array.isArray(parsed)) throw new Error("Unexpected response format from backend");
  return parsed;
}

/**
 * Groups clause records by their `text`, collecting every label reported for
 * the same text into one `categories` list. Non-array input yields `[]` and
 * non-object entries are skipped.
 */
function groupClauses(clauses: unknown): ClauseResult[] {
  if (!Array.isArray(clauses)) return [];

  const byText = new Map<unknown, ClauseResult>();
  for (const entry of clauses) {
    if (typeof entry !== "object" || entry === null) continue;
    const record = entry as JsonObject;

    let group = byText.get(record.text);
    if (!group) {
      group = { text: record.text, categories: [] };
      byText.set(record.text, group);
    }
    group.categories.push({
      name: record.label,
      severity: record.risk,
      confidence: record.confidence,
      description: record.description,
    });
  }
  return Array.from(byText.values());
}

/**
 * Scrapes redline suggestions out of the redlining HTML fragment.
 * Returns `[]` when `html` is not a string or lacks the "Clause Redlining"
 * heading. Cards without a recognisable label / risk pair are skipped.
 */
function parseRedlines(html: unknown): Redline[] {
  if (typeof html !== "string" || !html.includes("Clause Redlining")) return [];

  const redlines: Redline[] = [];
  // The text before the first card border is not a card, hence slice(1).
  for (const block of html.split(REDLINE_SPLIT_RE).slice(1)) {
    const label = block.match(REDLINE_LABEL_RE);
    if (!label) continue;

    const field = (pattern: RegExp): string => (block.match(pattern)?.[1] ?? "").trim();

    redlines.push({
      clause_label: (label[1] ?? "").trim(),
      risk_level: (label[2] ?? "").trim(),
      original_text: field(REDLINE_ORIGINAL_RE),
      // The suggestion may contain inline markup; keep only its text.
      safe_alternative: (block.match(REDLINE_SAFE_RE)?.[1] ?? "").replace(/<[^>]+>/g, "").trim(),
      legal_basis: field(REDLINE_LEGAL_RE),
      consumer_standard: field(REDLINE_CONSUMER_RE),
      tier: block.includes("LLM Refined") ? "llm_refined" : "template",
    });
  }
  return redlines;
}

/**
 * Submits `text` to the analysis endpoint and returns the event id to poll.
 * @throws Error when the request is not OK or no event id is returned.
 */
async function submitAnalysis(text: string): Promise<string> {
  const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/${ANALYSIS_ENDPOINT}`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ data: [text] }),
  });
  if (!submitRes.ok) {
    throw new Error(`Gradio submit failed: ${submitRes.status}`);
  }

  const eventId = asObject(await submitRes.json()).event_id;
  if (typeof eventId !== "string" || eventId === "") {
    throw new Error("No event_id from Gradio");
  }
  return eventId;
}

/**
 * Requests the result stream until it contains a `complete` event and returns
 * the full stream text.
 *
 * The loop is bounded by attempts, not wall-clock time: each attempt awaits the
 * entire response body in addition to the pause between attempts.
 * @throws Error with the backend message on an `error` event, or
 *   "Analysis timed out" once all attempts are used up.
 */
async function waitForCompletion(eventId: string): Promise<string> {
  const url = `${GRADIO_URL}/gradio_api/call/${ANALYSIS_ENDPOINT}/${eventId}`;

  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
    const resultRes = await fetch(url, { headers: { Accept: "text/event-stream" } });
    const stream = await resultRes.text();

    if (stream.includes(COMPLETE_MARKER)) return stream;
    if (stream.includes(ERROR_MARKER)) throw new Error(extractErrorMessage(stream));

    // No point sleeping after the final attempt; fail straight away instead.
    if (attempt + 1 < MAX_POLL_ATTEMPTS) {
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }
  }
  throw new Error("Analysis timed out");
}

/**
 * POST handler: analyses the `text` field of the JSON request body.
 *
 * Flow: validate input, submit to the Gradio endpoint, wait for the `complete`
 * event, download the JSON report referenced at index 8 of the event payload,
 * and reshape it (plus redlines scraped from the HTML at index 7) for the
 * frontend.
 *
 * Responses:
 * - 400 when the body is not valid JSON or `text` is missing, not a string, or
 *   shorter than 50 characters after trimming.
 * - 500 with `{ error }` when any backend step fails.
 * - 200 with the analysis result otherwise.
 */
export async function POST(req: NextRequest) {
  try {
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      // An unparsable body is the caller's mistake, not a server failure.
      return NextResponse.json(
        { error: "Request body must be valid JSON." },
        { status: 400 }
      );
    }

    const text = asObject(body).text;
    if (typeof text !== "string" || text.trim().length < MIN_TEXT_LENGTH) {
      return NextResponse.json(
        { error: "Please provide at least 50 characters of text to analyze." },
        { status: 400 }
      );
    }

    // Steps 1-3: submit, wait for completion, decode the event payload.
    const eventId = await submitAnalysis(text);
    const stream = await waitForCompletion(eventId);
    const payload = parseCompletePayload(stream);

    // Step 4: download the structured JSON report right away.
    const reportFile = asObject(payload[8]);
    if (typeof reportFile.url !== "string" || reportFile.url === "") {
      throw new Error("No JSON report generated");
    }
    const reportRes = await fetch(reportFile.url);
    if (!reportRes.ok) throw new Error("Failed to download analysis JSON");
    const report = asObject(await reportRes.json());

    // Step 5: transform to the frontend format.
    const risk = asObject(report.risk);
    const metadata = asObject(report.metadata);
    const usesModel = typeof metadata.model === "string" && metadata.model.includes("loaded");

    return NextResponse.json({
      risk_score: risk.score ?? 0,
      grade: risk.grade ?? "A",
      total_clauses: metadata.total_clauses ?? 0,
      flagged_count: metadata.flagged_clauses ?? 0,
      results: groupClauses(report.clauses),
      entities: report.entities || [],
      contradictions: report.contradictions || [],
      obligations: report.obligations || [],
      compliance: report.compliance || {},
      redlines: parseRedlines(payload[7]),
      model: usesModel ? "ml" : "regex",
      // Latency is not measured here; the field is always reported as 0.
      latency_ms: 0,
      session_id: null,
    });
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : "";
    console.error("Analyze error:", message || error);
    return NextResponse.json(
      { error: "Analysis failed: " + (message || "Try again in 30 seconds.") },
      { status: 500 }
    );
  }
}