File size: 10,286 Bytes
3552405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
"""Model service layer β€” unified Qwen/vLLM inference via OpenAI-compatible API.

Provides a single shared client and reusable inference functions for all
ClauseGuard agents and the copilot. Handles retries, timeouts, JSON cleaning,
and graceful error recovery.
"""

from __future__ import annotations

import asyncio
import json
import logging
from typing import Any, Dict, List

from openai import AsyncOpenAI, OpenAI

from clauseguard.config.settings import (
    API_KEY,
    BASE_URL,
    MAX_TOKENS,
    MODEL_NAME,
    TEMPERATURE,
    TIMEOUT_SECONDS,
)

logger = logging.getLogger(__name__)

_async_client: AsyncOpenAI | None = None
_sync_client: OpenAI | None = None


def get_client() -> AsyncOpenAI:
    """Return the module-wide AsyncOpenAI client, creating it on first use.

    The instance is cached in the module-level ``_async_client`` and is
    configured from ``API_KEY`` and ``BASE_URL``; later calls return the
    same object until the cache is cleared.
    """
    global _async_client
    if _async_client is not None:
        return _async_client
    _async_client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY)
    return _async_client


def get_sync_client() -> OpenAI:
    """Return the module-wide blocking OpenAI client, creating it on first use.

    The instance is cached in the module-level ``_sync_client`` and is
    configured from ``API_KEY`` and ``BASE_URL``.
    """
    global _sync_client
    cached = _sync_client
    if cached is None:
        cached = OpenAI(base_url=BASE_URL, api_key=API_KEY)
        _sync_client = cached
    return cached


def reset_client() -> None:
    """Forget both cached clients so the next getter call builds fresh ones.

    Intended for tests or after configuration changes. The previous client
    objects are only dereferenced here; they are not explicitly closed.
    """
    global _async_client, _sync_client
    _async_client = _sync_client = None


def clean_json_response(content: str) -> str:
    """Strip markdown fences and surrounding non-JSON text from LLM output.

    Handles the shapes models commonly produce:

    * a bare payload (returned unchanged apart from whitespace trimming);
    * a payload wrapped in triple-backtick fences, with or without a
      ``json`` language tag (matched case-insensitively);
    * a fenced payload preceded and/or followed by explanatory prose;
    * a lone opening fence (e.g. truncated output) or a lone trailing fence.

    Args:
        content: Raw text returned by the model.

    Returns:
        The text with fences, the optional ``json`` tag, any prose around a
        fenced block, and surrounding whitespace removed. The result is not
        guaranteed to be valid JSON; callers still need to parse it.
    """
    fence = "```"
    text = content.strip()

    opening = text.find(fence)
    closing = text.rfind(fence)
    # Two distinct, non-overlapping fences delimit a complete fenced block.
    has_fence_pair = opening != -1 and closing >= opening + len(fence)
    # Text that already starts like JSON may legitimately contain backticks
    # inside string values, so never cut into it looking for a fenced block.
    looks_like_bare_json = text[:1] in ("{", "[")

    if text.startswith(fence) or (has_fence_pair and not looks_like_bare_json):
        # Drop everything up to and including the opening fence (this also
        # discards a prose preamble), then the optional language tag.
        text = text[opening + len(fence):]
        if text[:4].lower() == "json":
            text = text[4:]
        # Drop the closing fence and anything the model wrote after it.
        closing = text.rfind(fence)
        if closing != -1:
            text = text[:closing]
    elif text.endswith(fence):
        text = text[: -len(fence)]

    return text.strip()


async def call_model(
    system_prompt: str,
    user_prompt: str,
    *,
    agent_name: str = "Agent",
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int | None = None,
    max_retries: int = 1,
    validate_json: bool = True,
) -> str | None:
    """Call the configured chat model with timeout, retry, and JSON validation.

    A single system + user exchange is sent through the shared async client.
    When ``validate_json`` is true, the reply is cleaned with
    ``clean_json_response`` and parsed with ``json.loads`` purely as a check;
    the *raw* reply is what gets returned. Empty or malformed replies are
    retried; after a malformed reply, a "raw JSON only" instruction is added
    to the user prompt (once, regardless of how many retries follow).

    Args:
        system_prompt: The system-level instruction.
        user_prompt: The user-level query.
        agent_name: Label used in log messages.
        temperature: Sampling temperature (defaults to config TEMPERATURE).
        max_tokens: Max tokens for the response (defaults to config MAX_TOKENS).
        timeout: Per-call timeout in seconds (defaults to config TIMEOUT_SECONDS).
        max_retries: Number of additional attempts after an empty or
            malformed reply. Negative values are treated as 0.
        validate_json: Whether the reply must parse as JSON.

    Returns:
        The model's raw text response, or None if every attempt failed.
        A timeout or any other unexpected exception returns None immediately
        without retrying. This function logs failures instead of raising.
    """
    client = get_client()
    temp = temperature if temperature is not None else TEMPERATURE
    mt = max_tokens if max_tokens is not None else MAX_TOKENS
    tout = timeout if timeout is not None else TIMEOUT_SECONDS

    json_only_hint = "\n\nIMPORTANT: Output ONLY raw JSON. No markdown, no explanation."
    # Always make at least one attempt, even for a nonsensical negative value.
    total_attempts = max(max_retries, 0) + 1
    add_json_hint = False
    last_error: str | None = None

    for attempt in range(total_attempts):
        can_retry = attempt < total_attempts - 1
        # Bound before the try so the handlers below can always reference it.
        content = ""
        # Build the prompt fresh each time so the hint is never duplicated.
        prompt = user_prompt + json_only_hint if add_json_hint else user_prompt
        try:
            response = await asyncio.wait_for(
                client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=temp,
                    max_tokens=mt,
                ),
                timeout=tout,
            )
            content = response.choices[0].message.content or ""
            logger.info("%s received %d chars in %d attempt(s)", agent_name, len(content), attempt + 1)

            if validate_json:
                cleaned = clean_json_response(content)
                if not cleaned:
                    raise ValueError("Empty response")
                json.loads(cleaned)
                logger.info("%s produced valid JSON", agent_name)
            return content

        # JSONDecodeError subclasses ValueError, so it must be handled first.
        except json.JSONDecodeError as e:
            last_error = str(e)
            logger.warning(
                "%s returned malformed JSON (attempt %d): %s | preview: %s",
                agent_name,
                attempt + 1,
                e,
                content[:200],
            )
            if can_retry:
                logger.warning("%s returned malformed JSON, retrying...", agent_name)
                add_json_hint = True
        except ValueError as e:
            last_error = str(e)
            if can_retry:
                logger.warning("%s returned empty response, retrying...", agent_name)
        except asyncio.TimeoutError:
            logger.error("%s agent timed out after %ds", agent_name, tout)
            return None
        except Exception as e:
            # Boundary handler: callers rely on None rather than an exception.
            logger.error("%s agent failed: %s", agent_name, e)
            return None

    logger.error("%s failed to produce valid JSON: %s", agent_name, last_error)
    return None


async def call_model_chat(
    messages: List[Dict[str, str]],
    *,
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int = 60,
) -> str:
    """Send a multi-turn conversation to the model and return its reply.

    This coroutine does not propagate request failures: a timeout, an empty
    reply, or any other exception raised while making the request is turned
    into a user-facing apology string.

    Args:
        messages: Complete message list (system + history + user).
        temperature: Sampling temperature; config TEMPERATURE when None.
        max_tokens: Response token limit; config MAX_TOKENS when None.
        timeout: Per-call timeout in seconds.

    Returns:
        The assistant's text, or a friendly error message.
    """
    sampling_temperature = TEMPERATURE if temperature is None else temperature
    token_limit = MAX_TOKENS if max_tokens is None else max_tokens
    client = get_client()

    try:
        pending = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=sampling_temperature,
            max_tokens=token_limit,
        )
        response = await asyncio.wait_for(pending, timeout=timeout)
        reply = response.choices[0].message.content
        if reply:
            return reply
        # None or empty string from the model.
        return "I'm sorry, I couldn't generate a response. Please try again."
    except asyncio.TimeoutError:
        logger.error("Chat call timed out after %ds", timeout)
        return "I'm sorry, the request timed out. Please try a shorter question or try again."
    except Exception as e:
        logger.error("Chat call failed: %s", e)
        return f"I'm sorry, something went wrong: {e}"


# ── Synchronous wrappers for use in Streamlit callbacks ──


def call_model_chat_sync(
    messages: List[Dict[str, str]],
    *,
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int = 60,
) -> str:
    """Synchronous wrapper around call_model_chat for Streamlit callbacks.

    Runs the coroutine to completion on a fresh event loop via
    ``asyncio.run``, which also closes the loop and clears the thread's
    current-loop registration afterwards, so no closed loop is left behind
    for later ``asyncio`` calls on this thread.

    Args:
        messages: Complete message list (system + history + user).
        temperature: Sampling temperature, forwarded unchanged.
        max_tokens: Response token limit, forwarded unchanged.
        timeout: Per-call timeout in seconds, forwarded unchanged.

    Returns:
        The assistant's text, or an apology string if the call could not be
        run (for example when invoked from a thread whose event loop is
        already running). This function does not raise for such failures.
    """
    # NOTE(review): get_client() caches one AsyncOpenAI instance that is reused
    # across the short-lived loops created here — confirm the underlying HTTP
    # client tolerates being used from more than one event loop.
    coro = None
    try:
        coro = call_model_chat(messages, temperature=temperature, max_tokens=max_tokens, timeout=timeout)
        return asyncio.run(coro)
    except Exception as e:
        logger.error("call_model_chat_sync failed: %s", e)
        return f"Sorry, an unexpected error occurred: {e}"
    finally:
        # If the loop never started, the coroutine was never awaited; closing
        # it avoids a "never awaited" RuntimeWarning. No-op once it finished.
        if coro is not None:
            coro.close()


# ── Higher-level domain functions ──


async def analyze_clause(
    clause_text: str,
    clause_type: str = "",
    additional_context: str = "",
    system_prompt: str = "",
    user_prompt_template: str = "",
    agent_name: str = "Analyzer",
) -> str | None:
    """Run one clause through the model on behalf of a pipeline agent.

    When a template is supplied it is filled with ``str.format`` using the
    fields ``clause_text``, ``clause_type`` and ``context``; otherwise the
    clause text itself is sent as the user prompt. The request goes through
    ``call_model`` with its default settings.

    Args:
        clause_text: Raw text of the clause to analyze.
        clause_type: Optional pre-classified clause type.
        additional_context: Extra context, exposed to the template as
            ``context``.
        system_prompt: The agent-specific system prompt.
        user_prompt_template: Format string for the user prompt; may be empty.
        agent_name: Label for logging.

    Returns:
        Whatever ``call_model`` returns: the raw response string, or None.
    """
    if user_prompt_template:
        prompt = user_prompt_template.format(
            clause_text=clause_text,
            clause_type=clause_type,
            context=additional_context,
        )
    else:
        prompt = clause_text

    return await call_model(
        system_prompt=system_prompt,
        user_prompt=prompt,
        agent_name=agent_name,
    )


async def generate_negotiation_message(
    clause_text: str,
    risk_reason: str,
    safer_version: str = "",
) -> str:
    """Draft a short, polite email asking for a risky clause to be revised.

    Args:
        clause_text: The clause being contested, quoted in the prompt.
        risk_reason: Explanation of why the clause is risky.
        safer_version: Optional replacement wording; included in the prompt
            only when non-empty.

    Returns:
        The generated message, or an empty string when ``call_model`` yields
        nothing. JSON validation is disabled because the reply is free text.
    """
    negotiator_role = (
        "You are a professional contract negotiator. Write a short, polite email "
        "message requesting a change to a contract clause. Keep it professional, "
        "concise, and non-confrontational. Maximum 4-5 sentences."
    )

    sections = [
        f"The risky clause is:\n\"{clause_text}\"\n\n",
        f"Why it's risky:\n{risk_reason}\n\n",
    ]
    if safer_version:
        sections.append(f"Suggested safer version:\n\"{safer_version}\"\n\n")
    sections.append("Write a single email-style negotiation message requesting a fair revision.")
    request = "".join(sections)

    reply = await call_model(
        system_prompt=negotiator_role,
        user_prompt=request,
        agent_name="NegotiationGenerator",
        validate_json=False,
    )
    if reply:
        return reply
    return ""


async def contract_chat(
    contract_context: str,
    chat_history: List[Dict[str, str]],
    user_message: str,
    system_prompt: str,
    timeout: int = 60,
) -> str:
    """Answer a user question in a conversation grounded in a contract.

    The contract context is appended to the system prompt under a
    "CONTRACT CONTEXT" heading. The prior turns follow, reduced to their
    ``role`` and ``content`` keys, and the new user message comes last.

    Args:
        contract_context: The formatted contract + analysis context.
        chat_history: Earlier messages; each must provide ``role`` and
            ``content``.
        user_message: The user's new question.
        system_prompt: The copilot system prompt.
        timeout: Per-call timeout in seconds.

    Returns:
        The string produced by ``call_model_chat``.
    """
    system_message = {
        "role": "system",
        "content": f"{system_prompt}\n\n---\n\n## CONTRACT CONTEXT\n\n{contract_context}",
    }
    # Copy only the two keys the API needs; extra keys on history items are dropped.
    prior_turns = [{"role": turn["role"], "content": turn["content"]} for turn in chat_history]

    messages: List[Dict[str, str]] = [
        system_message,
        *prior_turns,
        {"role": "user", "content": user_message},
    ]
    return await call_model_chat(messages, timeout=timeout)