File size: 10,286 Bytes
"""Model service layer — unified Qwen/vLLM inference via OpenAI-compatible API.
Provides a single shared client and reusable inference functions for all
ClauseGuard agents and the copilot. Handles retries, timeouts, JSON cleaning,
and graceful error recovery.
"""
from __future__ import annotations
import asyncio
import json
import logging
from typing import Any, Dict, List
from openai import AsyncOpenAI, OpenAI
from clauseguard.config.settings import (
API_KEY,
BASE_URL,
MAX_TOKENS,
MODEL_NAME,
TEMPERATURE,
TIMEOUT_SECONDS,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Cached client instances. Both start as None, are created on first use by
# get_client() / get_sync_client(), and are cleared again by reset_client().
_async_client: AsyncOpenAI | None = None
_sync_client: OpenAI | None = None
def get_client() -> AsyncOpenAI:
    """Return the module-wide AsyncOpenAI client, building it on first use.

    The instance is configured from API_KEY and BASE_URL and cached in the
    module-level ``_async_client`` so later calls reuse the same object.
    """
    global _async_client
    client = _async_client
    if client is None:
        # First call (or first call after reset_client()): build and cache.
        client = _async_client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)
    return client
def get_sync_client() -> OpenAI:
    """Return the module-wide synchronous OpenAI client, building it on first use.

    The instance is configured from API_KEY and BASE_URL and cached in the
    module-level ``_sync_client`` so later calls reuse the same object.
    """
    global _sync_client
    client = _sync_client
    if client is None:
        # First call (or first call after reset_client()): build and cache.
        client = _sync_client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
    return client
def reset_client() -> None:
    """Forget both cached clients so the next getter call builds fresh ones.

    Useful for testing or after configuration changes.
    """
    global _async_client, _sync_client
    _async_client = _sync_client = None
def clean_json_response(content: str) -> str:
    """Strip markdown fences and leading/trailing non-JSON text from LLM output.

    Processing happens in two stages:

    1. A markdown fence at the very start (``` or ```json, any letter case)
       and/or at the very end of the text is removed.
    2. If the remainder is not valid JSON on its own, the text is scanned for
       embedded JSON objects/arrays and the longest one that parses is
       returned. This drops chatter such as "Here is the result:" before the
       payload or a closing remark after it.

    Args:
        content: Raw text returned by the model.

    Returns:
        The cleaned text. When no JSON object or array can be located, the
        fence-stripped text is returned unchanged so the caller's own
        ``json.loads`` still reports the real parse error.
    """
    text = content.strip()
    # Case-insensitive language tag: models sometimes emit ```JSON.
    if text[:7].lower() == "```json":
        text = text[7:]
    elif text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()
    if not text:
        return text

    # Fast path: already valid JSON (this also keeps bare scalars intact).
    try:
        json.loads(text)
        return text
    except (ValueError, RecursionError):
        pass

    # Slow path: look for JSON values embedded in surrounding prose. The
    # longest top-level candidate wins so that a short bracketed aside such
    # as "[1]" does not shadow the real payload.
    decoder = json.JSONDecoder()
    best = ""
    pos = 0
    length = len(text)
    while pos < length:
        if text[pos] not in "{[":
            pos += 1
            continue
        try:
            _, end = decoder.raw_decode(text, pos)
        except (ValueError, RecursionError):
            pos += 1
            continue
        if end - pos > len(best):
            best = text[pos:end]
        # Skip past the decoded value; its nested brackets are not candidates.
        pos = end
    return best or text
async def call_model(
    system_prompt: str,
    user_prompt: str,
    *,
    agent_name: str = "Agent",
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int | None = None,
    max_retries: int = 1,
    validate_json: bool = True,
) -> str | None:
    """Call the Qwen model with retry, timeout, and JSON validation.

    Args:
        system_prompt: The system-level instruction.
        user_prompt: The user-level query.
        agent_name: Label used in log messages.
        temperature: Sampling temperature (defaults to config TEMPERATURE).
        max_tokens: Max tokens for the response (defaults to config MAX_TOKENS).
        timeout: Per-call timeout in seconds (defaults to config TIMEOUT_SECONDS).
        max_retries: Number of additional attempts after a malformed-JSON or
            empty response. Timeouts and other errors are not retried.
        validate_json: Whether to validate the response as valid JSON.

    Returns:
        The model's raw (uncleaned) text response, or None if all attempts
        fail, the call times out, or the request raises.
    """
    client = get_client()
    temp = temperature if temperature is not None else TEMPERATURE
    mt = max_tokens if max_tokens is not None else MAX_TOKENS
    tout = timeout if timeout is not None else TIMEOUT_SECONDS

    json_reminder = "\n\nIMPORTANT: Output ONLY raw JSON. No markdown, no explanation."
    # The prompt actually sent. The reminder is added at most once, however
    # many retries happen, instead of piling up on every failed attempt.
    prompt = user_prompt
    last_error: str | None = None

    for attempt in range(max_retries + 1):
        # Reset per attempt so the malformed-JSON preview below can never show
        # text left over from an earlier attempt.
        content = ""
        try:
            response = await asyncio.wait_for(
                client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=temp,
                    max_tokens=mt,
                ),
                timeout=tout,
            )
            content = response.choices[0].message.content or ""
            logger.info("%s received %d chars in %d attempt(s)", agent_name, len(content), attempt + 1)
            if validate_json:
                cleaned = clean_json_response(content)
                if not cleaned.strip():
                    raise ValueError("Empty response")
                # Parsed only to validate; the caller receives the raw text.
                json.loads(cleaned)
                logger.info("%s produced valid JSON", agent_name)
            return content
        except json.JSONDecodeError as e:
            # Must precede the ValueError handler: JSONDecodeError subclasses it.
            last_error = str(e)
            preview = content[:200] if content else "(no content)"
            logger.warning("%s returned malformed JSON (attempt %d): %s | preview: %s", agent_name, attempt + 1, e, preview)
            if attempt < max_retries:
                logger.warning("%s returned malformed JSON, retrying...", agent_name)
                prompt = user_prompt + json_reminder
        except ValueError as e:
            last_error = str(e)
            if attempt < max_retries:
                logger.warning("%s returned empty response, retrying...", agent_name)
        except asyncio.TimeoutError:
            logger.error("%s agent timed out after %ds", agent_name, tout)
            return None
        except Exception as e:
            # Boundary handler: keep the traceback in the log, then signal
            # failure to the caller with None.
            logger.exception("%s agent failed: %s", agent_name, e)
            return None

    logger.error("%s failed to produce valid JSON: %s", agent_name, last_error)
    return None
async def call_model_chat(
    messages: List[Dict[str, str]],
    *,
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int = 60,
) -> str:
    """Send a multi-turn conversation to the Qwen model and return its reply.

    Failures are never raised from the request itself: timeouts and other
    errors are logged and turned into an apologetic message string.

    Args:
        messages: Full message list (system + history + user).
        temperature: Sampling temperature (config TEMPERATURE when None).
        max_tokens: Max tokens for the response (config MAX_TOKENS when None).
        timeout: Per-call timeout in seconds.

    Returns:
        The assistant's text response, or a friendly error message.
    """
    chat_client = get_client()
    sampling_temperature = TEMPERATURE if temperature is None else temperature
    token_limit = MAX_TOKENS if max_tokens is None else max_tokens

    try:
        completion = await asyncio.wait_for(
            chat_client.chat.completions.create(
                model=MODEL_NAME,
                messages=messages,
                temperature=sampling_temperature,
                max_tokens=token_limit,
            ),
            timeout=timeout,
        )
        reply = completion.choices[0].message.content
    except asyncio.TimeoutError:
        logger.error("Chat call timed out after %ds", timeout)
        return "I'm sorry, the request timed out. Please try a shorter question or try again."
    except Exception as exc:
        logger.error("Chat call failed: %s", exc)
        return f"I'm sorry, something went wrong: {exc}"

    # A missing or empty completion gets a canned fallback reply.
    if reply:
        return reply
    return "I'm sorry, I couldn't generate a response. Please try again."
# ── Synchronous wrappers for use in Streamlit callbacks ──
def call_model_chat_sync(
    messages: List[Dict[str, str]],
    *,
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int = 60,
) -> str:
    """Synchronous wrapper around call_model_chat for Streamlit callbacks.

    Runs the coroutine to completion on a fresh event loop and returns its
    result. Any exception raised while doing so is logged and converted into
    an error message string, so this function does not raise.

    Args:
        messages: Full message list (system + history + user).
        temperature: Sampling temperature, forwarded to call_model_chat.
        max_tokens: Max tokens for the response, forwarded to call_model_chat.
        timeout: Per-call timeout in seconds, forwarded to call_model_chat.

    Returns:
        The assistant's reply, or an error message string.
    """
    # NOTE(review): get_client() caches one AsyncOpenAI client module-wide,
    # while every call here runs on a brand-new event loop. Confirm the client
    # tolerates being reused across loops.
    coro = call_model_chat(
        messages, temperature=temperature, max_tokens=max_tokens, timeout=timeout
    )
    try:
        # asyncio.run creates the loop, finalizes async generators, closes the
        # loop and unregisters it, so no closed loop is left installed as the
        # thread's current event loop.
        return asyncio.run(coro)
    except Exception as e:
        # If the loop never started (e.g. one is already running in this
        # thread), close the coroutine to avoid a "never awaited" warning.
        # Closing an already-finished coroutine is a no-op.
        coro.close()
        logger.error("call_model_chat_sync failed: %s", e)
        return f"Sorry, an unexpected error occurred: {e}"
# ── Higher-level domain functions ──
async def analyze_clause(
    clause_text: str,
    clause_type: str = "",
    additional_context: str = "",
    system_prompt: str = "",
    user_prompt_template: str = "",
    agent_name: str = "Analyzer",
) -> str | None:
    """Analyze a single clause — used by pipeline agents.

    Args:
        clause_text: The clause raw text to analyze.
        clause_type: Optional pre-classified clause type.
        additional_context: Additional context, exposed to the template as
            ``{context}``.
        system_prompt: The agent-specific system prompt.
        user_prompt_template: ``str.format`` template for the user prompt; it
            receives ``clause_text``, ``clause_type`` and ``context``. When
            empty, the clause text itself is used as the user prompt.
        agent_name: Label for logging.

    Returns:
        Raw response string or None.
    """
    if user_prompt_template:
        prompt = user_prompt_template.format(
            clause_text=clause_text,
            clause_type=clause_type,
            context=additional_context,
        )
    else:
        # No template supplied: send the clause text as-is.
        prompt = clause_text

    return await call_model(
        system_prompt=system_prompt,
        user_prompt=prompt,
        agent_name=agent_name,
    )
async def generate_negotiation_message(
    clause_text: str,
    risk_reason: str,
    safer_version: str = "",
) -> str:
    """Draft a short, professional negotiation email for a risky clause.

    Args:
        clause_text: The clause the message asks to revise.
        risk_reason: Explanation of why the clause is risky.
        safer_version: Optional replacement wording; included in the prompt
            only when non-empty.

    Returns:
        The generated message, or an empty string when the model call
        returns nothing.
    """
    negotiator_instructions = (
        "You are a professional contract negotiator. Write a short, polite email "
        "message requesting a change to a contract clause. Keep it professional, "
        "concise, and non-confrontational. Maximum 4-5 sentences."
    )

    # Assemble the user prompt section by section, then join once.
    sections = [
        f"The risky clause is:\n\"{clause_text}\"\n\n",
        f"Why it's risky:\n{risk_reason}\n\n",
    ]
    if safer_version:
        sections.append(f"Suggested safer version:\n\"{safer_version}\"\n\n")
    sections.append("Write a single email-style negotiation message requesting a fair revision.")

    reply = await call_model(
        system_prompt=negotiator_instructions,
        user_prompt="".join(sections),
        agent_name="NegotiationGenerator",
        validate_json=False,  # free-form text is expected, not JSON
    )
    return reply if reply else ""
async def contract_chat(
    contract_context: str,
    chat_history: List[Dict[str, str]],
    user_message: str,
    system_prompt: str,
    timeout: int = 60,
) -> str:
    """Answer a user question in a chat that carries the full contract context.

    The message list is built as: one system message (system prompt followed
    by the contract context), then the prior history, then the new user
    message.

    Args:
        contract_context: The formatted contract + analysis context.
        chat_history: Previous messages (role/content dicts); only the
            ``role`` and ``content`` keys of each entry are forwarded.
        user_message: The user's new question.
        system_prompt: The copilot system prompt.
        timeout: Per-call timeout.

    Returns:
        Assistant response string.
    """
    system_message = {
        "role": "system",
        "content": f"{system_prompt}\n\n---\n\n## CONTRACT CONTEXT\n\n{contract_context}",
    }
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]} for turn in chat_history
    ]
    messages: List[Dict[str, str]] = [
        system_message,
        *prior_turns,
        {"role": "user", "content": user_message},
    ]
    return await call_model_chat(messages, timeout=timeout)
|