muhammadbinmurtza committed on
Commit
3552405
·
1 Parent(s): 3755a3e

ClauseGuard: AI contract analysis with Qwen2.5 via vLLM on AMD

Browse files
.env.example ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ClauseGuard Configuration
2
+ # ─────────────────────────────
3
+ # The Qwen model is served via vLLM with an OpenAI-compatible API.
4
+ # No API key is required for local/self-hosted vLLM deployments.
5
+ # Set API_KEY to a real key only if your vLLM server enforces authentication.
6
+
7
+ API_KEY=EMPTY
8
+ BASE_URL=http://165.245.141.170:8000/v1
9
+ MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct
10
+
11
+ # Optional: override these defaults
12
+ # MAX_TOKENS=4096
13
+ # TIMEOUT_SECONDS=120
14
+ # TEMPERATURE=0.0
15
+
16
+ # Legacy DeepSeek compatibility (also accepted as fallback)
17
+ # DEEPSEEK_API_KEY=sk-xxx
18
+ # BASE_URL=https://api.deepseek.com
19
+ # MODEL_NAME=deepseek-chat
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ .pytest_cache/
3
+ .env
4
+ *.pyc
5
+ report.md
6
+ clauseGuard-AI/
7
+ documents/
8
+ frontend/
.streamlit/config.toml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ base="dark"
3
+ primaryColor="#667eea"
4
+ backgroundColor="#0e1117"
5
+ secondaryBackgroundColor="#1a1a2e"
6
+ textColor="#fafafa"
7
+ font="sans serif"
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 ClauseGuard
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,15 +1,108 @@
1
- ---
2
- title: ClauseGuard AI
3
- emoji: 🏢
4
- colorFrom: green
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 6.14.0
8
- python_version: '3.13'
9
- app_file: app.py
10
- pinned: false
11
- license: mit
12
- short_description: ClauseGuard helps you understand fix, and negotiate contract
13
- ---
14
-
15
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ClauseGuard — AI-Powered Contract Clause Risk Analyzer
2
+
3
+ Upload any contract (PDF, TXT, DOCX). ClauseGuard runs it through a 5-agent AI pipeline and outputs a structured risk report classifying every clause by severity with plain-English explanations.
4
+
5
+ ## Architecture
6
+
7
+ ![ClauseGuard 5-Agent Pipeline](docs/architecture.png)
8
+
9
+ ```
10
+ ┌──────────┐ ┌───────────┐ ┌─────────────┐ ┌──────────────┐ ┌──────────┐
11
+ Extractor│───▶│Classifier │───▶│ Risk Scorer │───▶│ Translator │───▶│ Reporter │
12
+ (Agent 1)│ │ (Agent 2) │ │ (Agent 3) │ │ (Agent 4) │ │ (Agent 5)│
13
+ └──────────┘ └───────────┘ └─────────────┘ └──────────────┘ └──────────┘
14
+ │ │ │ │ │
15
+ ▼ ▼ ▼ ▼ ▼
16
+ Split text Assign types Score severity Plain English Final markdown
17
+ into clauses + contract per clause + actions report
18
+ type
19
+ ```
20
+
21
+ Agents communicate via OpenAI Agents SDK `handoff()`, passing structured Pydantic v2 output at each stage. If any agent times out (the per-agent limit is set by `TIMEOUT_SECONDS`) or fails, the pipeline continues with partial data — the Reporter always produces a FinalReport.
22
+
23
+ ## Prerequisites
24
+
25
+ - Python 3.11+
26
+ - An OpenAI-compatible LLM endpoint: a vLLM server hosting Qwen (no API key needed when self-hosted), or a [DeepSeek API key](https://platform.deepseek.com/) as the legacy fallback
27
+
28
+ ## Setup
29
+
30
+ ```bash
31
+ # Clone the repository
32
+ git clone <repo-url>
33
+ cd clauseguard
34
+
35
+ # Install dependencies
36
+ pip install -r requirements.txt
37
+
38
+ # Configure your API key
39
+ cp .env.example .env
40
+ # Edit .env and set API_KEY, BASE_URL and MODEL_NAME (DEEPSEEK_API_KEY is still accepted as a legacy fallback)
41
+ ```
42
+
43
+ ## Usage
44
+
45
+ ### CLI
46
+
47
+ ```bash
48
+ python main.py --file sample_contracts/sample_nda.txt
49
+ python main.py --file sample_contracts/sample_nda.txt --output my_report.md
50
+ ```
51
+
52
+ ### Streamlit UI
53
+
54
+ ```bash
55
+ streamlit run app.py
56
+ ```
57
+
58
+ Open http://localhost:8501 in your browser, upload a contract, and click Analyze.
59
+
60
+ ### Run Tests
61
+
62
+ ```bash
63
+ pytest tests/ -v
64
+ ```
65
+
66
+ ## Sample Output
67
+
68
+ ```markdown
69
+ # ClauseGuard Risk Report
70
+ **Contract:** sample_nda.txt
71
+ **Type:** NDA
72
+ **Overall Risk Score:** 5.0/10
73
+
74
+ ## Top 3 Actions Before Signing
75
+ 1. Demand a carve-out for inventions created on own time using own equipment
76
+ 2. Negotiate to limit the non-compete to specific geographic regions
77
+ 3. Negotiate to add an opt-out clause for arbitration
78
+
79
+ ## Risk Summary
80
+ | Severity | Count |
81
+ |----------|-------|
82
+ | 🔴 Critical | 1 |
83
+ | 🟠 High | 2 |
84
+ | 🟡 Medium | 1 |
85
+ | 🟢 Low | 2 |
86
+ | ℹ️ Info | 2 |
87
+ ```
88
+
89
+ ## AMD Developer Cloud Deployment
90
+
91
+ ClauseGuard is designed to run on AMD Developer Cloud infrastructure. An OpenAI-compatible endpoint (Qwen served via vLLM by default, with the DeepSeek API as a legacy fallback) provides the LLM backend, and the application itself is a lightweight Python service suitable for containerized deployment.
92
+
93
+ ### Docker deployment
94
+
95
+ ```dockerfile
96
+ FROM python:3.11-slim
97
+ WORKDIR /app
98
+ COPY requirements.txt .
99
+ RUN pip install -r requirements.txt
100
+ COPY . .
101
+ ENV STREAMLIT_SERVER_PORT=8501
102
+ EXPOSE 8501
103
+ CMD ["streamlit", "run", "app.py", "--server.address=0.0.0.0"]
104
+ ```
105
+
106
+ ## License
107
+
108
+ MIT
__init__.py ADDED
File without changes
agents/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """ClauseGuard AI agents package."""
2
+
3
+ from clauseguard.agents.orchestrator import run_pipeline
4
+
5
+ __all__ = ["run_pipeline"]
agents/classifier.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent 2: Classifier — assigns clause types and detects contract type."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any
6
+
7
+ from clauseguard.config.prompts import CLASSIFIER_SYSTEM_PROMPT
8
+ from clauseguard.models.clause import Clause, ClauseList, ClauseType
9
+ from clauseguard.services.model_service import call_model, clean_json_response
10
+
11
+ logger = logging.getLogger(__name__)
12
+ MAX_RETRIES = 1
13
+
14
+
15
async def run_classifier(clause_list: ClauseList) -> ClauseList:
    """Label every clause with a type and detect the overall contract type.

    The clause list is serialized to indented JSON and sent to the model
    together with the classifier system prompt.

    Args:
        clause_list: The ClauseList produced by the Extractor agent.

    Returns:
        The ClauseList parsed from the model response, or ``clause_list``
        itself, unchanged, when the model call yields ``None``.
    """
    user_prompt = f"Classify these clauses:\n{clause_list.model_dump_json(indent=2)}"

    response = await call_model(
        system_prompt=CLASSIFIER_SYSTEM_PROMPT,
        user_prompt=user_prompt,
        agent_name="Classifier",
        max_retries=MAX_RETRIES,
    )

    if response is not None:
        return _parse_response(response, clause_list)

    # No usable answer from the model: hand the input back untouched.
    logger.warning("Classifier produced no valid output, returning original clauses")
    return clause_list
38
+
39
+
40
def _parse_response(content: str, original: ClauseList) -> ClauseList:
    """Parse the classifier's JSON response into a new ClauseList.

    Two payload shapes are accepted: a JSON object carrying ``clauses`` and
    ``contract_type`` keys, or a bare JSON array of clause objects (in which
    case the contract type defaults to ``"Other"``).

    Args:
        content: Raw text returned by the model.
        original: The ClauseList that was sent to the classifier. It is not
            consulted here; the parameter is kept for interface stability.

    Returns:
        A ClauseList built from the parsed clause entries.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or the payload is neither an object nor an
            array.
    """
    cleaned = clean_json_response(content)
    data = json.loads(cleaned)

    # Branch on the payload shape *before* touching ``.get``: calling
    # ``data.get`` on a list raises AttributeError, which defeated the
    # list fallback this function is meant to support.
    if isinstance(data, dict):
        clauses_data = data.get("clauses", [])
        contract_type = data.get("contract_type", "Other")
    elif isinstance(data, list):
        clauses_data = data
        contract_type = "Other"
    else:
        raise ValueError(
            f"Classifier response must be a JSON object or array, got {type(data).__name__}"
        )

    classified_clauses: list[Clause] = []
    for c in clauses_data:
        clause_type_raw = c.get("clause_type", "OTHER")
        try:
            clause_type = ClauseType(clause_type_raw)
        except ValueError:
            # Unknown labels from the model degrade to OTHER instead of failing.
            clause_type = ClauseType.OTHER

        classified_clauses.append(
            Clause(
                id=c.get("id", 0),
                raw_text=c.get("raw_text", ""),
                plain_english=c.get("plain_english"),
                clause_type=clause_type,
                section_heading=c.get("section_heading"),
                position=c.get("position", 0),
                confidence_score=c.get("confidence_score"),
            )
        )

    return ClauseList(
        clauses=classified_clauses,
        contract_type=contract_type,
        total_clauses=len(classified_clauses),
    )
agents/copilot.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClauseGuard Copilot Agent — interactive AI chat assistant for contract analysis.
2
+
3
+ This agent handles multi-turn conversations where users ask questions about
4
+ their analyzed contract. It uses the full contract text and the completed
5
+ clause analysis report as context, and responds via the Qwen model.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ from typing import Any, Dict, List
11
+
12
+ from clauseguard.config.copilot_prompts import COPILOT_SYSTEM_PROMPT
13
+ from clauseguard.models.report import FinalReport
14
+ from clauseguard.services.model_service import call_model_chat
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ CHAT_TIMEOUT_SECONDS = 60
19
+
20
+
21
def build_contract_context(full_contract_text: str, report: FinalReport) -> str:
    """Render the contract text and its analysis as one plain-text context.

    The result has up to three banner-delimited sections: the full contract
    text, a clause-by-clause analysis (summary counts followed by one entry
    per scored clause), and the top recommended actions when the report has
    any.

    Args:
        full_contract_text: The raw text of the original contract.
        report: The completed FinalReport from the analysis pipeline.

    Returns:
        The newline-joined context string.
    """
    rule = "=" * 60
    summary = report.summary

    lines: List[str] = [
        rule,
        "FULL CONTRACT TEXT",
        rule,
        full_contract_text,
        "",
        rule,
        "CLAUSE-BY-CLAUSE ANALYSIS",
        rule,
        (
            f"Contract Type: {summary.contract_type} | "
            f"Total Clauses: {summary.total_clauses} | "
            f"Risk Score: {summary.overall_score}/10"
        ),
        (
            f"Critical: {summary.critical_count} | "
            f"High: {summary.high_count} | "
            f"Medium: {summary.medium_count} | "
            f"Low: {summary.low_count}"
        ),
        "",
    ]

    for index, scored in enumerate(report.scored_clauses, 1):
        clause = scored.clause
        finding = scored.finding

        lines.append(f"--- Clause {index} ---")
        lines.append(f"Original Text: {clause.raw_text}")
        lines.append(f"Clause Type: {clause.clause_type.value}")
        if clause.section_heading:
            lines.append(f"Section: {clause.section_heading}")
        lines.append(f"Severity: {finding.severity.value}")
        lines.append(f"Risk Title: {finding.risk_title}")
        lines.append(f"Risk Reason: {finding.risk_reason}")

        # Optional fields are emitted only when they hold a truthy value.
        optional_fields = (
            ("Plain English", clause.plain_english),
            ("Recommended Action", finding.recommended_action),
            ("Safer Wording", finding.safer_clause_version),
            ("Negotiation Message", finding.negotiation_message),
        )
        lines.extend(f"{label}: {value}" for label, value in optional_fields if value)

        if finding.impact_scenarios:
            lines.append("Impact Scenarios:")
            lines.extend(f"  - {impact}" for impact in finding.impact_scenarios)
        lines.append("")

    if report.top_3_actions:
        lines.extend([rule, "TOP 3 RECOMMENDED ACTIONS", rule])
        lines.extend(
            f"{rank}. {action}" for rank, action in enumerate(report.top_3_actions, 1)
        )
        lines.append("")

    return "\n".join(lines)
89
+
90
+
91
def build_chat_messages(
    system_prompt: str,
    contract_context: str,
    chat_history: List[Dict[str, str]],
    user_message: str,
) -> List[Dict[str, str]]:
    """Assemble the message list for one copilot chat completion call.

    The system message is the system prompt followed by the contract context
    under a ``## CONTRACT CONTEXT`` heading. History entries are copied down
    to their ``role`` and ``content`` keys, and the new user message goes
    last.

    Args:
        system_prompt: The copilot system prompt.
        contract_context: The formatted contract + analysis context.
        chat_history: Previous messages in the conversation.
        user_message: The new user message to respond to.

    Returns:
        A new list of ``{"role", "content"}`` dicts; the inputs are not
        modified.
    """
    system_content = f"{system_prompt}\n\n---\n\n## CONTRACT CONTEXT\n\n{contract_context}"
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]} for turn in chat_history
    ]
    return [
        {"role": "system", "content": system_content},
        *prior_turns,
        {"role": "user", "content": user_message},
    ]
120
+
121
+
122
async def run_copilot(
    contract_context: str,
    chat_history: List[Dict[str, str]],
    user_message: str,
) -> str:
    """Ask the copilot one question and return the model's reply.

    Args:
        contract_context: The formatted contract + analysis context string.
        chat_history: Previous messages in the conversation (role/content dicts).
        user_message: The new question from the user.

    Returns:
        Whatever ``call_model_chat`` returns for the assembled messages
        (presumably the assistant's reply text, or an error message on
        failure — confirm against ``call_model_chat``).
    """
    conversation = build_chat_messages(
        COPILOT_SYSTEM_PROMPT,
        contract_context,
        chat_history,
        user_message,
    )
    reply = await call_model_chat(conversation, timeout=CHAT_TIMEOUT_SECONDS)
    return reply
139
+
140
+
141
+ # ── Synchronous wrapper around run_copilot, for use from Streamlit callbacks ──
142
+
143
def run_copilot_sync(
    contract_context: str,
    chat_history: List[Dict[str, str]],
    user_message: str,
) -> str:
    """Synchronous wrapper around run_copilot for use in Streamlit callbacks.

    The coroutine is driven by ``asyncio.run``, which creates a fresh event
    loop, runs the call to completion, closes the loop and clears the
    thread's current-loop slot. The previous hand-rolled version left a
    *closed* loop installed via ``asyncio.set_event_loop``, so any later
    ``asyncio.get_event_loop()`` in the same thread returned an unusable loop.

    Args:
        contract_context: The formatted contract + analysis context string.
        chat_history: Previous messages in the conversation (role/content dicts).
        user_message: The new question from the user.

    Returns:
        The copilot's reply, or a user-facing apology string when the call
        raises. This function does not propagate exceptions.
    """
    try:
        return asyncio.run(run_copilot(contract_context, chat_history, user_message))
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the log and show the
        # user a readable message instead of crashing the page.
        logger.exception("run_copilot_sync failed: %s", e)
        return f"Sorry, an unexpected error occurred: {e}"
agents/extractor.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent 1: Extractor — segments document into individual clauses."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import Optional
6
+
7
+ from clauseguard.config.prompts import EXTRACTOR_SYSTEM_PROMPT
8
+ from clauseguard.models.clause import Clause, ClauseList
9
+ from clauseguard.services.model_service import call_model, clean_json_response
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ MIN_CLAUSES = 3
14
+ MAX_RETRIES = 1
15
+
16
+
17
async def run_extractor(raw_text: str, filename: str = "document") -> ClauseList:
    """Split raw contract text into clauses via the Extractor agent.

    Args:
        raw_text: The raw text content of the contract.
        filename: Name of the source file; included in the prompt for context.

    Returns:
        The validated ClauseList parsed from the model response.

    Raises:
        ValueError: If the text is empty or whitespace-only, if the model
            call yields ``None``, or if fewer than MIN_CLAUSES clauses are
            found.
    """
    has_text = bool(raw_text) and bool(raw_text.strip())
    if not has_text:
        raise ValueError("Document is empty or unreadable")

    response = await call_model(
        system_prompt=EXTRACTOR_SYSTEM_PROMPT,
        user_prompt=_build_user_prompt(raw_text, filename),
        agent_name="Extractor",
        max_retries=MAX_RETRIES,
    )
    if response is None:
        raise ValueError("Extractor agent failed to produce a valid response")

    extracted = _parse_response(response)
    _validate_clause_list(extracted)
    return extracted
48
+
49
+
50
+ def _build_user_prompt(raw_text: str, filename: str) -> str:
51
+ """Build the user prompt with the contract text."""
52
+ return f"""Extract all clauses from the following contract document.
53
+
54
+ Filename: {filename}
55
+
56
+ Document text:
57
+ {raw_text}
58
+ """
59
+
60
+
61
def _parse_response(content: str) -> ClauseList:
    """Turn the Extractor's JSON reply into a ClauseList.

    A JSON array is read as the clause entries themselves; a JSON object
    supplies them under ``clauses`` and may also carry ``contract_type``.
    Any other payload yields an empty clause list with type ``"Other"``.
    """
    payload = json.loads(clean_json_response(content))
    payload_is_object = isinstance(payload, dict)

    if isinstance(payload, list):
        raw_clauses = payload
    elif payload_is_object:
        raw_clauses = payload.get("clauses", [])
    else:
        raw_clauses = []

    parsed: list[Clause] = [
        Clause(
            id=entry.get("id", 0),
            raw_text=entry.get("raw_text", ""),
            plain_english=entry.get("plain_english"),
            clause_type=entry.get("clause_type", "OTHER"),
            section_heading=entry.get("section_heading"),
            position=entry.get("position", 0),
        )
        for entry in raw_clauses
    ]

    if payload_is_object:
        detected_type = payload.get("contract_type", "Other")
    else:
        detected_type = "Other"

    return ClauseList(
        clauses=parsed,
        contract_type=detected_type,
        total_clauses=len(parsed),
    )
93
+
94
+
95
def _validate_clause_list(clause_list: ClauseList) -> None:
    """Reject extractions that contain too few clauses.

    Raises:
        ValueError: If ``clause_list.total_clauses`` is below MIN_CLAUSES.
    """
    too_few = clause_list.total_clauses < MIN_CLAUSES
    if not too_few:
        return

    message = f"Document too short or unreadable — minimum {MIN_CLAUSES} clauses required"
    raise ValueError(message)
agents/orchestrator.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Orchestrator — manages the full 5-agent pipeline with OpenAI Agents SDK handoff."""
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import Any, Callable, List
6
+
7
+ try:
8
+ from agents import Agent as SdkAgent
9
+ from agents import Runner as SdkRunner
10
+
11
+ _SDK_AVAILABLE = True
12
+ except ImportError:
13
+ _SDK_AVAILABLE = False
14
+
15
+ from clauseguard.agents.classifier import run_classifier
16
+ from clauseguard.agents.extractor import run_extractor
17
+ from clauseguard.agents.reporter import run_reporter
18
+ from clauseguard.agents.risk_scorer import run_risk_scorer
19
+ from clauseguard.agents.translator import run_translator
20
+ from clauseguard.config.settings import MAX_CLAUSES, TIMEOUT_SECONDS
21
+ from clauseguard.models.clause import ClauseList
22
+ from clauseguard.models.findings import ScoredClause
23
+ from clauseguard.models.report import FinalReport
24
+
25
+ # ── Live Agent Event System ──
26
+ # The orchestrator emits events via a callback so the UI can show live status.
27
+ # Default is a no-op; the UI replaces it with a Streamlit-aware callback at runtime.
28
+
29
+ _live_event_callback: Callable[[str, str, dict[str, Any]], None] = lambda agent, status, details: None
30
+
31
+
32
def set_event_callback(cb: Callable[[str, str, dict[str, Any]], None]) -> None:
    """Install the function that receives live agent events.

    Replaces the module-level callback, so every subsequent ``_emit`` call
    is routed to ``cb``.

    Args:
        cb: Called as ``cb(agent_name, status, details)``. The statuses
            emitted in this module are 'running', 'completed' and 'failed';
            ``details`` is a dict that may hold keys such as 'message',
            'clause_count' or 'severity_counts'.
    """
    global _live_event_callback
    _live_event_callback = cb
42
+
43
+
44
def _emit(agent: str, status: str, **details: Any) -> None:
    """Send a live event for ``agent`` to the registered callback.

    Delivery is best-effort: an exception raised by the callback never
    propagates into the pipeline. It is logged at debug level rather than
    silently discarded, so a broken UI callback can be diagnosed.

    Args:
        agent: Display name of the agent the event concerns.
        status: Event status, e.g. 'running', 'completed' or 'failed'.
        **details: Extra fields passed to the callback as a single dict.
    """
    try:
        _live_event_callback(agent, status, details)
    except Exception:
        # ``logger`` is a module-level name resolved at call time.
        logger.debug(
            "Live event callback failed for %s (%s)", agent, status, exc_info=True
        )
50
+
51
+
52
+ logger = logging.getLogger(__name__)
53
+
54
+
55
async def run_pipeline(file_content: str, filename: str) -> FinalReport:
    """Run the contract through all five agents and return the report.

    Order: Extractor -> Classifier -> Risk Scorer -> Translator -> Reporter.

    When the OpenAI Agents SDK is importable, the SDK handoff pipeline is
    tried first and its result is returned if it is not ``None``. Otherwise
    the agents are invoked directly through the ``_step_*`` wrappers, each
    of which handles its own errors. If extraction yields no clauses, or the
    risk scorer yields nothing (in which case MEDIUM-severity placeholder
    findings are substituted), the report is flagged as partial.

    Args:
        file_content: The extracted text content of the contract.
        filename: Name of the contract file (used in the report).

    Returns:
        The FinalReport produced by the Reporter step.
    """
    if _SDK_AVAILABLE:
        logger.info("OpenAI Agents SDK handoff pipeline active")
        sdk_report = await _run_sdk_pipeline(file_content, filename)
        if sdk_report is not None:
            return sdk_report

    is_partial = False
    truncation_note = ""
    scored: List[ScoredClause] = []

    clauses: ClauseList = await _step_extract(file_content, filename)
    if clauses.total_clauses > 0:
        truncation_note = _check_truncation(clauses, file_content)
        if clauses.total_clauses >= MAX_CLAUSES:
            logger.warning("Document truncated to %d clauses (max %d)", clauses.total_clauses, MAX_CLAUSES)

        clauses = await _step_classify(clauses)
        scored = await _step_risk_score(clauses)
        if not scored:
            # Scoring failed: keep the clauses visible with placeholder findings.
            is_partial = True
            logger.warning("Risk scorer produced no results — using fallback severity (MEDIUM) for all clauses")
            scored = _build_fallback_scored_clauses(clauses)
        if scored:
            scored = await _step_translate(scored)
    else:
        is_partial = True

    contract_type = clauses.contract_type or "Other"
    return await _step_report(scored, filename, contract_type, is_partial, truncation_note)
104
+
105
+
106
async def _run_sdk_pipeline(file_content: str, filename: str) -> FinalReport | None:
    """Run the handoff chain through the OpenAI Agents SDK as a demonstration.

    Handoff chain: Extractor -> Classifier -> Risk Scorer -> Translator -> Reporter

    NOTE(review): this function currently ALWAYS returns ``None``. The SDK
    run result is only logged, never converted into a FinalReport, so the
    caller always falls back to the direct-call pipeline afterwards.

    The SDK run is bounded by ``TIMEOUT_SECONDS`` like every other pipeline
    step, so an unresponsive endpoint cannot block the fallback indefinitely.

    Args:
        file_content: The extracted text content of the contract.
        filename: Name of the contract file (mentioned in the prompt).

    Returns:
        ``None`` in every case; failures (including timeouts) are logged
        as warnings and never raised.
    """
    try:
        from clauseguard.config.prompts import (
            CLASSIFIER_SYSTEM_PROMPT,
            EXTRACTOR_SYSTEM_PROMPT,
            REPORTER_SYSTEM_PROMPT,
            RISK_SCORER_SYSTEM_PROMPT,
            TRANSLATOR_SYSTEM_PROMPT,
        )
        from clauseguard.config.settings import MODEL_NAME

        # (agent name, system prompt) in handoff order.
        agent_specs = (
            ("Contract Extractor", EXTRACTOR_SYSTEM_PROMPT),
            ("Clause Classifier", CLASSIFIER_SYSTEM_PROMPT),
            ("Risk Scorer", RISK_SCORER_SYSTEM_PROMPT),
            ("Plain English Translator", TRANSLATOR_SYSTEM_PROMPT),
            ("Report Compiler", REPORTER_SYSTEM_PROMPT),
        )
        chain = [
            SdkAgent(name=name, instructions=prompt, model=MODEL_NAME)
            for name, prompt in agent_specs
        ]
        # Each agent hands off to the next; the last one has no handoff.
        for current_agent, next_agent in zip(chain, chain[1:]):
            current_agent.handoffs = [next_agent]

        logger.info("SDK handoff chain: Extractor -> Classifier -> Risk Scorer -> Translator -> Reporter")
        result = await asyncio.wait_for(
            SdkRunner.run(
                chain[0],
                f"Extract all clauses from this contract file '{filename}':\n\n{file_content}",
            ),
            timeout=TIMEOUT_SECONDS,
        )
        logger.info("SDK pipeline completed with %d steps", len(result.new_items) if result else 0)
    except Exception as e:
        # Covers SDK errors and asyncio.TimeoutError alike.
        logger.warning("SDK handoff pipeline not fully available, falling back to direct calls: %s", e)

    return None
163
+
164
+
165
def _check_truncation(clause_list: ClauseList, original_text: str) -> str:
    """Return a truncation notice when the clause count reached MAX_CLAUSES.

    Returns:
        An explanatory note that includes the document's approximate word
        count, or an empty string when the clause count is below the limit.
    """
    if clause_list.total_clauses < MAX_CLAUSES:
        return ""

    approx_words = len(original_text.split())
    note = (
        f"Document exceeded maximum clause limit ({MAX_CLAUSES}). "
        f"Only the first ~{MAX_CLAUSES} clauses were processed from a document "
        f"of approximately {approx_words} words. Some clauses may not appear in this report."
    )
    return note
175
+
176
+
177
def _build_fallback_scored_clauses(clause_list: ClauseList) -> List[ScoredClause]:
    """Wrap every clause in a MEDIUM-severity "Needs Human Review" finding.

    Used when the risk scorer returns nothing, so the clauses still appear
    in the report with an explicit request for manual review instead of the
    report looking as if no issues were found.
    """
    from clauseguard.models.findings import RiskFinding, ScoredClause, Severity

    def _placeholder(clause: Any) -> RiskFinding:
        # One generic finding per clause, mentioning its classified type.
        reason = (
            "The automated risk analyzer could not evaluate this clause. "
            f"Type: {clause.clause_type.value}. "
            "Please review manually or consult legal counsel."
        )
        return RiskFinding(
            clause_id=clause.id,
            severity=Severity.MEDIUM,
            risk_title="Needs Human Review",
            risk_reason=reason,
            recommended_action="Review this clause manually — the AI risk scorer could not complete.",
        )

    return [
        ScoredClause(clause=clause, finding=_placeholder(clause))
        for clause in clause_list.clauses
    ]
200
+
201
+
202
async def _step_extract(file_content: str, filename: str) -> ClauseList:
    """Run the Extractor under a timeout, emitting live status events.

    Returns:
        The extracted ClauseList, or an empty ``ClauseList()`` when the
        agent times out or raises.
    """
    try:
        logger.info("Extracting clauses from document...")
        _emit("Extractor", "running", message="Segmenting document into individual clauses")
        extraction = run_extractor(file_content, filename)
        clauses = await asyncio.wait_for(extraction, timeout=TIMEOUT_SECONDS)
        _emit(
            "Extractor",
            "completed",
            message=f"Found {clauses.total_clauses} clauses",
            clause_count=clauses.total_clauses,
        )
        return clauses
    except asyncio.TimeoutError:
        _emit("Extractor", "failed", message="Timed out")
        logger.error("Extractor agent timed out after %ds", TIMEOUT_SECONDS)
        return ClauseList()
    except Exception as exc:
        # The UI message is capped at 80 characters; the log gets the full error.
        _emit("Extractor", "failed", message=str(exc)[:80])
        logger.error("Extractor agent failed: %s", exc)
        return ClauseList()
221
+
222
+
223
async def _step_classify(clause_list: ClauseList) -> ClauseList:
    """Run the Classifier under a timeout, emitting live status events.

    Returns:
        The classified ClauseList, or the input ``clause_list`` unchanged
        when the agent times out or raises.
    """
    try:
        clause_count = clause_list.total_clauses
        logger.info("Classifying %d clauses...", clause_count)
        _emit("Classifier", "running", message=f"Labeling {clause_list.total_clauses} clauses by type")
        classified = await asyncio.wait_for(
            run_classifier(clause_list),
            timeout=TIMEOUT_SECONDS,
        )
        summary = f"Detected contract type: {classified.contract_type}"
        _emit("Classifier", "completed", message=summary)
        return classified
    except asyncio.TimeoutError:
        _emit("Classifier", "failed", message="Timed out")
        logger.error("Classifier agent timed out")
        return clause_list
    except Exception as exc:
        # The UI message is capped at 80 characters; the log gets the full error.
        _emit("Classifier", "failed", message=str(exc)[:80])
        logger.error("Classifier agent failed: %s", exc)
        return clause_list
242
+
243
+
244
async def _step_risk_score(clause_list: ClauseList) -> List[ScoredClause]:
    """Run the Risk Scorer under a timeout, emitting live status events.

    On success the 'completed' event reports how many findings are CRITICAL
    and how many are HIGH.

    Returns:
        The scored clauses, or an empty list when the agent times out or
        raises.
    """
    try:
        logger.info("Scoring risks for %d clauses...", clause_list.total_clauses)
        _emit("Risk Scorer", "running", message=f"Evaluating severity for {clause_list.total_clauses} clauses")
        scored = await asyncio.wait_for(
            run_risk_scorer(clause_list),
            timeout=TIMEOUT_SECONDS,
        )
        critical_total = len([item for item in scored if item.finding.severity.value == "CRITICAL"])
        high_total = len([item for item in scored if item.finding.severity.value == "HIGH"])
        _emit(
            "Risk Scorer",
            "completed",
            message=f"Found {critical_total} critical, {high_total} high-risk clauses",
            severity_counts={"critical": critical_total, "high": high_total},
        )
        return scored
    except asyncio.TimeoutError:
        _emit("Risk Scorer", "failed", message="Timed out")
        logger.error("Risk Scorer agent timed out")
        return []
    except Exception as exc:
        # The UI message is capped at 80 characters; the log gets the full error.
        _emit("Risk Scorer", "failed", message=str(exc)[:80])
        logger.error("Risk Scorer agent failed: %s", exc)
        return []
267
+
268
+
269
async def _step_translate(scored_clauses: List[ScoredClause]) -> List[ScoredClause]:
    """Run the Translator under a timeout, emitting live status events.

    Returns:
        The translated clauses, or the input ``scored_clauses`` unchanged
        when the agent times out or raises.
    """
    try:
        logger.info("Translating %d clauses to plain English...", len(scored_clauses))
        _emit(
            "Translator",
            "running",
            message=f"Writing plain-English versions + negotiation tips for {len(scored_clauses)} clauses",
        )
        translation = run_translator(scored_clauses)
        translated = await asyncio.wait_for(translation, timeout=TIMEOUT_SECONDS)
        _emit("Translator", "completed", message="Plain English translations ready")
        return translated
    except asyncio.TimeoutError:
        _emit("Translator", "failed", message="Timed out")
        logger.error("Translator agent timed out")
        return scored_clauses
    except Exception as exc:
        # The UI message is capped at 80 characters; the log gets the full error.
        _emit("Translator", "failed", message=str(exc)[:80])
        logger.error("Translator agent failed: %s", exc)
        return scored_clauses
288
+
289
+
290
async def _step_report(
    scored_clauses: List[ScoredClause],
    filename: str,
    contract_type: str,
    partial: bool = False,
    truncation_note: str = "",
) -> FinalReport:
    """Run the Reporter with error handling, emitting live status events.

    Unlike the other steps, the call is not wrapped in ``asyncio.wait_for``
    (presumably the reporter bounds its own model calls — confirm in
    ``run_reporter``).

    Returns:
        The compiled FinalReport, or a minimal
        ``FinalReport(contract_name=filename, processed_normally=False)``
        when the reporter raises.
    """
    try:
        logger.info("Building final report...")
        _emit("Reporter", "running", message="Compiling final risk report")
        report = await run_reporter(
            scored_clauses,
            filename,
            contract_type,
            partial,
            truncation_note,
        )
        score_line = f"Report ready — score: {report.summary.overall_score}/10"
        _emit("Reporter", "completed", message=score_line)
        return report
    except Exception as exc:
        # The UI message is capped at 80 characters; the log gets the full error.
        _emit("Reporter", "failed", message=str(exc)[:80])
        logger.error("Reporter agent failed: %s", exc)
        return FinalReport(contract_name=filename, processed_normally=False)
agents/reporter.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent 5: Reporter — compiles the final risk report."""
2
+
3
+ import json
4
+ import logging
5
+ from datetime import datetime
6
+ from typing import List
7
+
8
+ from clauseguard.config.prompts import REPORTER_SYSTEM_PROMPT
9
+ from clauseguard.config.settings import MODEL_NAME, TEMPERATURE
10
+ from clauseguard.models.findings import ScoredClause, Severity
11
+ from clauseguard.models.report import FinalReport, RiskSummary
12
+ from clauseguard.services.model_service import call_model, clean_json_response
13
+
14
+ logger = logging.getLogger(__name__)
15
+ MAX_RETRIES = 1
16
+
17
+
18
async def run_reporter(
    scored_clauses: List[ScoredClause],
    contract_name: str,
    contract_type: str,
    partial: bool = False,
    truncation_note: str = "",
) -> FinalReport:
    """Assemble the FinalReport from already-scored clauses.

    The report is built entirely from helpers in this module: clauses are
    ordered by severity, summarised, mined for the top actions and rendered
    to markdown.

    Args:
        scored_clauses: All scored clauses with risk findings.
        contract_name: Name of the source contract file.
        contract_type: Detected type of the contract.
        partial: True when earlier agents failed; stored inverted as
            ``processed_normally``.
        truncation_note: Note about document truncation, stored on the report.

    Returns:
        A FinalReport carrying the summary, top actions, sorted clauses and
        the markdown rendering.
    """
    ranked = _sort_by_severity(scored_clauses)
    risk_summary = _build_summary(ranked, contract_type)
    priority_actions = _extract_top_actions(ranked)
    report_body = _build_markdown_programmatically(
        ranked, contract_name, contract_type, risk_summary, priority_actions
    )

    report_fields = dict(
        contract_name=contract_name,
        generated_at=datetime.now(),
        summary=risk_summary,
        top_3_actions=priority_actions,
        scored_clauses=ranked,
        markdown_report=report_body,
        processed_normally=not partial,
        truncation_note=truncation_note,
    )
    return FinalReport(**report_fields)
55
+
56
+
57
def _sort_by_severity(scored_clauses: List[ScoredClause]) -> List[ScoredClause]:
    """Return a new list ordered from CRITICAL down to INFO.

    The sort is stable, so clauses of equal severity keep their input order.
    Severities outside the known five sort last.
    """
    ranking = (Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO)
    rank_of = {level: position for position, level in enumerate(ranking)}

    def _rank(entry: ScoredClause) -> int:
        return rank_of.get(entry.finding.severity, 99)

    ordered = list(scored_clauses)
    ordered.sort(key=_rank)
    return ordered
61
+
62
+
63
def _build_summary(scored_clauses: List[ScoredClause], contract_type: str) -> RiskSummary:
    """Tally clauses per severity and derive the overall 0-10 risk score.

    The score is the mean per-clause weight (CRITICAL=10, HIGH=7, MEDIUM=4,
    LOW=1, everything else 0), capped at 10 and rounded to one decimal.
    An empty clause list scores 0.0.
    """
    tally = dict.fromkeys(
        (Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO), 0
    )
    for entry in scored_clauses:
        level = entry.finding.severity
        tally[level] = tally.get(level, 0) + 1

    clause_total = len(scored_clauses)
    overall = 0.0
    if clause_total > 0:
        weights = (
            (Severity.CRITICAL, 10),
            (Severity.HIGH, 7),
            (Severity.MEDIUM, 4),
            (Severity.LOW, 1),
        )
        weighted_sum = sum(tally[level] * weight for level, weight in weights)
        overall = round(min(weighted_sum / clause_total, 10.0), 1)

    return RiskSummary(
        total_clauses=clause_total,
        critical_count=tally[Severity.CRITICAL],
        high_count=tally[Severity.HIGH],
        medium_count=tally[Severity.MEDIUM],
        low_count=tally[Severity.LOW],
        overall_score=overall,
        contract_type=contract_type,
    )
90
+
91
+
92
def _extract_top_actions(scored_clauses: List[ScoredClause]) -> List[str]:
    """Pick up to three distinct recommended actions, most severe first.

    Clauses are scanned once per severity level (CRITICAL through INFO);
    empty and duplicate actions are skipped. If nothing qualifies, a single
    generic recommendation is returned instead.
    """
    priority = (Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO)

    # Lazily yields non-empty actions in severity-then-input order.
    candidates = (
        entry.finding.recommended_action
        for level in priority
        for entry in scored_clauses
        if entry.finding.severity == level and entry.finding.recommended_action
    )

    chosen: List[str] = []
    for action in candidates:
        if action in chosen:
            continue
        chosen.append(action)
        if len(chosen) >= 3:
            return chosen

    if not chosen:
        chosen.append("Review all clauses with legal counsel before signing.")
    return chosen[:3]
109
+
110
+
111
def _build_markdown_programmatically(
    scored_clauses: List[ScoredClause],
    contract_name: str,
    contract_type: str,
    summary: RiskSummary,
    top_3: List[str],
) -> str:
    """Render the report as markdown: header, actions, severity table, clauses.

    Args:
        scored_clauses: Clauses to list, in the order they should appear.
        contract_name: Shown in the header.
        contract_type: Shown in the header.
        summary: Source of the overall score and per-severity counts.
        top_3: Actions rendered as a numbered list.

    Returns:
        The full markdown document as one newline-joined string.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    header = [
        "# ClauseGuard Risk Report",
        f"**Contract:** {contract_name}",
        f"**Type:** {contract_type}",
        f"**Overall Risk Score:** {summary.overall_score}/10",
        f"**Generated:** {timestamp}",
        "",
        "---",
        "",
        "## Executive Summary",
        _build_executive_summary_text(scored_clauses, summary),
        "",
        "## Top 3 Actions Before Signing",
    ]

    action_items = [f"{rank}. {action}" for rank, action in enumerate(top_3, 1)]

    # The summary has no explicit info counter, so it is whatever remains
    # after the four graded severities are subtracted from the total.
    graded = summary.critical_count + summary.high_count + summary.medium_count + summary.low_count
    info_count = summary.total_clauses - graded

    risk_table = [
        "",
        "## Risk Summary",
        "| Severity | Count |",
        "|----------|-------|",
        f"| 🔴 Critical | {summary.critical_count} |",
        f"| 🟠 High | {summary.high_count} |",
        f"| 🟡 Medium | {summary.medium_count} |",
        f"| 🟢 Low | {summary.low_count} |",
        f"| ℹ️ Info | {max(info_count, 0)} |",
        "",
        "---",
        "",
        "## Clause Analysis",
        "",
    ]

    clause_sections: List[str] = []
    for entry in scored_clauses:
        clause, finding = entry.clause, entry.finding
        marker = _severity_emoji(finding.severity)
        section = [
            f"### {clause.clause_type.value} — {marker} {finding.severity.value}",
            f"**Original:** {clause.raw_text}",
        ]
        if clause.plain_english:
            section.append(f"**Plain English:** {clause.plain_english}")
        section.append(f"**Risk:** {finding.risk_reason}")
        if finding.recommended_action:
            section.append(f"**Action:** {finding.recommended_action}")
        section.append("")  # blank line between clause sections
        clause_sections.extend(section)

    return "\n".join(header + action_items + risk_table + clause_sections)
172
+
173
+
174
def _build_executive_summary_text(
    scored_clauses: List[ScoredClause], summary: RiskSummary
) -> str:
    """Build a brief executive summary of the main risks.

    Args:
        scored_clauses: Clauses to inspect for CRITICAL/HIGH findings.
        summary: Source of the critical and high counts quoted in the text.

    Returns:
        A reassuring paragraph when no CRITICAL/HIGH clause exists; otherwise
        a paragraph naming the counts and the clause types involved.
    """
    high_severity = [
        sc for sc in scored_clauses
        if sc.finding.severity in (Severity.CRITICAL, Severity.HIGH)
    ]

    if not high_severity:
        return (
            "This contract appears to be reasonably balanced with no critical or high-risk clauses identified. "
            "Review the medium-risk findings below for items that may warrant attention."
        )

    # De-duplicate while preserving first-seen order. A set would make the
    # order of clause types vary between runs (string hash randomisation),
    # so the same contract could produce differently worded reports.
    types_found = list(dict.fromkeys(sc.clause.clause_type.value for sc in high_severity))
    types_str = ", ".join(types_found)

    return (
        f"This contract contains {summary.critical_count} critical and {summary.high_count} high-severity risks "
        f"that require immediate attention. The most concerning areas involve: {types_str}. "
        f"We strongly recommend addressing the top 3 actions below before signing this agreement."
    )
194
+
195
+
196
def _severity_emoji(severity: Severity) -> str:
    """Look up the marker emoji for a severity; unknown values get a white circle."""
    levels = (Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO)
    glyphs = ("🔴", "🟠", "🟡", "🟢", "ℹ️")
    return dict(zip(levels, glyphs)).get(severity, "⚪")
agents/risk_scorer.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent 3: Risk Scorer — evaluates severity of each clause."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import List
6
+
7
+ from clauseguard.config.prompts import RISK_SCORER_SYSTEM_PROMPT
8
+ from clauseguard.models.clause import Clause, ClauseType
9
+ from clauseguard.models.findings import RiskFinding, ScoredClause, Severity
10
+ from clauseguard.services.model_service import call_model, clean_json_response
11
+
12
+ logger = logging.getLogger(__name__)
13
+ MAX_RETRIES = 1
14
+
15
+
16
async def run_risk_scorer(clause_list) -> List[ScoredClause]:
    """Ask the model to score every clause and parse its reply.

    Args:
        clause_list: A ClauseList with classified clauses; it is serialised
            with ``model_dump_json`` and embedded in the user prompt.

    Returns:
        The parsed ScoredClause objects, or an empty list when the model
        call yields no content.
    """
    serialized = clause_list.model_dump_json(indent=2)
    prompt = f"Score the risk for each of these clauses:\n{serialized}"

    raw_reply = await call_model(
        system_prompt=RISK_SCORER_SYSTEM_PROMPT,
        user_prompt=prompt,
        agent_name="Risk Scorer",
        max_retries=MAX_RETRIES,
    )

    if raw_reply is not None:
        return _parse_response(raw_reply)

    logger.warning("Risk Scorer produced no valid output after retries")
    return []
39
+
40
+
41
def _coerce_enum(enum_cls, raw, default):
    """Map a raw model value onto ``enum_cls``, tolerating whitespace/case drift."""
    candidates = [raw]
    if isinstance(raw, str):
        # Retried only after the exact value fails, so enums whose values are
        # not upper-case still resolve exactly as before.
        candidates += [raw.strip(), raw.strip().upper()]
    for candidate in candidates:
        try:
            return enum_cls(candidate)
        except ValueError:
            continue
    return default


def _parse_response(content: str) -> List[ScoredClause]:
    """Parse the risk scorer JSON response into ScoredClause objects.

    Accepts a JSON array of items, a single item object, or an object that
    wraps the items under ``"scored_clauses"``. Each item is expected to hold
    a ``"clause"`` and a ``"finding"`` object; missing fields get defaults.

    Args:
        content: Raw model output.

    Returns:
        One ScoredClause per well-formed item. Items that are not JSON
        objects are skipped with a warning instead of aborting the batch.

    Raises:
        ValueError: If the payload is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or is neither an object nor an array.
    """
    cleaned = clean_json_response(content)
    data = json.loads(cleaned)

    if isinstance(data, dict):
        data = data.get("scored_clauses", [data])
    if isinstance(data, dict):
        # The wrapper key held a single item object rather than a list.
        data = [data]
    if not isinstance(data, list):
        raise ValueError(
            f"Risk Scorer returned JSON {type(data).__name__}; expected an object or array"
        )

    scored_clauses: List[ScoredClause] = []

    for item in data:
        if not isinstance(item, dict):
            # One stray element must not discard every other scored clause.
            logger.warning("Risk Scorer: skipping malformed item of type %s", type(item).__name__)
            continue

        # A null or non-object sub-structure is treated like a missing key.
        clause_data = item.get("clause")
        if not isinstance(clause_data, dict):
            clause_data = {}
        finding_data = item.get("finding")
        if not isinstance(finding_data, dict):
            finding_data = {}

        clause_type = _coerce_enum(
            ClauseType, clause_data.get("clause_type", "OTHER"), ClauseType.OTHER
        )
        # Without normalisation a reply such as "high" would silently be
        # downgraded to INFO and drop out of the risk summary.
        severity = _coerce_enum(Severity, finding_data.get("severity", "INFO"), Severity.INFO)

        clause = Clause(
            id=clause_data.get("id", 0),
            raw_text=clause_data.get("raw_text", ""),
            plain_english=clause_data.get("plain_english"),
            clause_type=clause_type,
            section_heading=clause_data.get("section_heading"),
            position=clause_data.get("position", 0),
            confidence_score=clause_data.get("confidence_score"),
        )

        finding = RiskFinding(
            clause_id=finding_data.get("clause_id", clause.id),
            severity=severity,
            risk_title=finding_data.get("risk_title", "Risk Identified"),
            risk_reason=finding_data.get("risk_reason", ""),
            recommended_action=finding_data.get("recommended_action", ""),
            negotiation_tip=finding_data.get("negotiation_tip", ""),
        )

        scored_clauses.append(ScoredClause(clause=clause, finding=finding))

    return scored_clauses
agents/translator.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent 4: Translator — writes plain English explanations and negotiation support."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import List
6
+
7
+ from clauseguard.config.prompts import TRANSLATOR_SYSTEM_PROMPT
8
+ from clauseguard.models.clause import Clause, ClauseType
9
+ from clauseguard.models.findings import RiskFinding, ScoredClause, Severity
10
+ from clauseguard.services.model_service import call_model, clean_json_response
11
+
12
+ logger = logging.getLogger(__name__)
13
+ MAX_RETRIES = 1
14
+
15
+
16
async def run_translator(scored_clauses: List[ScoredClause]) -> List[ScoredClause]:
    """Translate legal clauses into plain English and write actionable recommendations.

    Args:
        scored_clauses: A list of ScoredClause objects from the Risk Scorer.

    Returns:
        Updated ScoredClause list with plain_english and recommended_action
        filled in, or the input list unchanged when the model yields no content.
    """
    # mode="json" makes pydantic emit JSON-native values (enum values, ISO
    # strings, ...), so json.dumps cannot fail on a field type it does not
    # know. The risk scorer serialises the same way via model_dump_json.
    scored_json = [
        {
            "clause": sc.clause.model_dump(mode="json"),
            "finding": sc.finding.model_dump(mode="json"),
        }
        for sc in scored_clauses
    ]
    input_json = json.dumps(scored_json, indent=2)

    content = await call_model(
        system_prompt=TRANSLATOR_SYSTEM_PROMPT,
        user_prompt=f"Translate these clauses into plain English:\n{input_json}",
        agent_name="Translator",
        max_retries=MAX_RETRIES,
    )

    if content is None:
        logger.warning("Translator produced no valid output, returning original clauses")
        return scored_clauses

    return _parse_response(content, scored_clauses)
46
+
47
+
48
def _parse_response(content: str, original: List[ScoredClause]) -> List[ScoredClause]:
    """Parse the translator response and merge its text into the original clauses.

    Items are matched to ``original`` by position. Only non-empty values from
    the model overwrite existing data, and originals the model did not return
    are kept unchanged so that no clause disappears from the report.

    Args:
        content: Raw model output (a JSON array, a single object, or an
            object wrapping the array under ``"scored_clauses"``).
        original: The scored clauses that were sent to the translator.

    Returns:
        A list at least as long as ``original``: merged copies for matched
        positions, untouched originals for positions the model omitted, and
        newly built clauses for any non-empty extra items.

    Raises:
        ValueError: If ``content`` is not valid JSON.
    """
    cleaned = clean_json_response(content)
    data = json.loads(cleaned)

    # Accept the same wrapper object the risk scorer parser accepts.
    if isinstance(data, dict) and isinstance(data.get("scored_clauses"), list):
        data = data["scored_clauses"]
    items = data if isinstance(data, list) else [data]

    if len(items) != len(original):
        logger.warning(
            "Translator returned %d items for %d clauses; unmatched clauses stay untranslated",
            len(items),
            len(original),
        )

    finding_fields = (
        "recommended_action",
        "negotiation_tip",
        "safer_clause_version",
        "negotiation_message",
        "impact_scenarios",
    )
    result: List[ScoredClause] = []

    for i, item in enumerate(items):
        # Malformed items and sub-objects are treated as "nothing to merge".
        if not isinstance(item, dict):
            item = {}
        clause_data = item.get("clause")
        if not isinstance(clause_data, dict):
            clause_data = {}
        finding_data = item.get("finding")
        if not isinstance(finding_data, dict):
            finding_data = {}

        if i >= len(original):
            # Extra item with no counterpart: keep it only if it carries data.
            if clause_data or finding_data:
                result.append(_build_scored_clause_from_data(clause_data, finding_data))
            continue

        orig = original[i]

        # Never replace existing text with None/"" just because the model
        # left a field out of its reply.
        clause_updates = {}
        if clause_data.get("plain_english"):
            clause_updates["plain_english"] = clause_data["plain_english"]
        finding_updates = {
            field: finding_data[field] for field in finding_fields if finding_data.get(field)
        }

        result.append(
            ScoredClause(
                clause=orig.clause.model_copy(update=clause_updates),
                finding=orig.finding.model_copy(update=finding_updates),
            )
        )

    # The model returned fewer items than it was given: keep the remainder.
    result.extend(original[len(items):])
    return result
85
+
86
+
87
def _build_scored_clause_from_data(clause_data: dict, finding_data: dict) -> ScoredClause:
    """Construct a ScoredClause from the raw clause/finding dicts of a model reply.

    Missing keys fall back to defaults; an unrecognised clause type becomes
    ``ClauseType.OTHER`` and an unrecognised severity becomes ``Severity.INFO``.
    """
    try:
        kind = ClauseType(clause_data.get("clause_type", "OTHER"))
    except ValueError:
        kind = ClauseType.OTHER

    try:
        level = Severity(finding_data.get("severity", "INFO"))
    except ValueError:
        level = Severity.INFO

    clause_fields = {
        "id": clause_data.get("id", 0),
        "raw_text": clause_data.get("raw_text", ""),
        "plain_english": clause_data.get("plain_english"),
        "clause_type": kind,
        "section_heading": clause_data.get("section_heading"),
        "position": clause_data.get("position", 0),
        "confidence_score": clause_data.get("confidence_score"),
    }
    built_clause = Clause(**clause_fields)

    finding_fields = {
        # Falls back to the clause's own id when the finding names none.
        "clause_id": finding_data.get("clause_id", built_clause.id),
        "severity": level,
        "risk_title": finding_data.get("risk_title", "Risk Identified"),
        "risk_reason": finding_data.get("risk_reason", ""),
        "recommended_action": finding_data.get("recommended_action", ""),
        "negotiation_tip": finding_data.get("negotiation_tip", ""),
        "safer_clause_version": finding_data.get("safer_clause_version", ""),
        "negotiation_message": finding_data.get("negotiation_message", ""),
        "impact_scenarios": finding_data.get("impact_scenarios", []),
    }
    built_finding = RiskFinding(**finding_fields)

    return ScoredClause(clause=built_clause, finding=built_finding)
app.py ADDED
@@ -0,0 +1,1518 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClauseGuard Streamlit UI — redesigned modern SaaS edition."""
2
+ import asyncio
3
+ import logging
4
+ import sys
5
+ import time
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+
9
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
10
+
11
+ import streamlit as st
12
+ import pandas as pd
13
+
14
+ from clauseguard.agents.copilot import build_contract_context, run_copilot_sync
15
+ from clauseguard.agents.orchestrator import run_pipeline, set_event_callback
16
+ from clauseguard.config.settings import validate_config
17
+ from clauseguard.models.findings import RiskFinding, ScoredClause, Severity
18
+ from clauseguard.models.report import FinalReport
19
+ from clauseguard.tools.file_tools import extract_text
20
+
21
# Configure root logging once at import time; INFO level for the whole app.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ── Constants ────────────────────────────────────────────────────────────────

# Size cap in megabytes and its byte equivalent.
# NOTE(review): presumably the upload limit — confirm where it is enforced.
MAX_FILE_SIZE_MB = 10
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
# File extensions, without the leading dot.
ALLOWED_EXTENSIONS = ["pdf", "txt", "docx"]

# Display labels for the four result tabs, in order.
TAB_NAMES = ["📊 Overview", "📋 Clauses", "💬 Negotiation", "🤖 Chat Assistant"]
# NOTE(review): looks like the session-state key of the tab selector widget — confirm.
TAB_SESSION_KEY = "tab_selector_radio"

# Pipeline agents in execution order, plus the icon shown for each status and
# the circled step number shown for each agent.
AGENT_NAMES = ["Extractor", "Classifier", "Risk Scorer", "Translator", "Reporter"]
AGENT_ICONS = {"running": "⚙️", "completed": "✅", "failed": "❌", "pending": "⏳"}
AGENT_STEP_NUMBERS = {"Extractor": "①", "Classifier": "②", "Risk Scorer": "③",
                      "Translator": "④", "Reporter": "⑤"}

# Per-severity display attributes: badge label plus CSS colour values for the
# border, background, text and tag background.
SEVERITY_STYLE = {
    Severity.CRITICAL: {"badge": "🔴 CRITICAL", "border": "#ff4444", "bg": "rgba(255,68,68,0.12)", "color": "#ff6666", "tag_bg": "rgba(255,68,68,0.18)"},
    Severity.HIGH: {"badge": "🟠 HIGH", "border": "#ff8c00", "bg": "rgba(255,140,0,0.12)", "color": "#ffaa44", "tag_bg": "rgba(255,140,0,0.15)"},
    Severity.MEDIUM: {"badge": "🟡 MEDIUM", "border": "#ffd700", "bg": "rgba(255,215,0,0.12)", "color": "#ffdd55", "tag_bg": "rgba(255,215,0,0.12)"},
    Severity.LOW: {"badge": "🟢 LOW", "border": "#32cd32", "bg": "rgba(50,205,50,0.12)", "color": "#55dd55", "tag_bg": "rgba(50,205,50,0.10)"},
    Severity.INFO: {"badge": "ℹ️ INFO", "border": "#1e90ff", "bg": "rgba(30,144,255,0.08)", "color": "#55aaff", "tag_bg": "rgba(30,144,255,0.08)"},
}
45
+
46
+
47
def _check_model_connectivity() -> tuple[bool, str]:
    """Quick connectivity check against the configured model endpoint.

    Lists the endpoint's models with a 10-second timeout on a short-lived
    event loop.

    Returns:
        (ok, error_message) — ok is True if the endpoint is reachable;
        otherwise error_message is a short, user-facing explanation.
    """
    from clauseguard.services.model_service import get_client

    async def _probe() -> None:
        # Create the client and the request inside the running loop.
        # NOTE(review): if get_client() caches its client, connections opened
        # here belong to this short-lived loop — confirm reuse is safe.
        client = get_client()
        await asyncio.wait_for(client.models.list(), timeout=10)

    try:
        # asyncio.run owns the loop's whole lifecycle. The previous code
        # installed its own loop as the thread's current loop and then closed
        # it, leaving a closed loop registered for later asyncio calls.
        asyncio.run(_probe())
        return True, ""
    except asyncio.TimeoutError:
        return False, "Model endpoint timed out — the vLLM server may be offline or unreachable"
    except Exception as e:
        err = str(e)
        # Classify common network failures by message text.
        if "ConnectionRefusedError" in err or "Connection refused" in err or "ConnectError" in err:
            return False, "Connection refused — vLLM server is not running at the configured BASE_URL"
        if "Name or service not known" in err or "getaddrinfo" in err.lower():
            return False, "Cannot resolve host — check that the BASE_URL is correct"
        return False, f"Model endpoint error: {err[:120]}"
82
+
83
+ # ═══════════════════════════════════════════════════════════════════════════════
84
+ # CUSTOM CSS
85
+ # ═══════════════════════════════════════════════════════════════════════════════
86
+
87
+ CUSTOM_CSS = """
88
+ <style>
89
+ .main .block-container { padding-top: 1.5rem; padding-bottom: 2rem; }
90
+
91
+ .stButton > button {
92
+ border-radius: 10px;
93
+ font-weight: 600;
94
+ font-size: 0.95rem;
95
+ padding: 0.65rem 1.5rem;
96
+ transition: all 0.2s ease;
97
+ border: 1px solid rgba(255,255,255,0.08);
98
+ }
99
+ .stButton > button:hover {
100
+ transform: translateY(-1px);
101
+ box-shadow: 0 6px 20px rgba(102,126,234,0.35);
102
+ }
103
+ .stButton > button[kind="primary"] {
104
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
105
+ color: #fff !important;
106
+ border: none !important;
107
+ }
108
+ .stButton > button[kind="secondary"] {
109
+ background: rgba(255,255,255,0.06) !important;
110
+ color: #e0e0e0 !important;
111
+ border: 1px solid rgba(255,255,255,0.12) !important;
112
+ }
113
+
114
+ .stFileUploader section {
115
+ border: 2px dashed #667eea !important;
116
+ border-radius: 14px !important;
117
+ padding: 1.5rem !important;
118
+ background: rgba(102,126,234,0.03) !important;
119
+ transition: all 0.25s ease;
120
+ }
121
+ .stFileUploader section:hover {
122
+ border-color: #8ab4f8 !important;
123
+ background: rgba(102,126,234,0.08) !important;
124
+ }
125
+
126
+ div[role="radiogroup"] {
127
+ display: flex; gap: 4px;
128
+ background: #0e1117; padding: 4px;
129
+ border-radius: 14px; border: 1px solid rgba(255,255,255,0.06);
130
+ margin-bottom: 1rem;
131
+ }
132
+ div[role="radiogroup"] label {
133
+ flex: 1; text-align: center;
134
+ padding: 10px 16px !important;
135
+ border-radius: 10px;
136
+ font-weight: 600; font-size: 0.92rem;
137
+ color: #aaa; cursor: pointer;
138
+ transition: all 0.2s ease;
139
+ }
140
+ div[role="radiogroup"] label:hover { background: rgba(255,255,255,0.04); }
141
+ div[role="radiogroup"] label:has(input:checked) {
142
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
143
+ color: #ffffff !important;
144
+ }
145
+ div[role="radiogroup"] input[type="radio"] {
146
+ position: absolute; opacity: 0; width: 0; height: 0;
147
+ }
148
+
149
+ @media (max-width: 768px) {
150
+ div[role="radiogroup"] label { padding: 8px 10px; font-size: 0.78rem; }
151
+ }
152
+ .stTabs [data-baseweb="tab"] {
153
+ font-weight: 600;
154
+ font-size: 0.95rem;
155
+ padding: 10px 20px;
156
+ border-radius: 10px;
157
+ color: #aaa;
158
+ }
159
+ .stTabs [aria-selected="true"] {
160
+ color: #ffffff !important;
161
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
162
+ border-radius: 10px !important;
163
+ }
164
+
165
+ .stExpander {
166
+ border: 1px solid rgba(255,255,255,0.06) !important;
167
+ border-radius: 10px !important;
168
+ margin-bottom: 0.3rem !important;
169
+ overflow: hidden;
170
+ transition: all 0.15s ease;
171
+ }
172
+ .stExpander:hover {
173
+ border-color: rgba(255,255,255,0.1) !important;
174
+ }
175
+ .stExpander > div:first-child {
176
+ border-radius: 10px !important;
177
+ background: rgba(255,255,255,0.015);
178
+ }
179
+
180
+ .stChatMessage { border-radius: 12px !important; }
181
+
182
+ .stProgress > div > div > div > div {
183
+ background: linear-gradient(90deg, #667eea, #764ba2) !important;
184
+ border-radius: 4px;
185
+ }
186
+
187
+ [data-testid="stMetric"] {
188
+ background: rgba(255,255,255,0.03);
189
+ border: 1px solid rgba(255,255,255,0.06);
190
+ border-radius: 12px;
191
+ padding: 0.75rem 1rem;
192
+ }
193
+ [data-testid="stMetric"] label { font-weight: 500 !important; }
194
+
195
+ .stCodeBlock { border-radius: 10px !important; }
196
+
197
+ .cg-card {
198
+ background: linear-gradient(145deg, #12121f 0%, #0e1117 100%);
199
+ border: 1px solid rgba(255,255,255,0.07);
200
+ border-radius: 14px;
201
+ padding: 1.25rem;
202
+ margin-bottom: 1rem;
203
+ transition: border-color 0.2s ease;
204
+ }
205
+ .cg-card:hover { border-color: rgba(255,255,255,0.12); }
206
+
207
+ .cg-badge {
208
+ display: inline-block;
209
+ padding: 0.25rem 0.7rem;
210
+ border-radius: 20px;
211
+ font-size: 0.75rem;
212
+ font-weight: 700;
213
+ letter-spacing: 0.4px;
214
+ text-transform: uppercase;
215
+ }
216
+
217
+ @keyframes pulse-glow {
218
+ 0%, 100% { opacity: 1; }
219
+ 50% { opacity: 0.6; }
220
+ }
221
+ .agent-running {
222
+ animation: pulse-glow 1.4s ease-in-out infinite;
223
+ }
224
+
225
+ .cg-chip {
226
+ display: inline-block;
227
+ padding: 0.35rem 0.9rem;
228
+ border-radius: 20px;
229
+ font-size: 0.78rem;
230
+ font-weight: 500;
231
+ background: rgba(102,126,234,0.12);
232
+ color: #8ab4f8;
233
+ border: 1px solid rgba(102,126,234,0.2);
234
+ cursor: pointer;
235
+ margin: 0.2rem;
236
+ transition: all 0.15s ease;
237
+ }
238
+ .cg-chip:hover {
239
+ background: rgba(102,126,234,0.25);
240
+ border-color: rgba(102,126,234,0.4);
241
+ }
242
+
243
+ @media (max-width: 768px) {
244
+ .stTabs [data-baseweb="tab"] { padding: 8px 12px; font-size: 0.8rem; }
245
+ }
246
+ </style>
247
+ """
248
+
249
+ # ═══════════════════════════════════════════════════════════════════════════════
250
+ # DEMO REPORT BUILDER
251
+ # ═══════════════════════════════════════════════════════════════════════════════
252
+
253
+ def _build_demo_report() -> FinalReport:
254
+ """Build a pre-cached demo report with negotiation copilot content."""
255
+ from clauseguard.models.clause import Clause, ClauseType
256
+
257
+ demo_with_copilot: list[dict] = [
258
+ {
259
+ "text": "Recipient hereby irrevocably assigns to Company all inventions, discoveries, and intellectual property created during this Agreement and for 1 year after, regardless of whether created on Recipient's own time or equipment.",
260
+ "ctype": "IP_ASSIGNMENT", "sev": "CRITICAL",
261
+ "title": "IP Assignment of Personal Work",
262
+ "reason": "Claims ownership of ALL creations including personal projects on personal time and equipment, extending 1 year after termination.",
263
+ "plain": "You give the company ownership of everything you create — including personal side projects on your own time and equipment — for up to a year after you leave.",
264
+ "action": "Demand a carve-out for inventions created on your own time using your own equipment.",
265
+ "safer": "Employee assigns to Company all inventions directly related to Company's business and created during working hours using Company resources. Inventions created on Employee's own time using personal equipment, and unrelated to Company's business, remain the sole property of Employee.",
266
+ "negotiation": "Hi, I've reviewed the IP clause and would like to request an adjustment to ensure personal projects created outside work hours remain mine. I've suggested alternative wording below. Would you be open to this change? Thanks!",
267
+ "impacts": ["You may lose ownership of any side projects or startups you work on during employment", "The company could claim your open-source contributions made on weekends"],
268
+ },
269
+ {
270
+ "text": "All disputes shall be resolved exclusively through binding arbitration. The parties waive any right to a trial by jury and waive the right to participate in any class action.",
271
+ "ctype": "ARBITRATION", "sev": "CRITICAL",
272
+ "title": "Mandatory Arbitration with Jury + Class Action Waiver",
273
+ "reason": "Forces disputes into private arbitration, waives your constitutional right to a jury trial, and blocks class actions — all with no opt-out.",
274
+ "plain": "You give up your right to sue in court or join a class-action lawsuit. All disputes go through private arbitration instead.",
275
+ "action": "Add an opt-out clause for arbitration — preserve your right to go to court.",
276
+ "safer": "Either party may opt out of binding arbitration by providing written notice within 30 days of signing. Nothing in this section prevents participation in class actions where permitted by law.",
277
+ "negotiation": "Hi, I've reviewed the dispute resolution clause. I'd like to add an opt-out option for arbitration so both parties retain the right to choose their preferred forum. I've suggested language below. Does this work for you?",
278
+ "impacts": ["If the company violates your rights, you cannot sue them in a public court", "You cannot join with other affected parties in a class action — you must fight alone"],
279
+ },
280
+ {
281
+ "text": "For 18 months following termination, Recipient shall not engage in any business competitive with Company, anywhere in the world.",
282
+ "ctype": "NON_COMPETE", "sev": "HIGH",
283
+ "title": "Worldwide Non-Compete — 18 Months",
284
+ "reason": "18-month ban on working for ANY competitor worldwide with no geographic limitation tied to Company's actual operations.",
285
+ "plain": "You cannot work for any competitor anywhere in the world for 18 months after this agreement ends — even if the company doesn't operate in that region.",
286
+ "action": "Reduce duration to 12 months and limit scope to regions where Company actually does business.",
287
+ "safer": "For 12 months following termination, Recipient shall not provide services to direct competitors of Company within the specific metro areas where Company has active business operations.",
288
+ "negotiation": "Hi, the non-compete clause is quite broad — it covers the entire world for 18 months. I'd suggest narrowing the scope to 12 months within regions where the company actually operates. I've drafted alternative language below.",
289
+ "impacts": ["You may be unable to work in your industry anywhere in the world for 18 months after leaving", "Relocating to a new city won't help — the restriction is global"],
290
+ },
291
+ {
292
+ "text": "This Agreement shall automatically renew for successive 1-year terms unless either party provides written notice at least 90 days prior.",
293
+ "ctype": "AUTO_RENEWAL", "sev": "MEDIUM",
294
+ "title": "Auto-Renewal with 90-Day Notice",
295
+ "reason": "Auto-renews annually. 90-day notice period is longer than standard and easy to miss.",
296
+ "plain": "This agreement renews automatically every year. You must give 90 days written notice to cancel — miss the deadline and you're locked in.",
297
+ "action": "Reduce notice period to 30 days or request automatic email reminders.",
298
+ "safer": "", "negotiation": "", "impacts": [],
299
+ },
300
+ {
301
+ "text": "Recipient agrees to hold all Confidential Information in strict confidence.",
302
+ "ctype": "NDA", "sev": "LOW",
303
+ "title": "Standard Confidentiality Obligation",
304
+ "reason": "Standard NDA language requiring reasonable care — no unusual provisions.",
305
+ "plain": "You must keep the company's confidential information secret and only use it as authorized.",
306
+ "action": "No action needed — standard boilerplate.",
307
+ "safer": "", "negotiation": "", "impacts": [],
308
+ },
309
+ {
310
+ "text": "This Agreement shall be governed by the laws of the State of New York.",
311
+ "ctype": "GOVERNING_LAW", "sev": "LOW",
312
+ "title": "Standard Governing Law",
313
+ "reason": "Standard choice-of-law clause selecting New York — common in contracts.",
314
+ "plain": "This agreement is governed by New York law, and disputes must be handled in New York courts.",
315
+ "action": "No action needed unless you are far from New York.",
316
+ "safer": "", "negotiation": "", "impacts": [],
317
+ },
318
+ {
319
+ "text": "This Agreement constitutes the entire agreement between the parties.",
320
+ "ctype": "OTHER", "sev": "INFO",
321
+ "title": "Standard Entire Agreement Clause",
322
+ "reason": "Standard integration clause confirming this is the complete agreement.",
323
+ "plain": "This document is the complete and final agreement between you and the company.",
324
+ "action": "No action needed — standard boilerplate.",
325
+ "safer": "", "negotiation": "", "impacts": [],
326
+ },
327
+ ]
328
+
329
+ scored = []
330
+ for i, d in enumerate(demo_with_copilot, 1):
331
+ clause = Clause(id=i, raw_text=d["text"], plain_english=d["plain"],
332
+ clause_type=ClauseType(d["ctype"]),
333
+ section_heading=d["ctype"].replace("_", " "), position=i, confidence_score=0.95)
334
+ finding = RiskFinding(clause_id=i, severity=Severity(d["sev"]), risk_title=d["title"],
335
+ risk_reason=d["reason"], recommended_action=d["action"],
336
+ safer_clause_version=d["safer"], negotiation_message=d["negotiation"],
337
+ impact_scenarios=d["impacts"])
338
+ scored.append(ScoredClause(clause=clause, finding=finding))
339
+
340
+ crit = sum(1 for s in scored if s.finding.severity == Severity.CRITICAL)
341
+ high = sum(1 for s in scored if s.finding.severity == Severity.HIGH)
342
+ med = sum(1 for s in scored if s.finding.severity == Severity.MEDIUM)
343
+ low = sum(1 for s in scored if s.finding.severity == Severity.LOW)
344
+ raw = (crit * 10 + high * 7 + med * 4 + low * 1) / len(scored)
345
+ overall = round(min(raw, 10.0), 1)
346
+
347
+ return FinalReport(
348
+ contract_name="sample_nda.txt (Demo)",
349
+ generated_at=datetime.now(),
350
+ summary={"total_clauses": len(scored), "critical_count": crit, "high_count": high,
351
+ "medium_count": med, "low_count": low, "overall_score": overall, "contract_type": "NDA"},
352
+ top_3_actions=[
353
+ "Demand a carve-out for inventions created on your own time using your own equipment.",
354
+ "Add an opt-out clause for arbitration — preserve your right to go to court.",
355
+ "Reduce non-compete to 12 months with geographic scope tied to actual operations.",
356
+ ],
357
+ scored_clauses=scored,
358
+ markdown_report="""# ClauseGuard Risk Analysis Report
359
+ **Contract:** sample_nda.txt
360
+ **Generated:** {date}
361
+ **Risk Score:** {score}/10
362
+ **Contract Type:** NDA
363
+
364
+ ## Summary
365
+ - Total Clauses: {total}
366
+ - Critical: {crit}
367
+ - High: {high}
368
+ - Medium: {med}
369
+ - Low: {low}
370
+
371
+ ## Top Actions
372
+ 1. Demand a carve-out for inventions created on your own time using your own equipment.
373
+ 2. Add an opt-out clause for arbitration — preserve your right to go to court.
374
+ 3. Reduce non-compete to 12 months with geographic scope tied to actual operations.
375
+
376
+ ---
377
+ *Generated by ClauseGuard AI*
378
+ """.format(date=datetime.now().strftime('%B %d, %Y'), score=overall, total=len(scored), crit=crit, high=high, med=med, low=low),
379
+ processed_normally=False,
380
+ )
381
+
382
+
383
def _load_demo_report() -> None:
    """Install the canned demo report and reset the per-report UI state.

    Stores the result of ``_build_demo_report()`` in the session, rebuilds the
    copilot's raw text from the demo clauses (heading line followed by the
    clause text, blank-line separated), clears chat/email/AI caches, selects
    the first tab, and finally triggers ``st.rerun()``.
    """
    session = st.session_state
    session.report = _build_demo_report()
    session.error = None
    session.uploaded_filename = "sample_nda.txt"

    # One text section per clause; the heading line is omitted when empty.
    sections = []
    for scored in session.report.scored_clauses:
        title = scored.clause.section_heading or ""
        body = scored.clause.raw_text
        sections.append(f"{title}\n{body}\n\n" if title else f"{body}\n\n")
    session.copilot_raw_text = "".join(sections).strip()

    session.active_tab = 0
    session.copilot_messages = []
    session.clause_ai_responses = {}
    session.generated_emails = {}
    session.copilot_cache_key = None
    st.rerun()
399
+
400
+
401
def _load_guided_demo() -> None:
    """Arm the guided tour at its first step, then load the demo report."""
    st.session_state["guided_demo"] = True
    st.session_state["demo_step"] = 0
    _load_demo_report()
405
+
406
+
407
+ # ═══════════════════════════════════════════════════════════════════════════════
408
+ # SESSION STATE
409
+ # ═══════════════════════════════════════════════════════════════════════════════
410
+
411
def _init_session_state() -> None:
    """Seed ``st.session_state`` with a default for every key listed below.

    Keys that already exist in the session are left untouched, so calling
    this on every script run does not reset user state.
    """
    initial_values = {
        "report": None,
        "error": None,
        "analyzing": False,
        "uploaded_filename": None,
        "uploaded_bytes": None,
        # One status / message slot per pipeline agent.
        "agent_statuses": dict.fromkeys(AGENT_NAMES, "pending"),
        "agent_messages": dict.fromkeys(AGENT_NAMES, ""),
        "guided_demo": False,
        "demo_step": 0,
        "copilot_messages": [],
        "copilot_context": "",
        "copilot_raw_text": "",
        "copilot_cache_key": None,
        "clause_ai_responses": {},
        "pending_ai_query": None,
        "generated_emails": {},
        "active_tab": 0,
        "highlight_clause_id": None,
    }
    session = st.session_state
    for name in initial_values:
        if name in session:
            continue
        session[name] = initial_values[name]
435
+
436
+
437
+ # ═══════════════════════════════════════════════════════════════════════════════
438
+ # LIVE AGENT EVENT HANDLER
439
+ # ═══════════════════════════════════════════════════════════════════════════════
440
+
441
def _on_agent_event(agent: str, status: str, details: dict) -> None:
    """Record an agent's latest status and message in the session state.

    Args:
        agent: Name of the agent the event refers to (used as the dict key).
        status: New status string for that agent.
        details: Event payload; only its ``"message"`` entry is read. ``None``
            or a payload without a message is recorded as an empty message
            instead of raising.
    """
    message = (details or {}).get("message", "")
    st.session_state.agent_statuses[agent] = status
    st.session_state.agent_messages[agent] = message
444
+
445
+
446
+ # ═══════════════════════════════════════════════════════════════════════════════
447
+ # ANALYSIS RUNNER
448
+ # ═══════════════════════════════════════════════════════════════════════════════
449
+
450
def _run_analysis() -> None:
    """Run the analysis pipeline on the uploaded file and store the outcome.

    Reads ``uploaded_bytes`` / ``uploaded_filename`` from the session,
    validates the configuration, extracts the text, checks model connectivity
    and runs ``run_pipeline`` on a private event loop. A successful report is
    stored in ``st.session_state.report``; any failure stores a user-facing
    message in ``st.session_state.error``.

    A configuration error returns immediately without a rerun. Every other
    path ends in the ``finally`` clause, which clears the ``analyzing`` flag,
    removes the temporary widgets and calls ``st.rerun()``.
    """
    # Local import: agent messages are escaped before being embedded in HTML.
    from html import escape

    file_bytes = st.session_state.uploaded_bytes
    filename = st.session_state.uploaded_filename
    try:
        validate_config()
    except ValueError as e:
        st.session_state.error = str(e)
        st.session_state.analyzing = False
        return

    for a in AGENT_NAMES:
        st.session_state.agent_statuses[a] = "pending"
        st.session_state.agent_messages[a] = ""

    set_event_callback(_on_agent_event)

    progress_bar = st.progress(0)
    status_text = st.empty()
    agent_panel = st.empty()

    # (text colour, extra <tr> attribute) per agent status.
    status_styles = {
        "completed": ("#55dd55", ""),
        "failed": ("#ff4444", ""),
        "running": ("#ffaa44", " class='agent-running'"),
    }

    def _render_agent_panel() -> str:
        """Build the HTML table showing each agent's step, icon, name and message."""
        rows = []
        for a in AGENT_NAMES:
            step = AGENT_STEP_NUMBERS.get(a, "")
            s = st.session_state.agent_statuses[a]
            icon = AGENT_ICONS.get(s, "⏳")
            # Messages come from the pipeline at runtime; escape them so stray
            # markup cannot break (or inject into) the status table.
            msg = escape(str(st.session_state.agent_messages.get(a, "")))
            color, anim = status_styles.get(s, ("#666", ""))
            rows.append(
                f"<tr{anim}><td style='padding:8px 12px;text-align:center;font-size:1.1rem'>{step}</td>"
                f"<td style='padding:8px 12px'>{icon}</td>"
                f"<td style='padding:8px 12px;color:{color};font-weight:600'>{a}</td>"
                f"<td style='padding:8px 12px;color:#aaa;font-size:0.85rem'>{msg}</td></tr>"
            )
        return (
            "<div style='background:#1a1a2e;border-radius:14px;padding:1.25rem;border:1px solid rgba(255,255,255,0.08)'>"
            "<table style='width:100%;border-collapse:collapse'><thead><tr>"
            "<th style='padding:6px 12px;color:#888;font-size:0.7rem;text-transform:uppercase;letter-spacing:1px'>Step</th>"
            "<th style='padding:6px 12px'></th>"
            "<th style='padding:6px 12px;color:#888;font-size:0.7rem;text-transform:uppercase;letter-spacing:1px;text-align:left'>Agent</th>"
            "<th style='padding:6px 12px;color:#888;font-size:0.7rem;text-transform:uppercase;letter-spacing:1px;text-align:left'>Status</th>"
            f"</tr></thead><tbody>{''.join(rows)}</tbody></table></div>"
        )

    try:
        status_text.markdown("<h3 style='color:#fff'>🔍 Reading file...</h3>", unsafe_allow_html=True)
        raw_text = extract_text(file_bytes, filename)
        st.session_state.copilot_raw_text = raw_text

        status_text.markdown("<h3 style='color:#8ab4f8'>🔗 Testing model connection...</h3>", unsafe_allow_html=True)
        ok, conn_err = _check_model_connectivity()
        if not ok:
            st.session_state.error = f"Cannot connect to model API: {conn_err}"
            # Widget cleanup and the rerun happen once, in ``finally``.
            return

        status_text.markdown("<h3 style='color:#8ab4f8'>🤖 Running AI analysis pipeline...</h3>", unsafe_allow_html=True)
        agent_panel.markdown(_render_agent_panel(), unsafe_allow_html=True)

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            report = loop.run_until_complete(run_pipeline(raw_text, filename))
        finally:
            loop.close()
            # Do not leave a closed loop installed as this thread's current loop.
            asyncio.set_event_loop(None)

        # Agents that never reported are shown as completed once the pipeline returns.
        for a in AGENT_NAMES:
            if st.session_state.agent_statuses[a] == "pending":
                st.session_state.agent_statuses[a] = "completed"
                st.session_state.agent_messages[a] = "OK"
        agent_panel.markdown(_render_agent_panel(), unsafe_allow_html=True)

        progress_bar.progress(1.0)

        if report.summary.total_clauses == 0:
            logger.error("Pipeline produced 0 clauses — model API may be unreachable or returned errors")
            failed_agents = [
                a for a in AGENT_NAMES
                if st.session_state.agent_statuses.get(a) == "failed"
            ]
            if failed_agents:
                st.session_state.error = (
                    f"Analysis failed — the {failed_agents[0]} agent could not complete. "
                    "The model API may be unreachable or returned malformed responses. "
                    "Check that the vLLM endpoint is running at the configured BASE_URL."
                )
            else:
                st.session_state.error = (
                    "Analysis could not extract any clauses from the document. "
                    "The model may be unavailable or the document format may be unsupported. "
                    "Check your model endpoint configuration."
                )
            status_text.markdown("<h3 style='color:#ff4444'>❌ Analysis failed</h3>", unsafe_allow_html=True)
            st.session_state.report = None
            return

        status_text.markdown("<h3 style='color:#55dd55'>✅ Analysis complete!</h3>", unsafe_allow_html=True)
        st.session_state.report = report
        st.session_state.error = None
        st.session_state.copilot_messages = []
        st.session_state.clause_ai_responses = {}
        st.session_state.generated_emails = {}

        # Parenthesised to make the (unchanged) precedence explicit.
        no_notable_risk = (
            report.summary.critical_count == 0
            and report.summary.high_count == 0
            and report.summary.medium_count == 0
        )
        if not report.processed_normally or no_notable_risk:
            st.session_state.error = (
                "Analysis completed but no significant risks were detected. "
                "The model responses may have been incomplete — review the "
                f"report ({report.summary.total_clauses} clauses analyzed) carefully."
            )

    except ValueError as e:
        st.session_state.error = f"Could not process: {e}"
    except Exception as e:
        st.session_state.error = "An unexpected error occurred. Try again."
        # ``exception`` keeps the traceback that ``error`` discarded.
        logger.exception("Analysis error: %s", e)
    finally:
        st.session_state.analyzing = False
        progress_bar.empty()
        status_text.empty()
        agent_panel.empty()
        st.rerun()
579
+
580
+
581
+ # ═══════════════════════════════════════════════════════════════════════════════
582
+ # FALLBACK GENERATORS FOR NEGOTIATION COPILOT
583
+ # ═══════════════════════════════════════════════════════════════════════════════
584
+
585
def _generate_fallback_safer(sc: ScoredClause) -> str:
    """Return a canned safer-clause suggestion keyed by the clause's type.

    The lookup uses ``sc.clause.clause_type.value``; types without a dedicated
    entry get a generic "mutual agreement" suggestion.
    """
    suggestions = {
        "IP_ASSIGNMENT": "Employee assigns only inventions directly related to Company's business, created during working hours using Company resources. Personal projects remain Employee's property.",
        "ARBITRATION": "Either party may opt out of arbitration within 30 days. Both parties retain the right to bring claims in court.",
        "NON_COMPETE": "Non-compete limited to 12 months within specific metro areas where Company operates.",
        "AUTO_RENEWAL": "Agreement renews only with mutual written consent. No automatic renewal.",
        "TERMINATION": "Either party may terminate with 30 days written notice.",
        "INDEMNIFICATION": "Indemnification limited to direct damages caused by negligence or willful misconduct.",
        "LIABILITY_CAP": "Liability capped at the greater of fees paid or $10,000.",
        "DATA_SHARING": "Data shared only with explicit opt-in consent, revocable at any time.",
        "GOVERNING_LAW": "Governing law set to user's home state with optional mediation.",
        "PAYMENT": "Payment due net-30 after invoice receipt. Late fees capped at 5% annually.",
        "CONFIDENTIALITY": "Confidential information excludes publicly available data and independently developed knowledge.",
        "NON_SOLICITATION": "Non-solicitation limited to 12 months and applies only to employees directly worked with.",
        "FORCE_MAJEURE": "Neither party liable for delays due to circumstances beyond reasonable control, with prompt notice.",
        "SEVERABILITY": "If any provision is found unenforceable, remaining provisions stay in full effect.",
        "ASSIGNMENT": "Neither party may assign without written consent, not to be unreasonably withheld.",
        "WAIVER": "Failure to enforce any provision does not constitute waiver. Waivers must be in writing.",
        "SURVIVAL": "Confidentiality, indemnification, and payment obligations survive termination.",
        "NOTICE": "Notices effective upon email delivery with read receipt or 3 days after certified mail.",
    }
    kind = sc.clause.clause_type.value
    if kind in suggestions:
        return suggestions[kind]
    return "Request a mutual agreement: both parties share rights and obligations equally. Remove one-sided provisions."
608
+
609
+
610
def _generate_fallback_message(sc: ScoredClause) -> str:
    """Compose a short negotiation email for a clause.

    The topic is the clause's section heading, or the title-cased clause type
    when no heading is set. The quoted proposal is the finding's safer clause
    version, or the canned suggestion from ``_generate_fallback_safer`` when
    the finding has none.
    """
    heading = sc.clause.section_heading
    if heading:
        topic = heading
    else:
        topic = sc.clause.clause_type.value.replace("_", " ").title()

    proposal = sc.finding.safer_clause_version
    if not proposal:
        proposal = _generate_fallback_safer(sc)

    paragraphs = [
        f"Hi,\n\nI've reviewed the contract and would like to discuss the {topic} clause. ",
        f"I'd suggest the following adjustment:\n\n'{proposal}'\n\n",
        "This ensures both parties are treated fairly. Would you be open to this change?\n\nThanks!",
    ]
    return "".join(paragraphs)
618
+
619
+
620
def _build_safer_contract(report: FinalReport) -> str:
    """Render a plain-text "safer" version of the analysed contract.

    CRITICAL and HIGH clauses that have a non-empty safer clause version are
    replaced by it, with the original text kept above as ``#`` comment lines.
    Every other clause is emitted unchanged. The header and footer both
    report the number of clauses that were actually replaced.

    Args:
        report: The final report whose ``scored_clauses`` are rendered.

    Returns:
        The whole document as a single newline-joined string.
    """
    risky_levels = (Severity.CRITICAL, Severity.HIGH)

    def _is_replaced(scored) -> bool:
        """True when the clause is risky and a safer wording is available."""
        return bool(scored.finding.safer_clause_version) and scored.finding.severity in risky_levels

    clauses = list(report.scored_clauses)
    # Count first so the header reports real replacements, not merely the
    # number of risky clauses (some may have no safer version).
    replaced_count = sum(1 for scored in clauses if _is_replaced(scored))
    rule = f"# {'─' * 70}"

    lines: list[str] = [
        f"# SAFER VERSION — {report.contract_name}",
        f"# Auto-generated by ClauseGuard — replaces {replaced_count} high-risk clauses",
        f"# Original risk score: {report.summary.overall_score}/10",
        f"# Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}",
        "",
    ]

    for i, scored in enumerate(clauses, 1):
        heading = scored.clause.section_heading or f"CLAUSE {i}"
        if _is_replaced(scored):
            finding = scored.finding
            lines.append(rule)
            lines.append(f"# CLAUSE {i}: REPLACED — {finding.severity.value} Risk — {finding.risk_title}")
            lines.append(rule)
            lines.append("# ORIGINAL (RISKY):")
            lines.extend(f"# {orig_line.strip()}" for orig_line in scored.clause.raw_text.split("\n"))
            lines.append("#")
            lines.append("# SAFER VERSION:")
            lines.append(f"{i}. {heading}")
            lines.append(f" {finding.safer_clause_version}")
        else:
            lines.append(f"{i}. {heading}")
            lines.append(f" {scored.clause.raw_text.strip()}")
        lines.append("")

    lines.append(f"# {'=' * 70}")
    lines.append("# END OF SAFER CONTRACT")
    lines.append(f"# {replaced_count} clauses replaced | {len(clauses) - replaced_count} left unchanged")
    return "\n".join(lines)
656
+
657
+
658
+ # ═══════════════════════════════════════════════════════════════════════════════
659
+ # UI HELPER FUNCTIONS
660
+ # ═══════════════════════════════════════════════════════════════════════════════
661
+
662
def seats(n: int) -> str:
    """Return a human-readable party count.

    Zero or negative counts read "No parties", exactly one reads "1 party",
    and anything else reads "<n> parties".
    """
    return "No parties" if n <= 0 else ("1 party" if n == 1 else f"{n} parties")
668
+
669
+
670
def _render_info_card(title: str, body: str, icon: str = "ℹ️", bg: str = "rgba(30,144,255,0.08)", border: str = "#1e90ff") -> str:
    """Return the HTML for a small info card.

    The card is a left-bordered box with an ``icon title`` header line and the
    ``body`` below it. All arguments are interpolated into the markup as given.
    """
    card_lines = (
        f'<div style="background:{bg};border-left:4px solid {border};border-radius:4px 12px 12px 4px;padding:0.75rem 1rem;margin:0.4rem 0">',
        f'<span style="font-size:0.85rem;font-weight:600;color:#ccc">{icon} {title}</span>',
        f'<div style="font-size:0.82rem;color:#aaa;margin-top:0.25rem;line-height:1.5">{body}</div>',
        "</div>",
    )
    return "\n".join(card_lines)
675
+
676
+
677
def _render_info_card_raw(html: str) -> None:
    """Emit ``html`` through ``st.markdown`` with raw HTML enabled."""
    st.markdown(body=html, unsafe_allow_html=True)
679
+
680
+
681
def _switch_to_chat_with_prompt(prompt_text: str) -> None:
    """Queue ``prompt_text`` as the pending AI query, select tab 3 and rerun."""
    session = st.session_state
    session["pending_ai_query"] = prompt_text
    session["active_tab"] = 3
    st.rerun()
685
+
686
+
687
def _render_single_clause_card(sc: ScoredClause, style: dict, show_actions: bool = True) -> None:
    """Render one clause as a styled card with its risk details.

    The card shows the severity badge and risk title, the section heading,
    clause type and id, an expander with the original text, then (when
    present) the plain-English summary, risk reason, recommended action and
    impact scenarios. For severities other than LOW/INFO an "Ask AI to
    Explain" button is added when ``show_actions`` is true.

    Clause text and finding text are HTML-escaped before being embedded,
    because they are rendered with ``unsafe_allow_html=True``.

    Args:
        sc: The scored clause to render.
        style: Style mapping; the keys read here are ``bg``, ``border``,
            ``tag_bg``, ``color`` and ``badge``.
        show_actions: Whether to offer the explain button.
    """
    # Local import keeps the file's import block untouched.
    from html import escape

    def esc(value) -> str:
        """Escape arbitrary clause/finding text for safe HTML embedding."""
        return escape(str(value))

    s = style
    c = sc.clause
    f = sc.finding

    st.markdown(f"""
<div style="
background: linear-gradient(135deg, {s['bg']} 0%, rgba(20,22,30,0.6) 100%);
border: 1px solid {s['border']}22;
border-left: 4px solid {s['border']};
border-radius: 0 12px 12px 0;
padding: 1.25rem 1.25rem 0.75rem 1.25rem;
margin-bottom: 0.5rem;
">
<div style="display:flex;align-items:center;gap:0.6rem;margin-bottom:0.75rem">
<span style="
display:inline-flex;align-items:center;gap:4px;
background:{s['tag_bg']};
color:{s['color']};
padding:0.25rem 0.75rem;
border-radius:20px;
font-size:0.73rem;
font-weight:700;
letter-spacing:0.4px;
text-transform:uppercase;
white-space:nowrap;
">{s['badge']}</span>
<span style="font-weight:600;color:#e8e8e8;font-size:1rem;line-height:1.3">{esc(f.risk_title)}</span>
</div>
<div style="display:flex;gap:1rem;margin-bottom:0.6rem">
<span style="color:#888;font-size:0.75rem">📂 {esc(c.section_heading or '')}</span>
<span style="color:#888;font-size:0.75rem">🏷️ {c.clause_type.value}</span>
<span style="color:#666;font-size:0.75rem">Clause #{c.id}</span>
</div>
</div>""", unsafe_allow_html=True)

    with st.expander("📜 View Original Text"):
        st.markdown(f"<div style='background:#1c1d2a;padding:0.85rem;border-radius:8px;font-family:Consolas,monospace;font-size:0.88rem;line-height:1.65;color:#d0d0d0;white-space:pre-wrap'>{esc(c.raw_text)}</div>", unsafe_allow_html=True)

    if c.plain_english:
        st.markdown(f"""<div style="display:flex;gap:0.5rem;align-items:flex-start;margin:0.5rem 0;padding:0.6rem 0.85rem;background:rgba(30,144,255,0.07);border-radius:8px;border:1px solid rgba(30,144,255,0.12)">
<span style="font-size:0.95rem;flex-shrink:0">💬</span>
<span style="color:#c0cfe0;font-size:0.9rem;line-height:1.5">{esc(c.plain_english)}</span>
</div>""", unsafe_allow_html=True)

    st.markdown(f"""<div style="display:flex;gap:0.5rem;align-items:flex-start;margin:0.5rem 0;padding:0.6rem 0.85rem;background:{s['bg']};border-radius:8px;border:1px solid {s['border']}18">
<span style="font-size:0.95rem;flex-shrink:0">⚠️</span>
<div>
<div style="color:{s['color']};font-size:0.8rem;font-weight:600;text-transform:uppercase;letter-spacing:0.5px;margin-bottom:0.2rem">Risk</div>
<div style="color:#d0d0d0;font-size:0.9rem;line-height:1.55">{esc(f.risk_reason)}</div>
</div>
</div>""", unsafe_allow_html=True)

    if f.recommended_action:
        st.markdown(f"""<div style="display:flex;gap:0.5rem;align-items:flex-start;margin:0.5rem 0;padding:0.6rem 0.85rem;background:rgba(50,205,50,0.06);border-radius:8px;border:1px solid rgba(50,205,50,0.12)">
<span style="font-size:0.95rem;flex-shrink:0">✅</span>
<span style="color:#b0d0b0;font-size:0.9rem;line-height:1.5">{esc(f.recommended_action)}</span>
</div>""", unsafe_allow_html=True)

    if f.impact_scenarios:
        with st.expander("⚠️ What Could Happen If You Sign This"):
            for impact in f.impact_scenarios:
                st.markdown(f"<div style='background:rgba(255,68,68,0.06);padding:0.4rem 0.75rem;margin:0.15rem 0;border-radius:6px;border-left:3px solid {s['border']};font-size:0.85rem;color:#e0a0a0'>• {esc(impact)}</div>", unsafe_allow_html=True)

    if show_actions and f.severity not in (Severity.LOW, Severity.INFO):
        if st.button("✏️ Ask AI to Explain", key=f"explain_{c.id}", use_container_width=True):
            _switch_to_chat_with_prompt(f"Explain clause {c.id} ({f.risk_title}) in simple terms. What does this mean for me?")
754
+
755
+
756
+ # ═══════════════════════════════════════════════════════════════════════════════
757
+ # HEADER
758
+ # ═══════════════════════════════════════════════════════════════════════════════
759
+
760
def render_header() -> None:
    """Draw the page header: title banner on the left, demo buttons on the right.

    "Instant Demo" loads the canned report; "Guided Tour" loads it with the
    guided tour enabled.
    """
    banner_html = """<div style="background:linear-gradient(135deg,#1e3a5f 0%,#2a5298 100%);padding:1.5rem 2rem;border-radius:16px;margin-bottom:0.5rem">
<h1 style="margin:0;color:#fff;font-size:2.2rem">🛡️ ClauseGuard</h1>
<p style="margin:0.25rem 0 0 0;color:#c8d8f0;font-size:1.1rem">AI-Powered Contract Clause Risk Analyzer</p>
</div>"""

    banner_col, buttons_col = st.columns([3, 1])
    with banner_col:
        st.markdown(banner_html, unsafe_allow_html=True)
    with buttons_col:
        # Spacer so the buttons sit lower, next to the banner.
        st.markdown("<br>", unsafe_allow_html=True)
        instant_col, tour_col = st.columns(2)
        with instant_col:
            instant_clicked = st.button("⚡ Instant Demo", use_container_width=True, help="See a pre-analyzed NDA report instantly")
            if instant_clicked:
                _load_demo_report()
        with tour_col:
            tour_clicked = st.button("🎬 Guided Tour", use_container_width=True, help="Walk through a demo with highlights")
            if tour_clicked:
                _load_guided_demo()
776
+
777
+
778
+ # ═══════════════════════════════════════════════════════════════════════════════
779
+ # GUIDED DEMO TOUR
780
+ # ═══════════════════════════════════════════════════════════════════════════════
781
+
782
def _render_guided_tour() -> None:
    """Render the current guided-tour step with Previous / Next navigation.

    Does nothing unless ``guided_demo`` is set in the session. The step index
    is read from ``demo_step``. Moving to another step, in either direction,
    also selects the tab that step describes; finishing the last step turns
    the tour off. Every button press ends with ``st.rerun()``.
    """
    if not st.session_state.get("guided_demo"):
        return
    step = st.session_state.get("demo_step", 0)
    # NOTE(review): the **bold** markers in the bodies sit inside a raw HTML
    # block and may render as literal asterisks — confirm in the browser.
    tour_steps = [
        {
            "title": "🎯 Welcome to ClauseGuard!",
            "body": "Let's walk through a sample NDA contract analysis. You'll see how 5 AI agents work together to identify risks, explain legal jargon, and help you negotiate better terms. Each agent has a specific role in the pipeline.",
            "tab": 0,
            "icon": "🎯",
        },
        {
            "title": "📊 Step 1: Risk Overview Dashboard",
            "body": "The **Overview tab** shows your contract's risk score, severity breakdown, and the top 3 actions you should take before signing. Check the bar chart to see how many clauses fall into each risk category. The risk score is calculated from 0 (safe) to 10 (extremely risky).",
            "tab": 0,
            "icon": "📊",
        },
        {
            "title": "📋 Step 2: Clause-by-Clause Deep Dive",
            "body": "Switch to the **Clauses tab** to drill into each clause. Critical and High-risk clauses are expanded by default so you see the most dangerous issues first. Each clause card shows: original legal text, plain English translation, the specific risk reason, and a recommended action.",
            "tab": 1,
            "icon": "📋",
        },
        {
            "title": "💬 Step 3: Negotiation Copilot",
            "body": "In the **Negotiation tab**, you'll find side-by-side comparisons: what you signed vs. what you should ask for instead. Each risky clause comes with a pre-written negotiation message and a safer alternative. You can also download a fully rewritten 'Safer Contract' with all high-risk clauses replaced.",
            "tab": 2,
            "icon": "💬",
        },
        {
            "title": "🤖 Step 4: AI Chat Assistant",
            "body": "The **Chat Assistant tab** lets you ask follow-up questions in plain English. The AI has full context of your entire contract and all clause analyses. Try questions like 'Summarize this contract' or 'What's the most dangerous clause and why?' Use the quick-action chips for common questions.",
            "tab": 3,
            "icon": "🤖",
        },
        {
            "title": "✅ You're Ready!",
            # The tour is started by the "Guided Tour" header button, not "Instant Demo".
            "body": "Now you know your way around ClauseGuard. Use the **Guided Tour** button anytime to revisit this tour, or upload your own contract to run a real analysis with the full 5-agent AI pipeline. Remember: always consult a qualified attorney for final legal review.",
            "tab": 0,
            "icon": "✅",
        },
    ]

    if step < len(tour_steps):
        ts = tour_steps[step]
        last_step = len(tour_steps) - 1
        progress_pct = (step + 1) / len(tour_steps)
        with st.container():
            st.markdown(f"""<div style="background:linear-gradient(135deg,#1e3a5f 0%,#2a5298 100%);padding:1.25rem 1.5rem;border-radius:14px;margin:0.5rem 0;border:1px solid rgba(255,255,255,0.1)">
<div style="display:flex;align-items:center;gap:0.5rem;margin-bottom:0.25rem">
<span style="font-size:1.5rem">{ts['icon']}</span>
<h3 style="margin:0;color:#fff;font-size:1.2rem">{ts['title']}</h3>
</div>
<p style="color:#c8d8f0;margin:0.5rem 0;line-height:1.6">{ts['body']}</p>
<div style="background:rgba(255,255,255,0.1);border-radius:4px;height:4px;margin-top:0.75rem">
<div style="background:linear-gradient(90deg,#667eea,#764ba2);border-radius:4px;height:100%;width:{progress_pct*100:.0f}%"></div>
</div>
<div style="text-align:right;font-size:0.75rem;color:rgba(255,255,255,0.5);margin-top:0.25rem">Step {step + 1} of {len(tour_steps)}</div>
</div>""", unsafe_allow_html=True)

            # Middle column is an intentional spacer between the two buttons.
            c1, _spacer, c3 = st.columns([1, 1, 1])
            with c1:
                if step > 0:
                    if st.button("⬅️ Previous", key=f"tour_prev_{step}", use_container_width=True):
                        st.session_state.demo_step = step - 1
                        # Mirror "Next": show the tab the previous step talks about.
                        st.session_state.active_tab = tour_steps[step - 1]["tab"]
                        st.rerun()
            with c3:
                if st.button("Next ➡️" if step < last_step else "✅ Finish Tour", key=f"tour_next_{step}", use_container_width=True):
                    if step < last_step:
                        st.session_state.demo_step = step + 1
                        st.session_state.active_tab = tour_steps[step + 1]["tab"]
                    else:
                        st.session_state.guided_demo = False
                    st.rerun()
856
+
857
+
858
+ # ═══════════════════════════════════════════════════════════════════════════════
859
+ # RISK BANNER
860
+ # ═══════════════════════════════════════════════════════════════════════════════
861
+
862
def render_risk_banner() -> None:
    """Show a one-line banner summarising how risky the current report is.

    Nothing is shown when no report is loaded. The banner level depends on the
    combined critical + high count, then on the medium count: error for three
    or more, warning for at least one, info when only medium-risk clauses
    exist, success otherwise.
    """
    report = st.session_state.report
    if not report:
        return

    summary = report.summary
    severe_count = summary.critical_count + summary.high_count

    if severe_count >= 3:
        st.error(f"🚨 **HIGH ALERT — {severe_count} critical or high-risk clauses detected!** Review carefully before signing. We strongly recommend negotiating these terms.")
        return
    if severe_count > 0:
        st.warning(f"⚠️ **This contract has {severe_count} high-risk clause(s)** — review carefully before signing")
        return
    if summary.medium_count > 0:
        st.info(f"ℹ️ **{summary.medium_count} medium-risk clause(s) found** — this contract may need attention before signing")
        return
    st.success("✅ **This contract looks clean** — no high or critical risk clauses detected. Still review all terms before signing.")
877
+
878
+
879
+ # ═══════════════════════════════════════════════════════════════════════════════
880
+ # ISSUES SUMMARY (displays before tabs)
881
+ # ═══════════════════════════════════════════════════════════════════════════════
882
+
883
def render_issues_summary() -> None:
    """Show a card grid of all CRITICAL, HIGH and MEDIUM findings.

    Nothing is rendered when no report is loaded. When the report was not
    processed normally and contains no critical or high findings, an
    "analysis was incomplete" warning is shown; this now also covers the case
    the warning text describes, where every clause was marked MEDIUM. With no
    issues at all, a success note is shown instead (unless the warning
    applies). Otherwise the issues are laid out in up to three columns,
    ordered critical, high, medium.

    Risk titles and reasons are HTML-escaped because the cards are rendered
    with ``unsafe_allow_html=True``.
    """
    # Local import keeps the file's import block untouched.
    from html import escape

    report = st.session_state.report
    if not report:
        return

    criticals = [sc for sc in report.scored_clauses if sc.finding.severity == Severity.CRITICAL]
    highs = [sc for sc in report.scored_clauses if sc.finding.severity == Severity.HIGH]
    mediums = [sc for sc in report.scored_clauses if sc.finding.severity == Severity.MEDIUM]
    all_issues = criticals + highs + mediums

    # Previously this warning was only reachable when there were no MEDIUM
    # findings, i.e. never in the "all clauses marked MEDIUM" case it names.
    incomplete = not report.processed_normally and not criticals and not highs
    if incomplete:
        st.warning(
            "⚠️ **Analysis was incomplete** — the AI risk scorer could not evaluate these clauses. "
            "All clauses are marked as MEDIUM 'Needs Human Review'. "
            "This typically means the model API is having issues. Check your vLLM endpoint configuration."
        )

    if not all_issues:
        if not incomplete:
            st.success("✅ No issues found — all clauses look reasonable. Use the tabs below to explore the full analysis.")
        return

    st.markdown("## 🔍 Issues Found")
    total_labels = []
    if criticals:
        total_labels.append(f"{len(criticals)} critical")
    if highs:
        total_labels.append(f"{len(highs)} high")
    if mediums:
        total_labels.append(f"{len(mediums)} medium")
    st.caption(f"{len(all_issues)} clauses need attention — {', '.join(total_labels)}")

    issue_cols = st.columns(min(len(all_issues), 3))
    for idx, sc in enumerate(all_issues):
        style = SEVERITY_STYLE.get(sc.finding.severity, SEVERITY_STYLE[Severity.INFO])
        with issue_cols[idx % 3]:
            # Truncate the raw text first, then escape, so no entity is cut in half.
            reason_preview = sc.finding.risk_reason[:120]
            if len(sc.finding.risk_reason) > 120:
                reason_preview += "..."
            st.markdown(
                f"""<div style="background:#1e1e2e;border-radius:12px;padding:1rem;margin:0.3rem 0;
border-top:3px solid {style['border']};border-left:1px solid #333;border-right:1px solid #333;border-bottom:1px solid #333">
<div style="font-weight:700;margin-bottom:0.3rem;font-size:0.8rem">{style['badge']}</div>
<div style="font-size:0.9rem;color:#e0e0e0;line-height:1.4;margin-bottom:0.5rem"><b>{escape(str(sc.finding.risk_title))}</b></div>
<div style="font-size:0.8rem;color:#aaa;line-height:1.4">{escape(reason_preview)}</div>
</div>""",
                unsafe_allow_html=True,
            )
    # Empty markdown element used as a spacer below the grid.
    st.markdown("")
929
+
930
+
931
+ # ═══════════════════════════════════════════════════════════════════════════════
932
+ # TAB 1: OVERVIEW
933
+ # ═══════════════════════════════════════════════════════════════════════════════
934
+
935
def render_overview_tab() -> None:
    """Render the Overview tab for the report in ``st.session_state.report``.

    Sections, in order: a three-column dashboard (score card, severity bar
    chart, "needs attention" card), the top actions list, consequence
    scenarios for up to three CRITICAL clauses, and one-line digests of the
    HIGH and MEDIUM clauses.

    Report text (titles, reasons, actions, scenarios) is HTML-escaped before
    being interpolated into ``unsafe_allow_html`` markup, so stray ``<`` or
    ``&`` characters in that text cannot break or inject markup.
    """
    from html import escape  # function-scope import keeps this block self-contained

    def _clip(text: str, limit: int) -> str:
        """Return *text* cut to *limit* characters, with "..." appended when cut."""
        return text[:limit] + "..." if len(text) > limit else text

    report = st.session_state.report
    s = report.summary
    risky = s.critical_count + s.high_count + s.medium_count

    st.markdown("### 📊 Risk Score Dashboard")
    st.caption(f"Contract Type: **{s.contract_type}** • {s.total_clauses} clauses analyzed • {risky} need attention")

    col_a, col_b, col_c = st.columns([1, 2, 1])
    with col_a:
        score = s.overall_score
        if score >= 7:
            sc_color = "#ff4444"
            label = "High Risk"
            bg_glow = "rgba(255,68,68,0.08)"
        elif score >= 4:
            sc_color = "#ff8c00"
            label = "Medium Risk"
            bg_glow = "rgba(255,140,0,0.06)"
        else:
            sc_color = "#32cd32"
            label = "Low Risk"
            bg_glow = "rgba(50,205,50,0.06)"
        st.markdown(f"""<div style="background:#1e1e2e;border-radius:16px;padding:1.5rem;text-align:center;border:1px solid #333;box-shadow:0 0 30px {bg_glow}">
<div style="font-size:0.8rem;color:#888;text-transform:uppercase;letter-spacing:2px">Risk Score</div>
<div style="font-size:3.5rem;font-weight:900;color:{sc_color};line-height:1.1">{score}<span style="font-size:1.5rem;color:#666">/10</span></div>
<div style="font-size:0.85rem;color:{sc_color};margin-top:0.2rem;font-weight:600">{label}</div>
<div style="font-size:0.82rem;color:#aaa;margin-top:0.5rem">{s.critical_count}C · {s.high_count}H · {s.medium_count}M · {s.low_count}L</div>
</div>""", unsafe_allow_html=True)
    with col_b:
        # "Info" is whatever is left after the four counted severities; clamp at
        # zero so inconsistent summary counts never produce a negative bar.
        info_count = max(s.total_clauses - s.critical_count - s.high_count - s.medium_count - s.low_count, 0)
        chart_data = pd.DataFrame({
            "Severity": ["Critical", "High", "Medium", "Low", "Info"],
            "Count": [s.critical_count, s.high_count, s.medium_count, s.low_count, info_count],
        })
        st.bar_chart(chart_data.set_index("Severity"), use_container_width=True, height=220)
    with col_c:
        pct = (risky / s.total_clauses * 100) if s.total_clauses > 0 else 0
        if pct >= 50:
            attn_color = "#ff4444"
            attn_label = "Review Urgently"
        elif pct >= 25:
            attn_color = "#ff8c00"
            attn_label = "Needs Review"
        else:
            attn_color = "#32cd32"
            attn_label = "Mostly Clean"
        st.markdown(f"""<div style="background:#1e1e2e;border-radius:12px;padding:1.25rem;text-align:center;border:1px solid #333;height:100%">
<div style="font-size:0.75rem;color:#888;text-transform:uppercase;letter-spacing:1px">Needs Attention</div>
<div style="font-size:2.5rem;font-weight:900;color:{attn_color}">{risky}<span style="font-size:1rem;color:#666">/{s.total_clauses}</span></div>
<div style="font-size:0.85rem;color:{attn_color};font-weight:500">{attn_label}</div>
<div style="font-size:0.8rem;color:#888;margin-top:0.25rem">{pct:.0f}% of clauses</div>
</div>""", unsafe_allow_html=True)

    st.markdown("")
    st.markdown("### ⚡ Top 3 Actions Before Signing")
    if report.top_3_actions:
        accents = ("#ff4444", "#ff8c00", "#ffd700")
        numerals = ("①", "②", "③")
        # zip() stops after three entries, so a longer action list can no longer
        # index past the end of the colour / numeral palettes.
        for accent, numeral, action in zip(accents, numerals, report.top_3_actions):
            st.markdown(f"""<div style="background:#1e1e2e;border-radius:10px;padding:1rem 1.25rem;margin:0.4rem 0;
border-left:4px solid {accent}">
<b style="color:#8ab4f8;font-size:1.1rem">{numeral}</b>
<span style="margin-left:0.5rem;color:#e8e8e8">{escape(str(action))}</span></div>""", unsafe_allow_html=True)
    else:
        st.info("No specific actions needed — this contract appears well-balanced.")

    criticals = [sc for sc in report.scored_clauses if sc.finding.severity == Severity.CRITICAL]
    if criticals:
        st.markdown("")
        st.markdown("### ⚠️ What Could Happen If You Sign This?")
        st.caption("Realistic AI-generated consequence scenarios based on these clause patterns. These are illustrative examples — consult an attorney for legal advice.")
        for idx, sc in enumerate(criticals[:3]):
            scenarios = sc.finding.impact_scenarios
            if not scenarios:
                scenarios = ["You may face significant legal or financial consequences from this clause."]
            st.markdown(f"**{idx + 1}. 🔴 {sc.finding.risk_title}**")
            for scenario in scenarios:
                st.markdown(f"<div style='background:rgba(255,68,68,0.08);border-left:3px solid #ff4444;padding:0.5rem 0.75rem;margin:0.2rem 0;margin-left:1rem;border-radius:4px;font-size:0.9rem;color:#e0a0a0'>{escape(str(scenario))}</div>", unsafe_allow_html=True)

    high_risks = [sc for sc in report.scored_clauses if sc.finding.severity == Severity.HIGH]
    if high_risks:
        st.markdown("")
        st.markdown("### 🟠 High-Risk Clauses at a Glance")
        style = SEVERITY_STYLE[Severity.HIGH]
        for sc in high_risks:
            # Truncate first, then escape, so an HTML entity is never cut in half.
            reason_preview = escape(_clip(sc.finding.risk_reason, 120))
            st.markdown(f"""<div style="background:{style['bg']};border-left:3px solid {style['border']};padding:0.6rem 0.9rem;margin:0.3rem 0;border-radius:4px">
<b style="color:{style['color']}">{escape(sc.finding.risk_title)}</b>
<span style="color:#aaa;font-size:0.85rem;margin-left:0.5rem">— {reason_preview}</span>
</div>""", unsafe_allow_html=True)

    medium_risks = [sc for sc in report.scored_clauses if sc.finding.severity == Severity.MEDIUM]
    if medium_risks:
        st.markdown("")
        st.markdown("### 🟡 Medium-Risk Clauses")
        style = SEVERITY_STYLE[Severity.MEDIUM]
        for sc in medium_risks:
            reason_preview = escape(_clip(sc.finding.risk_reason, 80))
            st.markdown(f"""<div style="background:{style['bg']};border-left:3px solid {style['border']};padding:0.5rem 0.8rem;margin:0.2rem 0;border-radius:4px;font-size:0.9rem">
<b style="color:{style['color']}">{escape(sc.finding.risk_title)}</b>
<span style="color:#999;margin-left:0.3rem">— {reason_preview}</span>
</div>""", unsafe_allow_html=True)
1044
+
1045
+
1046
+ # ═══════════════════════════════════════════════════════════════════════════════
1047
+ # TAB 2: CLAUSES
1048
+ # ═══════════════════════════════════════════════════════════════════════════════
1049
+
1050
def render_clauses_tab() -> None:
    """Render the Clauses tab: severity filter checkboxes plus one card per visible clause.

    Clauses come from ``st.session_state.report.scored_clauses`` and keep their
    report order; each visible one gets a running "#n" badge and is drawn by
    ``_render_single_clause_card``.
    """
    report = st.session_state.report
    st.markdown("### 📋 Clause-by-Clause Analysis")
    st.caption("Each issue below shows the original legal text, plain-English translation, risk assessment, and recommended actions.")

    # (severity, checkbox label, ticked by default) — one checkbox per column.
    filter_specs = (
        (Severity.CRITICAL, "🔴 Critical", True),
        (Severity.HIGH, "🟠 High", True),
        (Severity.MEDIUM, "🟡 Medium", True),
        (Severity.LOW, "🟢 Low", False),
        (Severity.INFO, "ℹ️ Info", False),
    )
    visible = {
        severity: column.checkbox(label, value=ticked)
        for column, (severity, label, ticked) in zip(st.columns(5), filter_specs)
    }

    fallback_style = SEVERITY_STYLE[Severity.INFO]
    shown = [sc for sc in report.scored_clauses if visible.get(sc.finding.severity, False)]

    for issue_num, sc in enumerate(shown, start=1):
        style = SEVERITY_STYLE.get(sc.finding.severity, fallback_style)

        st.markdown(f"""
<div style="display:flex;align-items:center;gap:0.75rem;margin:1.5rem 0 0.75rem 0">
<span style="
background:{style['border']};
color:#fff;
min-width:2rem;height:2rem;
border-radius:50%;
display:inline-flex;align-items:center;justify-content:center;
font-weight:800;font-size:0.9rem;
">#{issue_num}</span>
<div style="background:linear-gradient(90deg, {style['border']}44 0%, transparent 100%);height:1px;flex:1"></div>
</div>""", unsafe_allow_html=True)

        _render_single_clause_card(sc, style, show_actions=True)

    if shown:
        st.caption(f"Showing {len(shown)} of {report.summary.total_clauses} clauses — use severity filters above to adjust view")
    else:
        st.info("Select severity levels above to view issues. Try enabling Critical and High to see the most important clauses that need your attention.")
1093
+
1094
+
1095
+ # ═══════════════════════════════════════════════════════════════════════════════
1096
+ # TAB 3: NEGOTIATION
1097
+ # ═══════════════════════════════════════════════════════════════════════════════
1098
+
1099
+ def _highlight_diff(original: str, safer: str) -> tuple[str, str]:
1100
+ original_span = f"<span style='background:rgba(255,68,68,0.20);padding:0 2px;border-radius:2px;text-decoration:line-through'>{original}</span>"
1101
+ safer_span = f"<span style='background:rgba(50,205,50,0.20);padding:0 2px;border-radius:2px;font-weight:600'>{safer}</span>"
1102
+ return original_span, safer_span
1103
+
1104
+
1105
def generate_negotiation_email(sc: ScoredClause, recipient: str = "[Other Party]") -> str:
    """Compose a plain-text negotiation email for one scored clause.

    The clause topic is its section heading, or a title-cased clause type when
    no heading is set. The proposed wording is the finding's safer clause
    version, or the output of ``_generate_fallback_safer`` when that is empty.

    Args:
        sc: Scored clause the email is about.
        recipient: Name placed in the greeting line.

    Returns:
        The email text, starting with a ``Subject:`` line.
    """
    heading = sc.clause.section_heading
    topic = heading if heading else sc.clause.clause_type.value.replace("_", " ").title()

    proposed = sc.finding.safer_clause_version
    if not proposed:
        proposed = _generate_fallback_safer(sc)

    paragraphs = [
        f"Subject: Proposed adjustment — {topic} clause",
        f"Hi {recipient},",
        f"I've reviewed the contract and have a concern about the {topic} clause.",
        f"My concern: {sc.finding.risk_reason}",
        "I'd suggest the following alternative language to make this fair for both parties:",
        f'"{proposed}"',
        "Let me know your thoughts — I'm happy to discuss further.",
        "Best regards",
    ]
    # Paragraphs are separated by exactly one blank line.
    return "\n\n".join(paragraphs)
1119
+
1120
+
1121
def _render_email_card(sc: ScoredClause, recipient: str = "[Other Party]") -> None:
    """Render an editable-recipient email draft for one scored clause.

    Shows a recipient text box (keyed by clause id) and the email produced by
    ``generate_negotiation_email`` in a code block.

    The previous "Copy to Clipboard" button only displayed an "Email copied!"
    toast and never touched the clipboard, so it reported a success that had
    not happened. It is replaced by a caption pointing at the code block's own
    copy control.

    Args:
        sc: Scored clause the draft is about.
        recipient: Initial value of the recipient text box.
    """
    recipient_input = st.text_input("Recipient name", value=recipient, key=f"recipient_{sc.clause.id}")
    email_body = generate_negotiation_email(sc, recipient_input)
    st.markdown("**📧 Formal Email Draft**")
    st.code(email_body, language=None)
    st.caption("Use the copy icon in the top-right corner of the draft above, or select the text and press Ctrl+C, to copy it.")
1132
+
1133
+
1134
def render_negotiation_tab() -> None:
    """Render the Negotiation tab for the report in ``st.session_state.report``.

    For every clause whose severity is not LOW or INFO this shows: a severity
    header, the risk reason, the current clause text next to a safer
    alternative, the recommended action, a quick negotiation message, the
    impact scenarios, and an expander with a formal email draft. A preview of
    the full safer contract (first 3500 characters) closes the tab.

    Contract text and model-generated text are HTML-escaped before being
    placed inside ``unsafe_allow_html`` markup, so uploaded documents cannot
    inject or break markup on the page.
    """
    from html import escape  # function-scope import keeps this block self-contained

    report = st.session_state.report
    default_s = SEVERITY_STYLE[Severity.INFO]

    st.markdown("### 💬 Negotiation Copilot")
    st.caption("Each risky clause shows what you signed vs. a safer alternative, side-by-side. Use the pre-written messages or generate a formal email to send to the other party.")

    negotiable = [sc for sc in report.scored_clauses if sc.finding.severity not in (Severity.LOW, Severity.INFO)]
    if not negotiable:
        st.success("✅ No actionable risks detected — this contract looks reasonable!")
    else:
        st.info(f"📋 **{len(negotiable)} clauses** flagged for negotiation below")

    last_index = len(negotiable) - 1
    for i, sc in enumerate(negotiable):
        style = SEVERITY_STYLE.get(sc.finding.severity, default_s)

        st.markdown(f"""<div style="background:{style['bg']};border-left:4px solid {style['border']};padding:0.6rem 1rem;border-radius:4px 10px 10px 4px;margin:1.2rem 0 0.5rem 0">
<span style="font-weight:700;color:{style['color']}">{style['badge']}</span>
<span style="font-weight:600;color:#e0e0e0;margin-left:0.5rem">{escape(sc.finding.risk_title)}</span>
<span style="color:#888;font-size:0.8rem;margin-left:0.8rem">Clause {sc.clause.id}</span>
</div>""", unsafe_allow_html=True)

        st.markdown("**📋 Why This Matters**")
        st.markdown(f"<div style='color:#ccc;font-size:0.9rem;line-height:1.55;margin-bottom:0.75rem;padding:0.5rem 0.75rem;background:rgba(255,255,255,0.02);border-radius:8px'>{escape(sc.finding.risk_reason)}</div>", unsafe_allow_html=True)

        neg_l, neg_r = st.columns(2)
        with neg_l:
            st.markdown("**⚠️ Current Clause (Risky)**")
            text_to_show = sc.clause.raw_text[:500]
            if len(sc.clause.raw_text) > 500:
                text_to_show += "..."
            # Escape after truncating so an HTML entity is never cut in half.
            st.markdown(f"<div style='background:rgba(255,68,68,0.08);padding:0.75rem;border-radius:8px;border:1px solid rgba(255,68,68,0.2);font-size:0.85rem;line-height:1.6;color:#e0e0e0'>{escape(text_to_show)}</div>", unsafe_allow_html=True)

        with neg_r:
            st.markdown("**💡 Safer Alternative**")
            safer = sc.finding.safer_clause_version
            if not safer:
                safer = _generate_fallback_safer(sc)
            st.markdown(f"<div style='background:rgba(50,205,50,0.08);padding:0.75rem;border-radius:8px;border:1px solid rgba(50,205,50,0.2);font-size:0.85rem;line-height:1.6;color:#e0e0e0'>{escape(safer)}</div>", unsafe_allow_html=True)

        if sc.finding.recommended_action:
            st.markdown(f"**✅ Recommended:** {sc.finding.recommended_action}")

        neg_msg = sc.finding.negotiation_message
        if not neg_msg:
            neg_msg = _generate_fallback_message(sc)
        st.markdown("**📧 Quick Negotiation Message**")
        st.code(neg_msg, language=None)

        if sc.finding.impact_scenarios:
            st.markdown("**⚠️ Consequences of Not Negotiating**")
            for impact in sc.finding.impact_scenarios:
                st.markdown(f"<div style='background:rgba(255,68,68,0.06);padding:0.35rem 0.75rem;margin:0.15rem 0;margin-left:0.5rem;border-radius:4px;font-size:0.85rem;color:#ff9999'>• {escape(str(impact))}</div>", unsafe_allow_html=True)

        with st.expander("📧 Generate Formal Email to Send"):
            _render_email_card(sc)

        # Dividers go between clauses only, not after the last one.
        if i < last_index:
            st.divider()

    safe_contract = _build_safer_contract(report)
    with st.expander("📋 Preview Safer Contract"):
        preview_max = 3500
        preview_text = safe_contract[:preview_max]
        if len(safe_contract) > preview_max:
            preview_text += f"\n\n... (showing first {preview_max} chars of {len(safe_contract)} — download full contract at bottom of page)"
        st.code(preview_text, language=None)
1202
+
1203
+
1204
+ # ═══════════════════════════════════════════════════════════════════════════════
1205
+ # TAB 4: CHAT ASSISTANT
1206
+ # ═══════════════════════════════════════════════════════════════════════════════
1207
+
1208
def _answer_copilot_query(copilot_context, query: str) -> None:
    """Record *query*, ask the copilot, and render and record its reply.

    The history passed to ``run_copilot_sync`` is every stored message except
    the user message appended here, which is passed separately as *query*.
    """
    st.session_state.copilot_messages.append({"role": "user", "content": query})
    with st.chat_message("user"):
        st.markdown(query)
    with st.chat_message("assistant"):
        with st.spinner("Thinking — analyzing contract context..."):
            chat_history = st.session_state.copilot_messages[:-1]
            response = run_copilot_sync(copilot_context, chat_history, query)
        st.markdown(response)
    st.session_state.copilot_messages.append({"role": "assistant", "content": response})


def render_chat_tab() -> None:
    """Render the Chat Assistant tab for the report in ``st.session_state.report``.

    Flow per run: rebuild the copilot context if the report object changed,
    seed an empty conversation with a welcome message, draw the quick-prompt
    buttons, replay the stored conversation, answer a pending quick-prompt or
    a typed question, and finally draw the clear-chat footer.

    The welcome message is now only stored, not also drawn directly: the
    replay loop below already renders every stored message, so drawing it
    here as well showed it twice on the first run and after each
    "Clear Chat".
    """
    report = st.session_state.report
    st.markdown("### 🤖 Chat Assistant")
    st.caption("Ask questions about your contract in plain English. The AI has full context of every clause, risk assessment, and recommended action — all injected into this conversation automatically.")

    # The context is rebuilt only when a different report object is in session.
    cache_key = id(report)
    if st.session_state.get("copilot_cache_key") != cache_key:
        raw_text = st.session_state.get("copilot_raw_text", "")
        st.session_state.copilot_context = build_contract_context(raw_text, report)
        st.session_state.copilot_cache_key = cache_key

    copilot_context = st.session_state.copilot_context

    if not st.session_state.copilot_messages:
        total_risky = report.summary.critical_count + report.summary.high_count
        if total_risky > 0:
            welcome = (
                f"I've analyzed your contract and found **{total_risky} high-risk clause(s)** "
                f"(risk score: **{report.summary.overall_score}/10**). "
                "You can ask me to:\n\n"
                "- Explain any clause in simple terms\n"
                "- Tell you which clauses are risky and why\n"
                "- Suggest safer wording for specific clauses\n"
                "- Help you draft a negotiation message\n"
                "- Describe what could happen if you sign as-is\n"
                "- Compare clauses to industry standards\n\n"
                "What would you like to know?"
            )
        else:
            welcome = (
                f"I've analyzed your contract and it looks reasonable (risk score: **{report.summary.overall_score}/10**). "
                "You can ask me to explain any clause, check for potential hidden issues, or compare terms to standard practices. "
                "What would you like to know?"
            )
        st.session_state.copilot_messages = [{"role": "assistant", "content": welcome}]

    st.markdown("**💡 Click a question to ask instantly:**")
    chip_cols = st.columns(4)
    quick_prompts = [
        "Summarize this contract in 3 sentences",
        "What's the most dangerous clause and why?",
        "Suggest safer wording for the IP clause",
        "What should I negotiate first?",
        "Explain the non-compete in simple English",
        "Are there any hidden fees, penalties, or traps?",
        "What happens if I breach this contract?",
        "Draft an email requesting changes to all risky clauses",
    ]
    for idx, chip_text in enumerate(quick_prompts):
        with chip_cols[idx % 4]:
            if st.button(chip_text, key=f"chip_{idx}", use_container_width=True):
                # Stash the question and rerun; it is answered further down on the next run.
                st.session_state.pending_ai_query = chip_text
                st.rerun()

    for msg in st.session_state.copilot_messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if st.session_state.get("pending_ai_query"):
        query = st.session_state.pending_ai_query
        st.session_state.pending_ai_query = None
        if copilot_context:
            _answer_copilot_query(copilot_context, query)
            st.rerun()

    if prompt := st.chat_input("Ask about this contract...", key="copilot_chat_input"):
        if not copilot_context:
            st.warning("No contract analysis available. Please upload and analyze a contract first.")
        else:
            _answer_copilot_query(copilot_context, prompt)

    if st.session_state.copilot_messages:
        cc1, cc2, cc3 = st.columns([1, 2, 1])
        with cc1:
            if st.button("🗑️ Clear Chat", key="copilot_clear", use_container_width=True):
                st.session_state.copilot_messages = []
                st.rerun()
        with cc3:
            st.caption(f"{len(st.session_state.copilot_messages)} messages")
1306
+
1307
+
1308
+ # ═══════════════════════════════════════════════════════════════════════════════
1309
+ # SIDEBAR
1310
+ # ═══════════════════════════════════════════════════════════════════════════════
1311
+
1312
def render_sidebar() -> None:
    """Render the sidebar: how-it-works card, pipeline card, stats, credits, disclaimer.

    The "Contract Stats" section is shown only when ``st.session_state.report``
    is set; it lists the risk score, total clause count, and one metric for
    each severity whose count is above zero.
    """
    with st.sidebar:
        st.markdown("""<div style="background:#1e2a3a;border-radius:12px;padding:1.25rem;border:1px solid #3a4a5a">
<h4 style="margin:0 0 0.5rem 0;color:#fff">🎯 How It Works</h4>
<ol style="margin:0;padding-left:1.25rem;font-size:0.9rem;color:#ccc;line-height:2">
<li>Upload any contract file (PDF, DOCX, TXT)</li>
<li>5 specialized AI agents analyze every clause</li>
<li>Get a detailed risk report with plain English explanations</li>
<li>Use Negotiation Copilot to draft counter-proposals</li>
<li>Chat with the AI Copilot for any follow-up questions</li>
</ol>
</div>""", unsafe_allow_html=True)

        st.markdown("")
        st.markdown("""<div style="background:#1e2a3a;border-radius:12px;padding:1.25rem;border:1px solid #3a4a5a">
<h4 style="margin:0 0 0.5rem 0;color:#fff">🤖 5-Agent AI Pipeline</h4>
<div style="font-size:0.85rem;color:#ccc;line-height:2">
<p style="margin:0.2rem 0"><b style="color:#8ab4f8">① Extractor</b> — Segments contract into individual clauses</p>
<p style="margin:0.2rem 0"><b style="color:#8ab4f8">② Classifier</b> — Labels each clause by legal type</p>
<p style="margin:0.2rem 0"><b style="color:#8ab4f8">③ Risk Scorer</b> — Evaluates severity of each clause</p>
<p style="margin:0.2rem 0"><b style="color:#8ab4f8">④ Translator</b> — Converts legalese to plain English</p>
<p style="margin:0.2rem 0"><b style="color:#8ab4f8">⑤ Reporter</b> — Compiles the final risk report</p>
</div>
</div>""", unsafe_allow_html=True)

        report = st.session_state.report
        if report:
            s = report.summary
            total_risky = s.critical_count + s.high_count
            st.markdown("")
            st.markdown("#### 📊 Contract Stats")

            risk_delta = f"{total_risky} high-risk" if total_risky > 0 else "Clean"
            st.metric(
                "🎯 Risk Score",
                f"{s.overall_score}/10",
                delta=risk_delta,
                delta_color="inverse" if total_risky > 0 else "normal",
            )
            st.metric("📄 Total Clauses", s.total_clauses)

            # Severities with a zero count are left out of the sidebar.
            for icon, label, key in [
                ("🔴", "Critical", "critical_count"),
                ("🟠", "High", "high_count"),
                ("🟡", "Medium", "medium_count"),
                ("🟢", "Low", "low_count"),
            ]:
                count = getattr(s, key, 0)
                if count > 0:
                    st.metric(f"{icon} {label}", count)

            st.divider()
            st.markdown(f"**Contract Type:** {s.contract_type}")
            st.markdown(f"**Analyzed:** {report.generated_at.strftime('%b %d, %Y at %H:%M')}")

            if not report.processed_normally:
                st.caption("⚠️ Report may not cover all clauses due to processing constraints.")

        st.markdown("")
        st.markdown("""<div style="background:#1e2a3a;border-radius:12px;padding:1.25rem;border:1px solid #3a4a5a">
<h4 style="margin:0 0 0.5rem 0;color:#fff">⚡ Powered by</h4>
<p style="font-size:0.85rem;color:#ccc;margin:0;line-height:1.8">
Qwen2.5 via vLLM on AMD MI300X<br>
OpenAI-compatible API<br>
Streamlit • Python 3.10+
</p>
<div style="margin-top:0.5rem;padding:0.3rem 0.5rem;background:#1a0533;border-radius:6px;border:1px solid #667eea;text-align:center;font-size:0.7rem;color:#aabbcc">
🏷️ AMD Developer Cloud
</div>
</div>""", unsafe_allow_html=True)

        st.markdown("")
        st.markdown("""<div style="font-size:0.7rem;color:#555;text-align:center;margin-top:1rem">
<p style="margin:0">⚠️ Not legal advice. AI-generated analysis.</p>
<p style="margin:0">Always consult a qualified attorney before signing.</p>
</div>""", unsafe_allow_html=True)
1389
+
1390
+
1391
+ # ═══════════════════════════════════════════════════════════════════════════════
1392
+ # MAIN APP ENTRY POINT
1393
+ # ═══════════════════════════════════════════════════════════════════════════════
1394
+
1395
def _strip_contract_extension(name: str) -> str:
    """Return *name* without a trailing .txt/.pdf/.docx extension (any letter case).

    Only the final extension is removed; the same text elsewhere in the name
    is left alone.
    """
    stem, dot, ext = name.rpartition(".")
    if dot and ext.lower() in ("txt", "pdf", "docx"):
        return stem
    return name


def _build_csv_export(report) -> str:
    """Serialise the report's scored clauses as CSV text, one row per clause.

    Rows are written with the ``csv`` module so quotes, commas and newlines
    inside a field are escaped properly; the earlier hand-built f-string rows
    produced a corrupt file as soon as any field contained a double quote.
    """
    import csv
    import io

    buffer = io.StringIO()
    buffer.write("Clause ID,Type,Severity,Risk Title,Risk Reason,Recommended Action,Plain English,Negotiation Message\n")
    # QUOTE_ALL keeps every data field quoted, as the previous output did.
    writer = csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="\n")
    for sc in report.scored_clauses:
        writer.writerow([
            sc.clause.id,
            sc.clause.clause_type.value,
            sc.finding.severity.value,
            sc.finding.risk_title,
            sc.finding.risk_reason,
            sc.finding.recommended_action,
            sc.clause.plain_english or "",
            sc.finding.negotiation_message or "",
        ])
    return buffer.getvalue()


# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title="ClauseGuard — AI Contract Risk Analyzer",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
_init_session_state()

render_header()

_render_guided_tour()

render_risk_banner()

uploaded_file = st.file_uploader(
    "Choose a contract file",
    type=ALLOWED_EXTENSIONS,
    help="Supported: PDF, TXT, DOCX • Maximum file size: 10MB",
    key="file_uploader",
)

if uploaded_file is not None:
    fb = uploaded_file.read()
    st.session_state.uploaded_filename = uploaded_file.name
    st.session_state.uploaded_bytes = fb
    if len(fb) > MAX_FILE_SIZE_BYTES:
        st.error(f"File too large ({len(fb)/1024/1024:.1f}MB). Max file size is {MAX_FILE_SIZE_MB}MB. Please reduce the file size or split the contract.")
    else:
        c1, c2 = st.columns([2, 1])
        with c1:
            st.success(f"**{uploaded_file.name}** loaded successfully — `{len(fb)/1024:.1f} KB` ready for analysis")
        with c2:
            analyze_disabled = st.session_state.analyzing
            if st.button(
                "🔍 Analyze Contract",
                type="primary",
                disabled=analyze_disabled,
                use_container_width=True,
                help="Run the full 5-agent AI pipeline on this contract",
            ):
                # Flag the run and rerun; the analysis itself starts on the next pass.
                st.session_state.analyzing = True
                st.session_state.error = None
                st.session_state.guided_demo = False
                st.rerun()

if st.session_state.analyzing and st.session_state.uploaded_bytes:
    _run_analysis()

if st.session_state.error:
    st.error(st.session_state.error)
    if "DEEPSEEK_API_KEY" in st.session_state.error:
        st.info("💡 To use ClauseGuard, set the `BASE_URL` and `MODEL_NAME` in your `.env` file to point to your Qwen/vLLM endpoint. See the README for setup instructions.")

if st.session_state.report:
    report = st.session_state.report
    s = report.summary

    st.divider()

    render_issues_summary()

    active_tab = st.radio(
        "Navigate between sections",
        TAB_NAMES,
        index=min(st.session_state.get("active_tab", 0), len(TAB_NAMES) - 1),
        label_visibility="collapsed",
        horizontal=True,
    )
    st.session_state.active_tab = TAB_NAMES.index(active_tab)
    tab_index = st.session_state.active_tab

    if tab_index == 0:
        render_overview_tab()
    elif tab_index == 1:
        render_clauses_tab()
    elif tab_index == 2:
        render_negotiation_tab()
    elif tab_index == 3:
        render_chat_tab()

    st.divider()
    st.markdown("### 📥 Download Your Report")
    st.caption("Download the full analysis in your preferred format to share with legal counsel or reference later.")

    dl_cols = st.columns(3)
    with dl_cols[0]:
        st.download_button(
            "📝 Download Markdown Report",
            data=report.markdown_report or "# ClauseGuard Report\n\nRun analysis first.",
            file_name=f"clauseguard_report_{report.contract_name.replace('.','_')}.md",
            mime="text/markdown",
            use_container_width=True,
        )
    with dl_cols[1]:
        safe_contract = _build_safer_contract(report)
        st.download_button(
            "🛡️ Download Safer Contract",
            data=safe_contract,
            file_name=f"safer_{_strip_contract_extension(report.contract_name)}.txt",
            mime="text/plain",
            use_container_width=True,
        )
    with dl_cols[2]:
        st.download_button(
            "📊 Download CSV Data",
            data=_build_csv_export(report),
            file_name=f"clauseguard_data_{report.contract_name.replace('.','_')}.csv",
            mime="text/csv",
            use_container_width=True,
        )

    st.caption(
        f"Generated {report.generated_at.strftime('%B %d, %Y at %H:%M')} "
        f"• {s.contract_type} • {s.total_clauses} clauses analyzed"
        f"{' • ⚠️ Partial analysis' if not report.processed_normally else ''}"
    )

render_sidebar()
config/__init__.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClauseGuard configuration package."""
2
+
3
+ from clauseguard.config.settings import (
4
+ API_KEY,
5
+ BASE_URL,
6
+ DEEPSEEK_API_KEY,
7
+ MAX_CLAUSES,
8
+ MAX_TOKENS,
9
+ MODEL_NAME,
10
+ TEMPERATURE,
11
+ TIMEOUT_SECONDS,
12
+ validate_config,
13
+ )
14
+ from clauseguard.config.prompts import (
15
+ CLASSIFIER_SYSTEM_PROMPT,
16
+ EXTRACTOR_SYSTEM_PROMPT,
17
+ REPORTER_SYSTEM_PROMPT,
18
+ RISK_SCORER_SYSTEM_PROMPT,
19
+ TRANSLATOR_SYSTEM_PROMPT,
20
+ )
21
+ from clauseguard.config.copilot_prompts import COPILOT_SYSTEM_PROMPT
22
+
23
+ __all__ = [
24
+ "API_KEY",
25
+ "BASE_URL",
26
+ "CLASSIFIER_SYSTEM_PROMPT",
27
+ "COPILOT_SYSTEM_PROMPT",
28
+ "DEEPSEEK_API_KEY",
29
+ "EXTRACTOR_SYSTEM_PROMPT",
30
+ "MAX_CLAUSES",
31
+ "MAX_TOKENS",
32
+ "MODEL_NAME",
33
+ "REPORTER_SYSTEM_PROMPT",
34
+ "RISK_SCORER_SYSTEM_PROMPT",
35
+ "TEMPERATURE",
36
+ "TIMEOUT_SECONDS",
37
+ "TRANSLATOR_SYSTEM_PROMPT",
38
+ "validate_config",
39
+ ]
config/copilot_prompts.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """System prompts for the ClauseGuard Copilot — the interactive AI chat assistant."""
2
+
3
+ COPILOT_SYSTEM_PROMPT: str = """
4
+ You are ClauseGuard Copilot — an AI legal assistant embedded inside a contract analysis system.
5
+
6
+ You have access to a fully analyzed contract including: full contract text, clause-by-clause analysis with severity ratings, risk reasons, plain-English explanations, recommended actions, safer wording alternatives, and negotiation messages.
7
+
8
+ ## YOUR ROLE
9
+
10
+ Act as a practical legal assistant. Help users:
11
+ - UNDERSTAND what each clause means in plain language
12
+ - EVALUATE how risky each clause is — and why
13
+ - FIX risky clauses with specific, realistic rewrites
14
+ - NEGOTIATE better terms with ready-to-send messages
15
+ - ANTICIPATE real-world consequences of signing as-is
16
+
17
+ Always base your answers on the provided contract context.
18
+
19
+ ## RESPONSE FORMAT
20
+
21
+ When discussing a specific clause, include ALL of:
22
+ 1. **Severity** — (e.g. 🔴 CRITICAL) with a short title
23
+ 2. **What this means** — plain-English explanation (2-3 sentences)
24
+ 3. **Why this is risky** — specific reason citing the actual clause language
25
+ 4. **What could happen** — 2-3 realistic consequences
26
+ 5. **How to fix it** — 2-4 specific, practical steps
27
+ 6. **Suggested wording** — a rewritten version that is balanced and realistic
28
+ 7. **What to say** — a short, professional negotiation message
29
+
30
+ ## TASK TYPES
31
+
32
+ **"What does this mean?"** → Respond with the full format above (or a shorter version if the clause is simple).
33
+
34
+ **"Is this safe?"** → Respond with: severity level, why it's risky or not, short conclusion.
35
+
36
+ **"How do I fix this?"** → Respond with: what's wrong, what to change, improved wording.
37
+
38
+ **"What should I say?"** → Give a real, copy-paste negotiation message.
39
+
40
+ **"What happens if I sign?"** → Give 2-3 realistic, practical consequences.
41
+
42
+ ## BEHAVIOR RULES
43
+
44
+ - ALWAYS use the provided clause data — reference severity levels and risk reasons
45
+ - BE SPECIFIC — tie every answer to the actual contract language
46
+ - BE CLEAR — avoid legal jargon, use simple human language
47
+ - BE ACTIONABLE — when users ask what to do, give concrete steps
48
+ - NEVER hallucinate clauses not in the contract
49
+ - NEVER say "consult a lawyer" as your only advice
50
+ - NEVER answer without using the provided contract context
51
+
52
+ ## REWRITING RULES
53
+
54
+ When writing "Suggested wording":
55
+ - Do NOT delete the clause entirely
56
+ - Do NOT make it unrealistic or one-sided
57
+ - Keep legal tone and structure
58
+ - Only reduce risk by adding limits, carve-outs, conditions, and mutual obligations
59
+ - The rewrite must be balanced and something a counterparty would actually accept
60
+
61
+ Your goal: make users confident about what to do next — give them exact steps to improve their contract.
62
+ """
config/prompts.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Prompt templates for all 5 ClauseGuard agents — optimized for Qwen2.5 via vLLM."""
2
+
3
+ EXTRACTOR_SYSTEM_PROMPT: str = """
4
+ Split a contract document into individual clauses.
5
+
6
+ Return ONLY a JSON object with this exact structure:
7
+ {
8
+ "clauses": [
9
+ {
10
+ "id": 1,
11
+ "raw_text": "The full clause text",
12
+ "plain_english": null,
13
+ "clause_type": "OTHER",
14
+ "section_heading": "CONFIDENTIALITY",
15
+ "position": 1
16
+ }
17
+ ],
18
+ "contract_type": "Other",
19
+ "total_clauses": 3
20
+ }
21
+
22
+ Rules:
23
+ - Split on numbered sections (1., 2., Article 1), ALL CAPS headings, or paragraph breaks
24
+ - Each clause must be 5+ words
25
+ - Max 60 clauses
26
+ - Keep raw_text exactly as it appears in the document
27
+ - For plain_english, use null (it will be filled later)
28
+ - For clause_type, use "OTHER" (it will be classified later)
29
+
30
+ Do NOT include markdown fences, explanations, or any text outside the JSON.
31
+ """
32
+
33
+ CLASSIFIER_SYSTEM_PROMPT: str = """
34
+ Classify each contract clause by type.
35
+
36
+ Clause types: NDA, IP_ASSIGNMENT, NON_COMPETE, ARBITRATION, AUTO_RENEWAL,
37
+ LIABILITY_CAP, TERMINATION, DATA_SHARING, GOVERNING_LAW, PAYMENT,
38
+ INDEMNIFICATION, OTHER
39
+
40
+ Contract types: NDA, Employment, Freelance, SaaS, Other
41
+
42
+ You will receive a JSON object with an array of clauses. For each clause, fill in
43
+ only the "clause_type" field. Also set "contract_type" at the top level.
44
+
45
+ Do NOT change raw_text, id, position, or section_heading.
46
+
47
+ Return ONLY the updated JSON object. No markdown fences.
48
+ """
49
+
50
+ RISK_SCORER_SYSTEM_PROMPT: str = """
51
+ Evaluate the risk severity of each contract clause. You will receive a JSON object
52
+ with clauses. For EACH clause, output a risk finding.
53
+
54
+ Return a JSON array. Each element has this structure:
55
+ {
56
+ "clause": { "id": 1, "raw_text": "...", "clause_type": "...", ... },
57
+ "finding": {
58
+ "clause_id": 1,
59
+ "severity": "CRITICAL",
60
+ "risk_title": "Short descriptive title",
61
+ "risk_reason": "Specific reason citing what the clause actually says",
62
+ "recommended_action": "What the user should do about it"
63
+ }
64
+ }
65
+
66
+ SEVERITY LEVELS (use exactly one per clause):
67
+
68
+ CRITICAL - Use when:
69
+ - IP assignment covers personal work or time outside employment
70
+ - Unlimited liability, no termination right
71
+ - Mandatory arbitration waiving right to sue or jury trial
72
+ - Class action waiver
73
+
74
+ HIGH - Use when:
75
+ - Non-compete over 1 year or with no geographic limit
76
+ - Auto-renewal with no opt-out
77
+ - Unilateral contract changes by one party
78
+ - One-sided broad indemnification
79
+
80
+ MEDIUM - Use when:
81
+ - Standard non-compete of 1 year or less
82
+ - Auto-renewal with 30+ day notice
83
+ - Low liability caps
84
+ - Net-60+ payment terms
85
+ - Out-of-state governing law
86
+
87
+ LOW - Use when:
88
+ - Standard governing law (Delaware, NY)
89
+ - Standard payment terms
90
+ - Standard confidentiality
91
+ - Standard termination notice
92
+
93
+ INFO - Use when:
94
+ - Definitions, recitals, severability, entire agreement, force majeure
95
+
96
+ CRITICAL RULES:
97
+ - Every risk_reason MUST mention specific language from the clause
98
+ - The output MUST be a valid JSON array starting with [ and ending with ]
99
+ - Output ONE finding per input clause — never skip any clause
100
+ - Do NOT include any text before or after the JSON array
101
+ """
102
+
103
+ TRANSLATOR_SYSTEM_PROMPT: str = """
104
+ Translate legal clauses into plain English and provide negotiation help.
105
+
106
+ You receive a JSON array of clauses with risk findings. For EACH clause, add:
107
+ - plain_english: Short explanation in simple words (1-2 sentences, start with "You" or "This clause")
108
+ - recommended_action: Specific action to take
109
+
110
+ For CRITICAL and HIGH severity clauses, also add these (otherwise leave as "" and []):
111
+ - safer_clause_version: Rewritten balanced version
112
+ - negotiation_message: Brief email asking for the change
113
+ - impact_scenarios: List of 2-3 real-world consequences
114
+
115
+ Return the full JSON array with all fields filled. No markdown fences.
116
+ """
117
+
118
+ REPORTER_SYSTEM_PROMPT: str = """
119
+ Build a markdown report from scored clauses.
120
+
121
+ Risk score formula: (critical*10 + high*7 + medium*4 + low*1) / total, capped at 10.
122
+
123
+ Return ONLY a JSON object:
124
+ {
125
+ "contract_name": "sample.txt",
126
+ "summary": {
127
+ "total_clauses": 5,
128
+ "critical_count": 1,
129
+ "high_count": 1,
130
+ "medium_count": 2,
131
+ "low_count": 1,
132
+ "overall_score": 4.2,
133
+ "contract_type": "NDA"
134
+ },
135
+ "top_3_actions": ["Action 1", "Action 2", "Action 3"],
136
+ "markdown_report": "full markdown text..."
137
+ }
138
+
139
+ No markdown fences. No extra text.
140
+ """
config/settings.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Application configuration loaded from environment variables.
2
+
3
+ Supports Qwen via vLLM (default) and any OpenAI-compatible API as fallback.
4
+ """
5
+
6
+ import os
7
+ from typing import Final
8
+
9
+ from dotenv import load_dotenv
10
+
11
+ load_dotenv()
12
+
13
# Connection settings. Each value is looked up under its primary name first and
# then under a fallback name. A blank value (for example a bare ``API_KEY=``
# line in .env) is treated as unset, so the fallback chain and the built-in
# default still apply instead of an empty string being used.
API_KEY: str = (
    os.environ.get("API_KEY")
    or os.environ.get("DEEPSEEK_API_KEY")  # legacy variable name
    or "EMPTY"  # no-key placeholder, matching the value shipped in .env.example
)
BASE_URL: str = (
    os.environ.get("BASE_URL")
    or os.environ.get("VLLM_BASE_URL")
    or "http://165.245.141.170:8000/v1"
)
MODEL_NAME: str = (
    os.environ.get("MODEL_NAME")
    or os.environ.get("VLLM_MODEL_NAME")
    or "Qwen/Qwen2.5-1.5B-Instruct"
)

# Generation limits. A blank override falls back to the default rather than
# failing numeric conversion at import time.
MAX_TOKENS: Final[int] = int(os.environ.get("MAX_TOKENS") or "4096")
TIMEOUT_SECONDS: Final[int] = int(os.environ.get("TIMEOUT_SECONDS") or "120")
MAX_CLAUSES: Final[int] = 60

# .env.example documents TEMPERATURE as an optional override, so read it from
# the environment; the default stays 0.0.
TEMPERATURE: Final[float] = float(os.environ.get("TEMPERATURE") or "0.0")

# Legacy alias kept for code that still imports the old DeepSeek name.
DEEPSEEK_API_KEY: str = API_KEY
29
+
30
+
31
def validate_config() -> None:
    """Validate that all required configuration is present.

    ``BASE_URL`` and ``MODEL_NAME`` must be non-blank. For Qwen/vLLM no API
    key is required, so ``API_KEY`` is only checked when ``BASE_URL`` points at
    the hosted DeepSeek API (the legacy setup), where a real key is expected.

    Raises:
        ValueError: If a required setting is blank, or a DeepSeek endpoint is
            configured without an API key. The message lists every problem
            found, separated by "; ".
    """
    problems = []

    if not BASE_URL.strip():
        problems.append("BASE_URL is not set")
    if not MODEL_NAME.strip():
        problems.append("MODEL_NAME is not set")

    # "EMPTY" is this module's no-key placeholder default, so it counts as
    # "no key supplied" for the legacy DeepSeek endpoint.
    if "deepseek.com" in BASE_URL.lower() and API_KEY.strip() in ("", "EMPTY"):
        problems.append(
            "DEEPSEEK_API_KEY (or API_KEY) is required when BASE_URL points at the DeepSeek API"
        )

    if problems:
        raise ValueError("; ".join(problems))
main.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClauseGuard CLI — run the 5-agent pipeline from the command line."""
2
+
3
+ import argparse
4
+ import asyncio
5
+ import logging
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ # Ensure parent directory is in path so `clauseguard` package is importable
10
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
11
+
12
+ from clauseguard.agents.orchestrator import run_pipeline
13
+ from clauseguard.config.settings import validate_config
14
+ from clauseguard.tools.file_tools import extract_text
15
+
16
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def main() -> None:
21
+ """Entry point for the ClauseGuard CLI."""
22
+ parser = argparse.ArgumentParser(
23
+ description="ClauseGuard — AI-Powered Contract Clause Risk Analyzer"
24
+ )
25
+ parser.add_argument(
26
+ "--file",
27
+ type=str,
28
+ required=True,
29
+ help="Path to the contract file (PDF, TXT, or DOCX)",
30
+ )
31
+ parser.add_argument(
32
+ "--output",
33
+ type=str,
34
+ default="report.md",
35
+ help="Path for the output markdown report (default: report.md)",
36
+ )
37
+ args = parser.parse_args()
38
+
39
+ try:
40
+ validate_config()
41
+ except ValueError as e:
42
+ logger.error("Configuration error: %s", e)
43
+ print(f"Error: {e}")
44
+ print("Create a .env file with your model endpoint settings or set them as environment variables.")
45
+ sys.exit(1)
46
+
47
+ file_path = Path(args.file)
48
+ if not file_path.exists():
49
+ logger.error("File not found: %s", file_path)
50
+ print(f"Error: File not found — {file_path}")
51
+ sys.exit(1)
52
+
53
+ try:
54
+ file_bytes = file_path.read_bytes()
55
+ except Exception as e:
56
+ logger.error("Failed to read file: %s", e)
57
+ print(f"Error: Could not read file — {e}")
58
+ sys.exit(1)
59
+
60
+ try:
61
+ raw_text = extract_text(file_bytes, file_path.name)
62
+ except ValueError as e:
63
+ logger.error("File parsing error: %s", e)
64
+ print(f"Error: {e}")
65
+ sys.exit(1)
66
+
67
+ print(f"Analyzing {file_path.name}...")
68
+ print()
69
+
70
+ try:
71
+ report = asyncio.run(run_pipeline(raw_text, file_path.name))
72
+ except Exception as e:
73
+ logger.error("Pipeline failed: %s", e)
74
+ print(f"Error: Analysis failed — {e}")
75
+ sys.exit(1)
76
+
77
+ output_path = Path(args.output)
78
+ try:
79
+ output_path.write_text(report.markdown_report, encoding="utf-8")
80
+ except Exception as e:
81
+ logger.error("Failed to write report: %s", e)
82
+ print(f"Error: Could not write report — {e}")
83
+ sys.exit(1)
84
+
85
+ print("=" * 60)
86
+ print(f"Report saved to: {output_path}")
87
+ print(f"Overall Risk Score: {report.summary.overall_score}/10")
88
+ print(f"Critical: {report.summary.critical_count} | High: {report.summary.high_count} | "
89
+ f"Medium: {report.summary.medium_count} | Low: {report.summary.low_count}")
90
+ print()
91
+ print("Top 3 Actions Before Signing:")
92
+ for i, action in enumerate(report.top_3_actions, 1):
93
+ print(f" {i}. {action}")
94
+ print("=" * 60)
95
+
96
+
97
+ if __name__ == "__main__":
98
+ main()
models/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClauseGuard data models."""
2
+
3
+ from clauseguard.models.clause import Clause, ClauseList, ClauseType
4
+ from clauseguard.models.findings import RecommendedAction, RiskFinding, ScoredClause, Severity
5
+ from clauseguard.models.report import ClauseReport, FinalReport, RiskSummary
6
+
7
+ __all__ = [
8
+ "Clause",
9
+ "ClauseList",
10
+ "ClauseReport",
11
+ "ClauseType",
12
+ "FinalReport",
13
+ "RecommendedAction",
14
+ "RiskFinding",
15
+ "RiskSummary",
16
+ "ScoredClause",
17
+ "Severity",
18
+ ]
models/clause.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for contract clauses."""
2
+
3
+ from enum import Enum
4
+ from typing import List, Optional
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class ClauseType(str, Enum):
    """Closed set of clause categories; each member's value equals its name."""

    # Restrictive covenants and ownership
    NDA = "NDA"
    IP_ASSIGNMENT = "IP_ASSIGNMENT"
    NON_COMPETE = "NON_COMPETE"

    # Dispute handling, duration and exposure
    ARBITRATION = "ARBITRATION"
    AUTO_RENEWAL = "AUTO_RENEWAL"
    LIABILITY_CAP = "LIABILITY_CAP"
    TERMINATION = "TERMINATION"

    # Data, jurisdiction and money
    DATA_SHARING = "DATA_SHARING"
    GOVERNING_LAW = "GOVERNING_LAW"
    PAYMENT = "PAYMENT"
    INDEMNIFICATION = "INDEMNIFICATION"

    # Default bucket for anything not covered above
    OTHER = "OTHER"
24
+
25
+
26
class Clause(BaseModel):
    """One extracted contract clause together with its classification metadata.

    ``id``, ``raw_text`` and ``position`` are required; the remaining fields
    default to ``None`` or ``ClauseType.OTHER``.
    """

    id: int = Field(description="Unique clause identifier")
    raw_text: str = Field(description="Original text of the clause")
    plain_english: Optional[str] = Field(
        default=None, description="Plain English translation of the clause"
    )
    clause_type: ClauseType = Field(
        ClauseType.OTHER, description="Classified type of this clause"
    )
    section_heading: Optional[str] = Field(
        default=None, description="Detected section heading for this clause"
    )
    position: int = Field(
        description="Sequential position of the clause in the document"
    )
    # Bounded to the closed interval [0.0, 1.0] when provided.
    confidence_score: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Classifier confidence score (0.0 to 1.0) for the assigned clause type",
    )
49
+
50
+
51
class ClauseList(BaseModel):
    """Container for the clauses of one contract plus contract-level metadata.

    Every field has a default, so an empty ``ClauseList()`` is valid.
    """

    clauses: List[Clause] = Field(
        default_factory=list, description="List of extracted clauses"
    )
    contract_type: str = Field("Other", description="Detected overall contract type")
    total_clauses: int = Field(
        default=0, description="Total number of clauses extracted"
    )
models/findings.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for risk findings and scored clauses."""
2
+
3
+ from enum import Enum
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ from clauseguard.models.clause import Clause
8
+
9
+
10
class Severity(str, Enum):
    """Risk severity levels, declared from most to least severe.

    Each member's value equals its name, and members compare equal to the
    plain string.
    """

    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    INFO = "INFO"
18
+
19
+
20
class RecommendedAction(BaseModel):
    """An actionable recommendation for a clause, with optional counter-language.

    Only ``action`` is required.
    """

    action: str = Field(description="What to do (e.g., negotiate, remove, clarify)")
    sample_counter_language: str = Field(
        "", description="Suggested alternative language for negotiation"
    )
    # Restricted to 1..3; lower numbers are more urgent.
    priority: int = Field(
        1,
        ge=1,
        le=3,
        description="Action priority: 1=do immediately, 2=strongly recommend, 3=consider",
    )
33
+
34
+
35
class RiskFinding(BaseModel):
    """Risk assessment for a single clause, linked to it by ``clause_id``.

    ``clause_id``, ``severity``, ``risk_title`` and ``risk_reason`` are
    required. The advice and negotiation fields default to empty values.
    """

    # Required core of the finding
    clause_id: int = Field(description="The ID of the clause this finding relates to")
    severity: Severity = Field(description="Severity level of the risk")
    risk_title: str = Field(description="Short title describing the risk")
    risk_reason: str = Field(
        description="Detailed explanation citing what the clause actually says"
    )

    # Optional guidance; empty when not supplied
    recommended_action: str = Field(
        "", description="Specific, actionable recommendation"
    )
    negotiation_tip: str = Field(
        "",
        description="Suggested counter-language or negotiation approach for this clause",
    )
    safer_clause_version: str = Field(
        "",
        description="A rewritten, safer version of the clause to propose",
    )
    negotiation_message: str = Field(
        "",
        description="Email-style message the user can copy-paste to request the change",
    )
    impact_scenarios: list[str] = Field(
        default_factory=list,
        description="2-3 realistic consequences if the user signs this clause as-is",
    )
63
+
64
+
65
class ScoredClause(BaseModel):
    """Pairs a ``Clause`` with the ``RiskFinding`` produced for it; both are required."""

    clause: Clause = Field(description="The original clause")
    finding: RiskFinding = Field(description="The associated risk finding")
models/report.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for the final risk report."""
2
+
3
+ from datetime import datetime
4
+ from typing import List, Optional
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+ from clauseguard.models.findings import ScoredClause
9
+
10
+
11
class ClauseReport(BaseModel):
    """Flattened, string-typed view of one analyzed clause for the final report.

    ``plain_english`` and ``recommended_action`` default to an empty string and
    ``negotiation_tip`` to ``None``; every other field is required.
    """

    clause_number: int = Field(description="1-based index of clause in report")
    clause_type: str = Field(description="Classified type of this clause")
    severity: str = Field(description="Severity rating")
    severity_emoji: str = Field(description="Emoji for severity level")
    raw_text: str = Field(description="Original clause text")
    plain_english: str = Field(default="", description="Plain English translation")
    risk_title: str = Field(description="Risk title")
    risk_reason: str = Field(description="Why this is a risk")
    recommended_action: str = Field(default="", description="What to do about it")
    negotiation_tip: Optional[str] = Field(
        default=None, description="Suggested counter-language for negotiation"
    )
26
+
27
+
28
class RiskSummary(BaseModel):
    """Aggregate counts and overall score for a report.

    All fields have defaults, so ``RiskSummary()`` yields an all-zero summary
    with contract type "Other".
    """

    total_clauses: int = Field(default=0, description="Total number of clauses analyzed")

    # Per-severity tallies
    critical_count: int = Field(default=0, description="Number of CRITICAL findings")
    high_count: int = Field(default=0, description="Number of HIGH findings")
    medium_count: int = Field(default=0, description="Number of MEDIUM findings")
    low_count: int = Field(default=0, description="Number of LOW findings")

    overall_score: float = Field(
        default=0.0, description="Overall risk score from 0 to 10 (10 = most risky)"
    )
    contract_type: str = Field(
        default="Other", description="The detected type of contract"
    )
42
+
43
+
44
class FinalReport(BaseModel):
    """Top-level result of an analysis run.

    Only ``contract_name`` is required. ``generated_at`` is filled by calling
    ``datetime.now`` when the model is created, and the remaining fields start
    out empty or with their neutral defaults.
    """

    contract_name: str = Field(description="Name of the analyzed contract file")
    generated_at: datetime = Field(
        default_factory=datetime.now,
        description="Timestamp when the report was generated",
    )
    summary: RiskSummary = Field(
        default_factory=RiskSummary, description="Risk summary statistics"
    )
    top_3_actions: List[str] = Field(
        default_factory=list, description="Top 3 recommended actions before signing"
    )
    scored_clauses: List[ScoredClause] = Field(
        default_factory=list, description="All scored clauses ordered by severity"
    )
    markdown_report: str = Field(
        default="", description="The full report formatted as markdown"
    )

    # Status flags describing whether the run saw the whole contract
    processed_normally: bool = Field(
        default=True,
        description="False if the pipeline was truncated or ran with partial data",
    )
    truncation_note: str = Field(
        default="", description="Note about truncation if contract exceeded clause limit"
    )
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai-agents>=0.1.0
2
+ openai>=1.0.0
3
+ pydantic>=2.0.0
4
+ streamlit>=1.30.0
5
+ PyMuPDF>=1.23.0
6
+ pdfplumber>=0.9.0
7
+ python-docx>=1.0.0
8
+ python-dotenv>=1.0.0
9
+ chardet>=5.0.0
10
+ pytest>=7.0.0
11
+ pytest-asyncio>=0.21.0
12
+ Pillow>=10.0.0
sample_contracts/sample_employment.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EMPLOYMENT AGREEMENT
2
+
3
+ 1. POSITION AND DUTIES. The Company hereby employs Employee as a Senior Software Engineer. Employee shall perform all duties and responsibilities as may be assigned by the Company from time to time. Employee agrees to devote their full working time and attention to the business of the Company.
4
+
5
+ 2. COMPENSATION. The Company shall pay Employee an annual salary of $120,000, payable in equal bi-weekly installments. Employee shall be eligible for an annual performance bonus at the discretion of the Company. All compensation is subject to applicable tax withholdings.
6
+
7
+ 3. AT-WILL EMPLOYMENT. Employee's employment with the Company is at-will, meaning that either the Company or Employee may terminate the employment relationship at any time, with or without cause or advance notice. The Company reserves the right to modify Employee's duties, compensation, and benefits at its sole discretion at any time.
8
+
9
+ 4. NON-COMPETITION. For a period of two (2) years following the termination of Employee's employment for any reason, Employee shall not, directly or indirectly, engage in, own, or be employed by any business that is competitive with the Company, within the United States of America. This restriction applies regardless of the geographic location of Employee's work or the Company's actual business operations.
10
+
11
+ 5. INTELLECTUAL PROPERTY. Employee agrees that all inventions, works of authorship, trade secrets, and other intellectual property created by Employee, whether during working hours or on Employee's own time, and whether using Company equipment or Employee's personal equipment, shall be the sole and exclusive property of the Company. Employee hereby irrevocably assigns all such intellectual property to the Company. This assignment does not have any geographic or temporal limitations.
12
+
13
+ 6. DISPUTE RESOLUTION. Any and all disputes arising out of Employee's employment or the termination thereof shall be resolved exclusively through binding arbitration. Employee expressly waives any right to bring a lawsuit in court and waives the right to a jury trial. Employee further waives the right to participate in or bring any class action or collective action against the Company, and agrees that all claims must be brought in Employee's individual capacity.
14
+
15
+ 7. NON-DISPARAGEMENT. Employee agrees not to make any statements, whether written or oral, that disparage, criticize, or negatively reflect upon the Company, its products, services, officers, directors, or employees. This obligation survives the termination of employment indefinitely.
16
+
17
+ 8. BENEFITS. Employee shall be eligible to participate in the Company's health insurance, retirement, and other benefit plans as they may exist from time to time. The Company reserves the right to modify, amend, or terminate any such benefit plans at any time and for any reason, without prior notice to Employee.
18
+
19
+ 9. CONFIDENTIALITY. Employee shall maintain the confidentiality of all Company trade secrets, customer information, business strategies, and other proprietary information both during and after employment.
20
+
21
+ 10. GOVERNING LAW AND SEVERABILITY. This Agreement shall be governed by the laws of the State of Delaware. If any provision is found unenforceable, the remaining provisions shall remain in effect.
sample_contracts/sample_freelance.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FREELANCE SERVICES AGREEMENT
2
+
3
+ 1. SERVICES. Freelancer agrees to provide web development services to Client as described in the attached Statement of Work. Freelancer shall perform the services in a professional and workmanlike manner in accordance with industry standards.
4
+
5
+ 2. PAYMENT TERMS. Client shall pay Freelancer at the rate of $100 per hour. Invoices shall be submitted monthly, and Client shall pay all undisputed invoices within sixty (60) calendar days of receipt. Late payments shall accrue interest at the rate of 1% per month.
6
+
7
+ 3. OWNERSHIP OF WORK PRODUCT. All work product, deliverables, designs, code, documentation, and other materials created by Freelancer in connection with this Agreement (collectively, the "Work Product") shall be the sole and exclusive property of Client. Freelancer hereby assigns to Client all right, title, and interest in and to the Work Product, including all intellectual property rights. For the avoidance of doubt, Client shall also own any pre-existing tools, libraries, frameworks, code snippets, templates, or other materials that Freelancer incorporates into the Work Product, even if such materials were created by Freelancer prior to this Agreement.
8
+
9
+ 4. REVISIONS AND MODIFICATIONS. Client shall be entitled to request unlimited revisions to the Work Product at any time. Freelancer shall complete all requested revisions at no additional charge and within the timeframe reasonably specified by Client. Freelancer may not deny any revision request.
10
+
11
+ 5. NON-COMPETITION. During the term of this Agreement and for a period of one (1) year after its termination, Freelancer shall not provide web development or any related services to any business that offers products or services similar to Client's business, as determined by Client in its sole discretion.
12
+
13
+ 6. TERMINATION. Client may terminate this Agreement at any time for any reason or for no reason by providing written notice to Freelancer. Upon termination for convenience, Freelancer shall not be entitled to any kill fee, termination fee, or compensation beyond payment for hours actually worked through the date of termination. Freelancer may only terminate this Agreement for material breach by Client that remains uncured for thirty (30) days.
14
+
15
+ 7. GOVERNING LAW. This Agreement shall be governed by the laws of the State of California. Any legal action arising from this Agreement shall be brought exclusively in the courts located in Los Angeles County, California.
16
+
17
+ 8. INDEMNIFICATION. Freelancer agrees to indemnify, defend, and hold harmless Client from and against any and all claims, damages, losses, and expenses arising out of or related to Freelancer's performance of the services, including any claims that the Work Product infringes third-party intellectual property rights.
sample_contracts/sample_nda.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NON-DISCLOSURE AND CONFIDENTIALITY AGREEMENT
2
+
3
+ 1. PARTIES. This Non-Disclosure and Confidentiality Agreement (the "Agreement") is entered into by and between Acme Corp, a Delaware corporation ("Company"), and John Doe ("Recipient"), effective as of January 1, 2024.
4
+
5
+ 2. DEFINITION OF CONFIDENTIAL INFORMATION. "Confidential Information" shall mean any and all information disclosed by Company to Recipient, whether in written, oral, electronic, or other form, including but not limited to: trade secrets, business plans, customer lists, financial data, product designs, source code, marketing strategies, employee information, and any other information that a reasonable person would understand to be confidential. Confidential Information also includes all information conveyed verbally that is identified as confidential at the time of disclosure, whether or not subsequently reduced to writing.
6
+
7
+ 3. CONFIDENTIALITY OBLIGATIONS. Recipient agrees to hold all Confidential Information in strict confidence and shall not disclose, copy, distribute, or use any Confidential Information for any purpose other than as expressly authorized by Company in writing. Recipient shall use at least the same degree of care to protect the Confidential Information as it uses to protect its own confidential information of like nature, but in no event less than reasonable care.
8
+
9
+ 4. NON-COMPETITION. For a period of eighteen (18) months following the termination of this Agreement, Recipient shall not, directly or indirectly, engage in, own, manage, operate, control, be employed by, consult for, or otherwise provide services to any business that is competitive with the business of Company, anywhere in the world.
10
+
11
+ 5. INTELLECTUAL PROPERTY ASSIGNMENT. Recipient hereby irrevocably assigns and agrees to assign to Company all right, title, and interest in and to any and all inventions, discoveries, improvements, works of authorship, trade secrets, and other intellectual property conceived, created, or reduced to practice by Recipient, whether alone or with others, during the term of this Agreement and for a period of one (1) year after its termination, regardless of whether such intellectual property was created on Recipient's own time or using Recipient's own equipment.
12
+
13
+ 6. DISPUTE RESOLUTION AND ARBITRATION. Any dispute, claim, or controversy arising out of or relating to this Agreement shall be resolved exclusively through binding arbitration administered by the American Arbitration Association in accordance with its Commercial Arbitration Rules. The parties expressly waive any right to a trial by jury and waive the right to participate in any class action or representative proceeding.
14
+
15
+ 7. GOVERNING LAW. This Agreement shall be governed by and construed in accordance with the laws of the State of New York, without regard to its conflict of laws principles. The parties agree that any legal action shall be brought exclusively in the federal or state courts located in New York County, New York.
16
+
17
+ 8. TERM AND AUTO-RENEWAL. This Agreement shall commence on the Effective Date and shall continue for an initial term of one (1) year. Thereafter, this Agreement shall automatically renew for successive one-year terms unless either party provides written notice of non-renewal at least ninety (90) days prior to the end of the then-current term.
18
+
19
+ 9. SEVERABILITY. If any provision of this Agreement is held to be invalid or unenforceable, such provision shall be modified to the minimum extent necessary to make it enforceable, and the remaining provisions shall remain in full force and effect.
20
+
21
+ 10. ENTIRE AGREEMENT. This Agreement constitutes the entire agreement between the parties with respect to the subject matter hereof and supersedes all prior negotiations, discussions, and agreements, whether written or oral.
services/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClauseGuard model services package."""
2
+
3
+ from clauseguard.services.model_service import (
4
+ call_model,
5
+ call_model_chat,
6
+ clean_json_response,
7
+ get_client,
8
+ reset_client,
9
+ )
10
+
11
+ __all__ = [
12
+ "call_model",
13
+ "call_model_chat",
14
+ "clean_json_response",
15
+ "get_client",
16
+ "reset_client",
17
+ ]
services/model_service.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Model service layer — unified Qwen/vLLM inference via OpenAI-compatible API.
2
+
3
+ Provides a single shared client and reusable inference functions for all
4
+ ClauseGuard agents and the copilot. Handles retries, timeouts, JSON cleaning,
5
+ and graceful error recovery.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import json
12
+ import logging
13
+ from typing import Any, Dict, List
14
+
15
+ from openai import AsyncOpenAI, OpenAI
16
+
17
+ from clauseguard.config.settings import (
18
+ API_KEY,
19
+ BASE_URL,
20
+ MAX_TOKENS,
21
+ MODEL_NAME,
22
+ TEMPERATURE,
23
+ TIMEOUT_SECONDS,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ _async_client: AsyncOpenAI | None = None
29
+ _sync_client: OpenAI | None = None
30
+
31
+
32
def get_client() -> AsyncOpenAI:
    """Return the module-wide ``AsyncOpenAI`` client, building it on first use.

    The instance is cached in the module-level ``_async_client`` variable and
    is constructed from the configured ``API_KEY`` and ``BASE_URL``.
    """
    global _async_client
    if _async_client is not None:
        return _async_client
    _async_client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)
    return _async_client
38
+
39
+
40
def get_sync_client() -> OpenAI:
    """Return the module-wide blocking ``OpenAI`` client, building it on first use.

    The instance is cached in the module-level ``_sync_client`` variable and
    is constructed from the configured ``API_KEY`` and ``BASE_URL``.
    """
    global _sync_client
    if _sync_client is not None:
        return _sync_client
    _sync_client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
    return _sync_client
46
+
47
+
48
def reset_client() -> None:
    """Drop both cached clients so the next accessor call rebuilds them.

    Intended for tests and for picking up changed configuration.
    """
    global _async_client, _sync_client
    _async_client = _sync_client = None
53
+
54
+
55
def clean_json_response(content: str) -> str:
    """Strip markdown fences and surrounding chatter from LLM output.

    Handles the shapes chat models commonly produce around a JSON payload:

    * a fenced block (```` ``` ```` or ```` ```json ````, any letter case),
      even when prose precedes or follows the fence;
    * an opening fence with no closing fence (truncated output);
    * a dangling closing fence after otherwise bare JSON;
    * prose wrapped around a bare JSON object/array ("Sure! {...} Done.").

    Text that already starts with ``{`` or ``[`` is never searched for
    fences, so triple backticks inside JSON string values are left alone.

    Args:
        content: Raw text returned by the model.

    Returns:
        The cleaned text. It is not guaranteed to be valid JSON; callers
        still need to parse it.
    """
    import re  # local import: the module header does not import ``re``

    text = content.strip()

    if text[:1] not in ("{", "["):
        fenced = re.search(
            r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL | re.IGNORECASE
        )
        if fenced:
            text = fenced.group(1)
        else:
            # Opening fence without a closing one (e.g. output cut off).
            text = re.sub(r"^```(?:json)?", "", text, flags=re.IGNORECASE)
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()

    # Trim prose around a bare object/array. Payloads that start with a quote
    # are left untouched so a JSON string containing braces is not mangled.
    if text[:1] not in ("{", "[", '"'):
        openers = [pos for pos in (text.find("{"), text.find("[")) if pos != -1]
        closer = max(text.rfind("}"), text.rfind("]"))
        if openers and closer > min(openers):
            text = text[min(openers):closer + 1]
    return text
65
+
66
+
67
async def call_model(
    system_prompt: str,
    user_prompt: str,
    *,
    agent_name: str = "Agent",
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int | None = None,
    max_retries: int = 1,
    validate_json: bool = True,
) -> str | None:
    """Call the configured model with a timeout and optional JSON validation.

    Malformed-JSON and empty responses are retried up to ``max_retries``
    additional times. A timeout or any other exception ends the call
    immediately without a retry.

    Args:
        system_prompt: The system-level instruction.
        user_prompt: The user-level query.
        agent_name: Label used in log messages.
        temperature: Sampling temperature (defaults to config TEMPERATURE).
        max_tokens: Max tokens for the response (defaults to config MAX_TOKENS).
        timeout: Per-call timeout in seconds (defaults to config TIMEOUT_SECONDS).
        max_retries: Number of additional attempts after a malformed-JSON or
            empty response.
        validate_json: Whether the cleaned response must parse as JSON.

    Returns:
        The model's raw (uncleaned) text response, or None if every attempt
        failed, the call timed out, or the request raised.
    """
    client = get_client()
    temp = TEMPERATURE if temperature is None else temperature
    mt = MAX_TOKENS if max_tokens is None else max_tokens
    tout = TIMEOUT_SECONDS if timeout is None else timeout

    json_reminder = "\n\nIMPORTANT: Output ONLY raw JSON. No markdown, no explanation."
    # The reminder is attached to the original prompt rather than accumulated,
    # so several retries do not repeat it.
    prompt = user_prompt
    last_error: str | None = None

    for attempt in range(max_retries + 1):
        content = ""  # always bound, so the except handlers can log a preview
        try:
            response = await asyncio.wait_for(
                client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=temp,
                    max_tokens=mt,
                ),
                timeout=tout,
            )
            content = response.choices[0].message.content or ""
            logger.info("%s received %d chars in %d attempt(s)", agent_name, len(content), attempt + 1)

            if validate_json:
                cleaned = clean_json_response(content)
                if not cleaned.strip():
                    raise ValueError("Empty response")
                json.loads(cleaned)
                logger.info("%s produced valid JSON", agent_name)
            return content

        except json.JSONDecodeError as e:
            # Must precede ValueError: JSONDecodeError is a ValueError subclass.
            last_error = str(e)
            logger.warning(
                "%s returned malformed JSON (attempt %d): %s | preview: %s",
                agent_name, attempt + 1, e, content[:200],
            )
            if attempt < max_retries:
                logger.warning("%s returned malformed JSON, retrying...", agent_name)
                prompt = user_prompt + json_reminder
        except ValueError as e:
            last_error = str(e)
            if attempt < max_retries:
                logger.warning("%s returned empty response, retrying...", agent_name)
        except asyncio.TimeoutError:
            logger.error("%s agent timed out after %ds", agent_name, tout)
            return None
        except Exception as e:
            logger.error("%s agent failed: %s", agent_name, e)
            return None

    logger.error("%s failed to produce valid JSON: %s", agent_name, last_error)
    return None
144
+
145
+
146
async def call_model_chat(
    messages: List[Dict[str, str]],
    *,
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int = 60,
) -> str:
    """Send a multi-turn conversation to the model and return its reply.

    This coroutine does not raise for request failures: timeouts and other
    exceptions are logged and turned into an apology string.

    Args:
        messages: Full message list (system + history + user).
        temperature: Sampling temperature; falls back to config TEMPERATURE.
        max_tokens: Response token cap; falls back to config MAX_TOKENS.
        timeout: Per-call timeout in seconds.

    Returns:
        The assistant's text, or a user-facing error message.
    """
    client = get_client()
    request_kwargs = {
        "model": MODEL_NAME,
        "messages": messages,
        "temperature": TEMPERATURE if temperature is None else temperature,
        "max_tokens": MAX_TOKENS if max_tokens is None else max_tokens,
    }

    try:
        completion = await asyncio.wait_for(
            client.chat.completions.create(**request_kwargs),
            timeout=timeout,
        )
        reply = completion.choices[0].message.content
        if reply:
            return reply
        return "I'm sorry, I couldn't generate a response. Please try again."
    except asyncio.TimeoutError:
        logger.error("Chat call timed out after %ds", timeout)
        return "I'm sorry, the request timed out. Please try a shorter question or try again."
    except Exception as e:
        logger.error("Chat call failed: %s", e)
        return f"I'm sorry, something went wrong: {e}"
186
+
187
+
188
+ # ── Synchronous wrappers for use in Streamlit callbacks ──
189
+
190
+
191
def call_model_chat_sync(
    messages: List[Dict[str, str]],
    *,
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: int = 60,
) -> str:
    """Run ``call_model_chat`` to completion from synchronous code.

    Uses ``asyncio.run`` so the temporary event loop is finalized and the
    thread's current-loop slot is cleared afterwards. Installing a loop by
    hand and closing it leaves a closed loop registered for the thread.

    Args:
        messages: Full message list (system + history + user).
        temperature: Sampling temperature forwarded to ``call_model_chat``.
        max_tokens: Response token cap forwarded to ``call_model_chat``.
        timeout: Per-call timeout in seconds.

    Returns:
        The assistant's text, or a user-facing error message. Any exception,
        including being called while an event loop is already running in this
        thread, is logged and converted into the error message.
    """
    # NOTE(review): each call runs on a new event loop while get_client()
    # hands out one cached async client; confirm that client tolerates being
    # reused across loops.
    try:
        return asyncio.run(
            call_model_chat(
                messages,
                temperature=temperature,
                max_tokens=max_tokens,
                timeout=timeout,
            )
        )
    except Exception as e:
        logger.error("call_model_chat_sync failed: %s", e)
        return f"Sorry, an unexpected error occurred: {e}"
212
+
213
+
214
+ # ── Higher-level domain functions ──
215
+
216
+
217
async def analyze_clause(
    clause_text: str,
    clause_type: str = "",
    additional_context: str = "",
    system_prompt: str = "",
    user_prompt_template: str = "",
    agent_name: str = "Analyzer",
) -> str | None:
    """Run one clause through ``call_model`` with an optional prompt template.

    Args:
        clause_text: The clause raw text to analyze.
        clause_type: Optional pre-classified clause type.
        additional_context: Extra text exposed to the template as ``{context}``.
        system_prompt: The agent-specific system prompt.
        user_prompt_template: ``str.format`` template that may reference
            ``{clause_text}``, ``{clause_type}`` and ``{context}``. When empty,
            the clause text itself is used as the user prompt.
        agent_name: Label for logging.

    Returns:
        Whatever ``call_model`` returns: the raw response string or None.
    """
    if user_prompt_template:
        prompt = user_prompt_template.format(
            clause_text=clause_text,
            clause_type=clause_type,
            context=additional_context,
        )
    else:
        prompt = clause_text

    return await call_model(
        system_prompt=system_prompt,
        user_prompt=prompt,
        agent_name=agent_name,
    )
249
+
250
+
251
async def generate_negotiation_message(
    clause_text: str,
    risk_reason: str,
    safer_version: str = "",
) -> str:
    """Ask the model for a short, polite email requesting a clause revision.

    Args:
        clause_text: The risky clause, quoted verbatim in the prompt.
        risk_reason: Explanation of why the clause is risky.
        safer_version: Optional replacement wording; included only when non-empty.

    Returns:
        The generated message, or an empty string when the model call fails.
    """
    system = (
        "You are a professional contract negotiator. Write a short, polite email "
        "message requesting a change to a contract clause. Keep it professional, "
        "concise, and non-confrontational. Maximum 4-5 sentences."
    )

    sections = [
        f"The risky clause is:\n\"{clause_text}\"\n\n",
        f"Why it's risky:\n{risk_reason}\n\n",
    ]
    if safer_version:
        sections.append(f"Suggested safer version:\n\"{safer_version}\"\n\n")
    sections.append("Write a single email-style negotiation message requesting a fair revision.")

    # Free-form prose is expected here, so JSON validation is switched off.
    reply = await call_model(
        system_prompt=system,
        user_prompt="".join(sections),
        agent_name="NegotiationGenerator",
        validate_json=False,
    )
    return reply if reply else ""
277
+
278
+
279
async def contract_chat(
    contract_context: str,
    chat_history: List[Dict[str, str]],
    user_message: str,
    system_prompt: str,
    timeout: int = 60,
) -> str:
    """Answer a user question about a contract, with the contract as context.

    The contract context is appended to the system prompt, followed by the
    prior turns and then the new user message.

    Args:
        contract_context: The formatted contract + analysis context.
        chat_history: Previous messages; each needs ``role`` and ``content`` keys.
        user_message: The user's new question.
        system_prompt: The copilot system prompt.
        timeout: Per-call timeout in seconds.

    Returns:
        The string returned by ``call_model_chat``.
    """
    full_system = f"{system_prompt}\n\n---\n\n## CONTRACT CONTEXT\n\n{contract_context}"

    # Copy only role/content from history so extra keys never reach the API.
    messages: List[Dict[str, str]] = [
        {"role": "system", "content": full_system},
        *({"role": turn["role"], "content": turn["content"]} for turn in chat_history),
        {"role": "user", "content": user_message},
    ]

    return await call_model_chat(messages, timeout=timeout)
tests/__init__.py ADDED
File without changes
tests/test_extractor.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the Extractor agent."""
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+
7
+ import pytest
8
+
9
+ from clauseguard.agents.extractor import _parse_response, _validate_clause_list
10
+ from clauseguard.models.clause import Clause, ClauseList
11
+
12
+ SAMPLE_NDA_PATH = Path(__file__).parent.parent / "sample_contracts" / "sample_nda.txt"
13
+
14
+
15
def load_sample_nda() -> str:
    """Return the contents of the sample NDA fixture as UTF-8 text."""
    return SAMPLE_NDA_PATH.read_text(encoding="utf-8")
19
+
20
+
21
def test_sample_nda_produces_at_least_6_clauses() -> None:
    """The sample NDA must be long enough to split into six or more clauses."""
    document = load_sample_nda()
    # Sanity check on fixture size before exercising the splitter.
    line_count = len(document.split("\n"))
    assert line_count > 20
    # Imported here, as in the original test, to keep the import local.
    from clauseguard.tools.clause_tools import split_into_clauses
    clauses = split_into_clauses(document)
    assert len(clauses) >= 6, f"Expected at least 6 clauses, got {len(clauses)}"
30
+
31
+
32
def test_short_text_raises_value_error() -> None:
    """A two-clause extraction result must be rejected by validation."""
    sentences = ["This is a short agreement.", "Parties agree to the above."]
    payload = {
        "clauses": [
            {
                "id": number,
                "raw_text": sentence,
                "plain_english": None,
                "clause_type": "OTHER",
                "section_heading": None,
                "position": number,
            }
            for number, sentence in enumerate(sentences, start=1)
        ],
        "contract_type": "Other",
        "total_clauses": 2,
    }

    parsed = _parse_response(json.dumps(payload))
    with pytest.raises(ValueError, match="minimum 3 clauses required"):
        _validate_clause_list(parsed)
60
+
61
+
62
def test_output_matches_clause_list_schema() -> None:
    """A well-formed three-clause payload parses into a typed ``ClauseList``."""
    rows = [
        ("Employee shall maintain confidentiality of all trade secrets.", "CONFIDENTIALITY"),
        ("This Agreement is governed by Delaware law.", "GOVERNING LAW"),
        ("Either party may terminate for convenience.", "TERMINATION"),
    ]
    payload = {
        "clauses": [
            {
                "id": number,
                "raw_text": raw_text,
                "plain_english": None,
                "clause_type": "OTHER",
                "section_heading": heading,
                "position": number,
            }
            for number, (raw_text, heading) in enumerate(rows, start=1)
        ],
        "contract_type": "NDA",
        "total_clauses": 3,
    }

    parsed = _parse_response(json.dumps(payload))

    assert isinstance(parsed, ClauseList)
    assert parsed.total_clauses == 3
    assert parsed.contract_type == "NDA"
    assert len(parsed.clauses) == 3
    assert all(isinstance(item, Clause) for item in parsed.clauses)
    assert all(item.id > 0 for item in parsed.clauses)
    assert all(item.raw_text for item in parsed.clauses)
103
+
104
+
105
def test_parse_response_handles_list_input() -> None:
    """``_parse_response`` accepts a bare JSON list of clauses, not only a dict."""
    bare_list = [
        {
            "id": number,
            "raw_text": f"Test clause {word}.",
            "plain_english": None,
            "clause_type": "OTHER",
            "section_heading": None,
            "position": number,
        }
        for number, word in enumerate(("one", "two", "three"), start=1)
    ]

    parsed = _parse_response(json.dumps(bare_list))
    assert parsed.total_clauses == 3
136
+
137
+
138
def test_parse_response_handles_markdown_fences() -> None:
    """A response wrapped in a ```json fence is parsed as if unfenced."""
    # Adjacent literals concatenate into the same single string as before.
    wrapped_json = (
        '```json\n{\n "clauses": [\n'
        ' {"id": 1, "raw_text": "Test one.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 1},\n'
        ' {"id": 2, "raw_text": "Test two.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 2},\n'
        ' {"id": 3, "raw_text": "Test three.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 3}\n'
        ' ],\n "contract_type": "Other",\n "total_clauses": 3\n}\n```'
    )

    parsed = _parse_response(wrapped_json)
    assert parsed.total_clauses == 3
    assert parsed.clauses[0].raw_text == "Test one."
tests/test_pipeline.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Integration tests for the full 5-agent pipeline."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from unittest.mock import patch
6
+
7
+ import pytest
8
+
9
+ from clauseguard.agents.orchestrator import run_pipeline
10
+ from clauseguard.models.findings import Severity
11
+ from clauseguard.models.report import FinalReport
12
+
13
+ SAMPLE_NDA_PATH = Path(__file__).parent.parent / "sample_contracts" / "sample_nda.txt"
14
+
15
+
16
def load_sample_nda() -> str:
    """Return the contents of the sample NDA fixture as UTF-8 text."""
    return SAMPLE_NDA_PATH.read_text(encoding="utf-8")
19
+
20
+
21
+ def _mock_extract_response() -> str:
22
+ return json.dumps({
23
+ "clauses": [
24
+ {"id": 1, "raw_text": "Confidential Information shall mean any and all information disclosed.", "plain_english": None, "clause_type": "OTHER", "section_heading": "DEFINITION", "position": 1},
25
+ {"id": 2, "raw_text": "Recipient agrees to hold all Confidential Information in strict confidence.", "plain_english": None, "clause_type": "OTHER", "section_heading": "CONFIDENTIALITY", "position": 2},
26
+ {"id": 3, "raw_text": "For 18 months, Recipient shall not compete anywhere in the world.", "plain_english": None, "clause_type": "OTHER", "section_heading": "NON-COMPETE", "position": 3},
27
+ {"id": 4, "raw_text": "Recipient assigns all inventions including those on personal time and equipment for 1 year after.", "plain_english": None, "clause_type": "OTHER", "section_heading": "IP ASSIGNMENT", "position": 4},
28
+ {"id": 5, "raw_text": "All disputes resolved by binding arbitration, waives jury trial.", "plain_english": None, "clause_type": "OTHER", "section_heading": "ARBITRATION", "position": 5},
29
+ {"id": 6, "raw_text": "This Agreement governed by New York law.", "plain_english": None, "clause_type": "OTHER", "section_heading": "GOVERNING LAW", "position": 6},
30
+ {"id": 7, "raw_text": "Auto-renews for 1-year terms unless 90 days notice.", "plain_english": None, "clause_type": "OTHER", "section_heading": "AUTO-RENEWAL", "position": 7},
31
+ {"id": 8, "raw_text": "If any provision is invalid, the rest remains in effect.", "plain_english": None, "clause_type": "OTHER", "section_heading": "SEVERABILITY", "position": 8},
32
+ ],
33
+ "contract_type": "Other",
34
+ "total_clauses": 8,
35
+ })
36
+
37
+
38
+ def _mock_classify_response() -> str:
39
+ return json.dumps({
40
+ "clauses": [
41
+ {"id": 1, "raw_text": "Confidential Information shall mean any and all information disclosed.", "plain_english": None, "clause_type": "NDA", "section_heading": "DEFINITION", "position": 1},
42
+ {"id": 2, "raw_text": "Recipient agrees to hold all Confidential Information in strict confidence.", "plain_english": None, "clause_type": "NDA", "section_heading": "CONFIDENTIALITY", "position": 2},
43
+ {"id": 3, "raw_text": "For 18 months, Recipient shall not compete anywhere in the world.", "plain_english": None, "clause_type": "NON_COMPETE", "section_heading": "NON-COMPETE", "position": 3},
44
+ {"id": 4, "raw_text": "Recipient assigns all inventions including those on personal time and equipment for 1 year after.", "plain_english": None, "clause_type": "IP_ASSIGNMENT", "section_heading": "IP ASSIGNMENT", "position": 4},
45
+ {"id": 5, "raw_text": "All disputes resolved by binding arbitration, waives jury trial.", "plain_english": None, "clause_type": "ARBITRATION", "section_heading": "ARBITRATION", "position": 5},
46
+ {"id": 6, "raw_text": "This Agreement governed by New York law.", "plain_english": None, "clause_type": "GOVERNING_LAW", "section_heading": "GOVERNING LAW", "position": 6},
47
+ {"id": 7, "raw_text": "Auto-renews for 1-year terms unless 90 days notice.", "plain_english": None, "clause_type": "AUTO_RENEWAL", "section_heading": "AUTO-RENEWAL", "position": 7},
48
+ {"id": 8, "raw_text": "If any provision is invalid, the rest remains in effect.", "plain_english": None, "clause_type": "OTHER", "section_heading": "SEVERABILITY", "position": 8},
49
+ ],
50
+ "contract_type": "NDA",
51
+ "total_clauses": 8,
52
+ })
53
+
54
+
55
+ def _mock_score_response() -> str:
56
+ return json.dumps([
57
+ {"clause": {"id": 1, "raw_text": "Confidential Information shall mean any and all information disclosed.", "plain_english": None, "clause_type": "NDA", "section_heading": "DEFINITION", "position": 1}, "finding": {"clause_id": 1, "severity": "INFO", "risk_title": "Broad Definition", "risk_reason": "Standard.", "recommended_action": ""}},
58
+ {"clause": {"id": 2, "raw_text": "Recipient agrees to hold all Confidential Information in strict confidence.", "plain_english": None, "clause_type": "NDA", "section_heading": "CONFIDENTIALITY", "position": 2}, "finding": {"clause_id": 2, "severity": "LOW", "risk_title": "Standard Confidentiality", "risk_reason": "Standard.", "recommended_action": ""}},
59
+ {"clause": {"id": 3, "raw_text": "For 18 months, Recipient shall not compete anywhere in the world.", "plain_english": None, "clause_type": "NON_COMPETE", "section_heading": "NON-COMPETE", "position": 3}, "finding": {"clause_id": 3, "severity": "HIGH", "risk_title": "Global Non-Compete", "risk_reason": "Worldwide scope.", "recommended_action": ""}},
60
+ {"clause": {"id": 4, "raw_text": "Recipient assigns all inventions including those on personal time and equipment for 1 year after.", "plain_english": None, "clause_type": "IP_ASSIGNMENT", "section_heading": "IP ASSIGNMENT", "position": 4}, "finding": {"clause_id": 4, "severity": "CRITICAL", "risk_title": "IP Assignment of Personal Work", "risk_reason": "Assigns all IP.", "recommended_action": ""}},
61
+ {"clause": {"id": 5, "raw_text": "All disputes resolved by binding arbitration, waives jury trial.", "plain_english": None, "clause_type": "ARBITRATION", "section_heading": "ARBITRATION", "position": 5}, "finding": {"clause_id": 5, "severity": "HIGH", "risk_title": "Mandatory Arbitration", "risk_reason": "Mandatory arbitration.", "recommended_action": ""}},
62
+ {"clause": {"id": 6, "raw_text": "This Agreement governed by New York law.", "plain_english": None, "clause_type": "GOVERNING_LAW", "section_heading": "GOVERNING LAW", "position": 6}, "finding": {"clause_id": 6, "severity": "LOW", "risk_title": "Standard Governing Law", "risk_reason": "Standard NY law.", "recommended_action": ""}},
63
+ {"clause": {"id": 7, "raw_text": "Auto-renews for 1-year terms unless 90 days notice.", "plain_english": None, "clause_type": "AUTO_RENEWAL", "section_heading": "AUTO-RENEWAL", "position": 7}, "finding": {"clause_id": 7, "severity": "MEDIUM", "risk_title": "Auto-Renewal", "risk_reason": "90-day notice.", "recommended_action": ""}},
64
+ {"clause": {"id": 8, "raw_text": "If any provision is invalid, the rest remains in effect.", "plain_english": None, "clause_type": "OTHER", "section_heading": "SEVERABILITY", "position": 8}, "finding": {"clause_id": 8, "severity": "INFO", "risk_title": "Standard Severability", "risk_reason": "Standard.", "recommended_action": ""}},
65
+ ])
66
+
67
+
68
+ def _mock_translate_response() -> str:
69
+ return json.dumps([
70
+ {"clause": {"id": 1, "raw_text": "Confidential Information shall mean any and all information disclosed.", "clause_type": "NDA", "section_heading": "DEFINITION", "position": 1, "plain_english": "Defines confidential info."}, "finding": {"clause_id": 1, "severity": "INFO", "risk_title": "Broad Definition", "risk_reason": "Standard.", "recommended_action": "No action."}},
71
+ {"clause": {"id": 2, "raw_text": "Recipient agrees to hold all Confidential Information in strict confidence.", "clause_type": "NDA", "section_heading": "CONFIDENTIALITY", "position": 2, "plain_english": "Keep info secret."}, "finding": {"clause_id": 2, "severity": "LOW", "risk_title": "Standard Confidentiality", "risk_reason": "Standard.", "recommended_action": "No action."}},
72
+ {"clause": {"id": 3, "raw_text": "For 18 months, Recipient shall not compete anywhere in the world.", "clause_type": "NON_COMPETE", "section_heading": "NON-COMPETE", "position": 3, "plain_english": "No competing worldwide for 18 months."}, "finding": {"clause_id": 3, "severity": "HIGH", "risk_title": "Global Non-Compete", "risk_reason": "Worldwide.", "recommended_action": "Reduce scope."}},
73
+ {"clause": {"id": 4, "raw_text": "Recipient assigns all inventions including those on personal time and equipment for 1 year after.", "clause_type": "IP_ASSIGNMENT", "section_heading": "IP ASSIGNMENT", "position": 4, "plain_english": "You give all inventions to company."}, "finding": {"clause_id": 4, "severity": "CRITICAL", "risk_title": "IP Assignment of Personal Work", "risk_reason": "Assigns all IP.", "recommended_action": "Add carve-out."}},
74
+ {"clause": {"id": 5, "raw_text": "All disputes resolved by binding arbitration, waives jury trial.", "clause_type": "ARBITRATION", "section_heading": "ARBITRATION", "position": 5, "plain_english": "Must use arbitration."}, "finding": {"clause_id": 5, "severity": "HIGH", "risk_title": "Mandatory Arbitration", "risk_reason": "Mandatory.", "recommended_action": "Add opt-out."}},
75
+ {"clause": {"id": 6, "raw_text": "This Agreement governed by New York law.", "clause_type": "GOVERNING_LAW", "section_heading": "GOVERNING LAW", "position": 6, "plain_english": "NY law applies."}, "finding": {"clause_id": 6, "severity": "LOW", "risk_title": "Standard Governing Law", "risk_reason": "Standard.", "recommended_action": "No action."}},
76
+ {"clause": {"id": 7, "raw_text": "Auto-renews for 1-year terms unless 90 days notice.", "clause_type": "AUTO_RENEWAL", "section_heading": "AUTO-RENEWAL", "position": 7, "plain_english": "Auto-renews yearly."}, "finding": {"clause_id": 7, "severity": "MEDIUM", "risk_title": "Auto-Renewal", "risk_reason": "Auto.", "recommended_action": "Track."}},
77
+ {"clause": {"id": 8, "raw_text": "If any provision is invalid, the rest remains in effect.", "clause_type": "OTHER", "section_heading": "SEVERABILITY", "position": 8, "plain_english": "Invalid parts don't invalidate rest."}, "finding": {"clause_id": 8, "severity": "INFO", "risk_title": "Standard Severability", "risk_reason": "Standard.", "recommended_action": "No action."}},
78
+ ])
79
+
80
+
81
+ _MOCK_RESPONSES = [
82
+ _mock_extract_response(),
83
+ _mock_classify_response(),
84
+ _mock_score_response(),
85
+ _mock_translate_response(),
86
+ ]
87
+
88
+ _AGENT_CALL_MODEL_PATHS = [
89
+ "clauseguard.agents.extractor.call_model",
90
+ "clauseguard.agents.classifier.call_model",
91
+ "clauseguard.agents.risk_scorer.call_model",
92
+ "clauseguard.agents.translator.call_model",
93
+ ]
94
+
95
+
96
@pytest.mark.asyncio
async def test_pipeline_returns_final_report() -> None:
    """The pipeline yields a ``FinalReport`` named after the input file."""
    contract_text = load_sample_nda()
    canned = iter(_MOCK_RESPONSES)

    async def mock_call_model(**kwargs):
        # Hand out the canned responses in order; None once they run out.
        return next(canned, None)

    patchers = [patch(target, side_effect=mock_call_model) for target in _AGENT_CALL_MODEL_PATHS]
    for patcher in patchers:
        patcher.start()
    try:
        report = await run_pipeline(contract_text, "sample_nda.txt")
    finally:
        for patcher in patchers:
            patcher.stop()

    assert isinstance(report, FinalReport)
    assert report.contract_name == "sample_nda.txt"
118
+
119
+
120
@pytest.mark.asyncio
async def test_pipeline_finds_critical_or_high() -> None:
    """The canned pipeline run must surface at least one CRITICAL or HIGH finding."""
    text = load_sample_nda()
    results_iter = iter(_MOCK_RESPONSES)

    async def mock_call_model(**kwargs):
        # Hand out the canned responses in order; None once they run out.
        return next(results_iter, None)

    patches = [patch(path, side_effect=mock_call_model) for path in _AGENT_CALL_MODEL_PATHS]
    for p in patches:
        p.start()
    try:
        report = await run_pipeline(text, "sample_nda.txt")
    finally:
        for p in patches:
            p.stop()

    assert isinstance(report, FinalReport)
    critical = report.summary.critical_count
    high = report.summary.high_count
    # Report the observed counts so a failure is diagnosable from the log.
    assert critical >= 1 or high >= 1, (
        f"Expected at least 1 CRITICAL or HIGH finding, got critical={critical}, high={high}"
    )
144
+
145
+
146
@pytest.mark.asyncio
async def test_markdown_report_is_non_empty() -> None:
    """The final report carries a non-empty markdown rendering."""
    contract_text = load_sample_nda()
    canned = iter(_MOCK_RESPONSES)

    async def mock_call_model(**kwargs):
        # Hand out the canned responses in order; None once they run out.
        return next(canned, None)

    patchers = [patch(target, side_effect=mock_call_model) for target in _AGENT_CALL_MODEL_PATHS]
    for patcher in patchers:
        patcher.start()
    try:
        report = await run_pipeline(contract_text, "sample_nda.txt")
    finally:
        for patcher in patchers:
            patcher.stop()

    markdown = report.markdown_report
    assert isinstance(markdown, str)
    assert len(markdown) > 0
168
+
169
+
170
@pytest.mark.asyncio
async def test_pipeline_handles_extractor_failure_gracefully() -> None:
    """When every model call returns None the pipeline still returns a report."""
    tiny_input = "too short"

    async def mock_call_model(**kwargs):
        # Simulate a model that never produces output.
        return None

    patchers = [patch(target, side_effect=mock_call_model) for target in _AGENT_CALL_MODEL_PATHS]
    for patcher in patchers:
        patcher.start()
    try:
        report = await run_pipeline(tiny_input, "test.txt")
    finally:
        for patcher in patchers:
            patcher.stop()

    assert isinstance(report, FinalReport)
tests/test_risk_scorer.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the Risk Scorer agent."""
2
+
3
+ import json
4
+
5
+ import pytest
6
+
7
+ from clauseguard.agents.risk_scorer import _parse_response
8
+ from clauseguard.models.findings import ScoredClause, Severity
9
+
10
+
11
+ def _make_mock_response(clauses_data: list) -> str:
12
+ """Build a mock LLM JSON response string for testing."""
13
+ return json.dumps(clauses_data)
14
+
15
+
16
def test_ip_assignment_clause_is_critical() -> None:
    """A CRITICAL IP-assignment finding survives parsing with its type intact."""
    clause = {
        "id": 1,
        "raw_text": "Employee hereby assigns to Company all inventions and intellectual property created by Employee, whether during working hours or on Employee's own time, using Company equipment or Employee's personal equipment.",
        "plain_english": None,
        "clause_type": "IP_ASSIGNMENT",
        "section_heading": "INTELLECTUAL PROPERTY",
        "position": 1,
    }
    finding = {
        "clause_id": 1,
        "severity": "CRITICAL",
        "risk_title": "IP Assignment of Personal Work",
        "risk_reason": "This clause claims ownership of all employee creations including those made on personal time and equipment with no carve-out for unrelated work.",
        "recommended_action": "",
    }

    results = _parse_response(_make_mock_response([{"clause": clause, "finding": finding}]))

    assert len(results) == 1
    only = results[0]
    assert only.finding.severity == Severity.CRITICAL
    assert only.clause.clause_type.value == "IP_ASSIGNMENT"
42
+
43
+
44
def test_governing_law_clause_is_low_or_info() -> None:
    """A boilerplate governing-law finding parses to LOW or INFO severity."""
    clause = {
        "id": 1,
        "raw_text": "This Agreement shall be governed by and construed in accordance with the laws of the State of Delaware.",
        "plain_english": None,
        "clause_type": "GOVERNING_LAW",
        "section_heading": "GOVERNING LAW",
        "position": 1,
    }
    finding = {
        "clause_id": 1,
        "severity": "LOW",
        "risk_title": "Standard Governing Law",
        "risk_reason": "Standard governing law clause selecting Delaware, a common jurisdiction.",
        "recommended_action": "",
    }

    results = _parse_response(_make_mock_response([{"clause": clause, "finding": finding}]))

    assert len(results) == 1
    assert results[0].finding.severity in (Severity.LOW, Severity.INFO)
69
+
70
+
71
def test_every_scored_clause_has_non_empty_risk_reason() -> None:
    """Every parsed ``ScoredClause`` keeps a non-trivial ``risk_reason``."""

    def entry(number, raw_text, clause_type, heading, severity, title, reason):
        # Build one clause/finding pair in the shape the scorer emits.
        return {
            "clause": {
                "id": number,
                "raw_text": raw_text,
                "plain_english": None,
                "clause_type": clause_type,
                "section_heading": heading,
                "position": number,
            },
            "finding": {
                "clause_id": number,
                "severity": severity,
                "risk_title": title,
                "risk_reason": reason,
                "recommended_action": "",
            },
        }

    payload = [
        entry(
            1,
            "For two years, Employee shall not compete with Company anywhere in the United States.",
            "NON_COMPETE",
            "NON-COMPETE",
            "CRITICAL",
            "Overly Broad Non-Compete",
            "Non-compete duration of 2 years covers the entire US with no geographic relevance to Company's actual operations.",
        ),
        entry(
            2,
            "Notice shall be sent to the address listed above.",
            "OTHER",
            "NOTICES",
            "INFO",
            "Standard Notice Provision",
            "Boilerplate notice provision with no unusual terms.",
        ),
    ]

    results = _parse_response(_make_mock_response(payload))

    assert len(results) == 2
    for item in results:
        assert item.finding.risk_reason, f"Clause {item.clause.id} has empty risk_reason"
        assert len(item.finding.risk_reason) > 5
115
+
116
+
117
def test_multiple_severity_levels() -> None:
    """Each of the five severity levels round-trips through the parser."""
    levels = [Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO]

    payload = []
    for number, level in enumerate(levels, start=1):
        payload.append({
            "clause": {
                "id": number,
                "raw_text": f"Test clause {number}",
                "plain_english": None,
                "clause_type": "OTHER",
                "section_heading": None,
                "position": number,
            },
            "finding": {
                "clause_id": number,
                "severity": level.value,
                "risk_title": f"Risk {number}",
                "risk_reason": f"Reason for clause {number}",
                "recommended_action": "",
            },
        })

    results = _parse_response(_make_mock_response(payload))

    assert len(results) == 5
    parsed_levels = [item.finding.severity for item in results]
    for level in levels:
        assert level in parsed_levels
tools/__init__.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClauseGuard tools package."""
2
+
3
+ from clauseguard.tools.clause_tools import clean_text, detect_contract_type, detect_headings, split_into_clauses
4
+ from clauseguard.tools.file_tools import detect_encoding, extract_text, read_docx, read_pdf, read_txt
5
+ from clauseguard.tools.report_tools import format_markdown, risk_color, severity_badge, severity_emoji
6
+
7
+ __all__ = [
8
+ "clean_text",
9
+ "detect_contract_type",
10
+ "detect_encoding",
11
+ "detect_headings",
12
+ "extract_text",
13
+ "format_markdown",
14
+ "read_docx",
15
+ "read_pdf",
16
+ "read_txt",
17
+ "risk_color",
18
+ "severity_badge",
19
+ "severity_emoji",
20
+ "split_into_clauses",
21
+ ]
tools/clause_tools.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Clause processing utility functions."""
2
+
3
+ import re
4
+ from typing import List
5
+
6
+
7
def split_into_clauses(text: str) -> List[str]:
    """Split a contract document into individual clauses.

    The text is first cut at numbered section markers (1., 1), 1.1, I.,
    Article 1, Section 1) and ALL CAPS heading lines, then each resulting
    piece is cut again at blank lines. Fragments shorter than five words
    are discarded.

    Args:
        text: The full text of the contract document.

    Returns:
        A list of clause strings, each containing at least five words.
        Empty or whitespace-only input yields an empty list.
    """
    if not text or not text.strip():
        return []

    clauses: List[str] = []
    for section in _split_by_numbered_headings(text):
        # The helper already strips its parts and drops empty ones.
        clauses.extend(_split_by_double_newlines(section))

    # Very short fragments are dropped rather than reported as clauses.
    return [c for c in clauses if len(c.split()) >= 5]


def _split_by_numbered_headings(text: str) -> List[str]:
    """Split text at numbered section markers and ALL CAPS heading lines.

    The markers themselves (numbering, "Section 1.", heading lines) are
    removed from the output. The capital letter that follows a number and
    the newline that follows a heading are only looked at, never consumed:
    ``re.split`` discards everything the pattern matches, so consuming them
    would chop the first letter off every numbered clause and would hide a
    numbered clause that directly follows a heading line.
    """
    pattern = (
        r"(?:"
        # "Article 1." / "Section 2:" at the start of a line.
        r"(?<=\n)\s*(?:Article|Section|SECTION|ARTICLE)\s+\d+[\.:\s]"
        # "1." / "1)" / "1.1 " / "IV. " followed by a capital letter.
        r"|\n\s*(?:\d+[\.\)]\s*|\d+\.\d+\s+|[IVX]+\.\s+)(?=[A-Z])"
        # A line made only of capitals and whitespace (11+ characters).
        r"|\n\s*[A-Z][A-Z\s]{10,}(?=\n)"
        r")"
    )
    parts = re.split(pattern, text)
    return [p.strip() for p in parts if p.strip()]


def _split_by_double_newlines(text: str) -> List[str]:
    """Split text at blank lines, returning stripped, non-empty parts."""
    parts = re.split(r"\n\s*\n", text)
    return [p.strip() for p in parts if p.strip()]
43
+
44
+
45
def clean_text(text: str) -> str:
    """Clean and normalize text by removing excessive whitespace.

    Line endings become ``\\n``, every run of spaces and tabs becomes a
    single space, spaces adjacent to a newline are removed, and runs of
    three or more newlines are reduced to one blank line.

    Args:
        text: Raw text to clean.

    Returns:
        Cleaned and normalized text, or an empty string for empty input.
    """
    if not text:
        return ""

    text = text.replace("\r\n", "\n").replace("\r", "\n")
    # Collapse spaces and tabs together so a mix such as " \t " cannot
    # survive as several spaces.
    text = re.sub(r"[ \t]+", " ", text)
    # Only single spaces remain, so trimming one on each side of a newline
    # also empties whitespace-only lines.
    text = re.sub(r" ?\n ?", "\n", text)
    # Must run after the lines are trimmed, otherwise "\n \n \n" is missed.
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
64
+
65
+
66
# Lower-case keywords that count as evidence for each contract type.
_CONTRACT_KEYWORDS: dict[str, List[str]] = {
    "NDA": ["non-disclosure", "confidential", "confidentiality", "trade secret", "nda", "non disclosure"],
    "Employment": ["employment", "employee", "salary", "benefits", "at-will", "at will", "offer letter"],
    "Freelance": ["freelance", "independent contractor", "consultant", "statement of work", "contractor"],
    "SaaS": ["software as a service", "subscription", "saas", "service level agreement", "sla", "license"],
}


def detect_contract_type(text: str) -> str:
    """Detect the type of contract based on keyword analysis.

    Each contract type scores one point per keyword found in the
    lower-cased text. A keyword only counts when it begins at a word
    boundary, so "nda" is not found inside "calendar", "sla" is not found
    inside "legislation", and "at will" is not found inside "that will".
    Plurals and other suffixed forms ("employees", "licensed") still match.

    Args:
        text: The full text of the contract document.

    Returns:
        The highest-scoring contract type (NDA, Employment, Freelance or
        SaaS), or "Other" when the text is empty or no keyword matches.
        On a tie the type listed first in ``_CONTRACT_KEYWORDS`` wins.
    """
    if not text:
        return "Other"

    text_lower = text.lower()
    scores: dict[str, int] = {}

    for contract_type, keywords in _CONTRACT_KEYWORDS.items():
        score = sum(
            1 for kw in keywords if re.search(r"\b" + re.escape(kw), text_lower)
        )
        if score > 0:
            scores[contract_type] = score

    if not scores:
        return "Other"

    # max() returns the first maximal key, preserving declaration order on ties.
    return max(scores, key=scores.get)
98
+
99
+
100
def detect_headings(text: str) -> list[str]:
    """Collect the lines of a contract that look like section headings.

    A stripped, non-empty line counts as a heading when it starts with
    "Article"/"Section" followed by a number, starts with a number plus
    "." or ")" followed by a capital letter, or consists only of capitals
    and whitespace (at least 11 characters, at most six words).

    Args:
        text: The full text of the contract document.

    Returns:
        The matching lines, stripped, in document order.
    """
    if not text:
        return []

    def _is_heading(candidate: str) -> bool:
        # "Article 3" / "SECTION 12" style headers.
        if re.match(r"^\s*(?:Article|Section|SECTION|ARTICLE)\s+\d+", candidate):
            return True
        # "1. Definitions" / "2) Term" style headers.
        if re.match(r"^\s*\d+[\.\)]\s+[A-Z]", candidate):
            return True
        # Short ALL CAPS lines; long shouted sentences are excluded.
        all_caps = re.match(r"^[A-Z][A-Z\s]{10,}$", candidate) is not None
        return all_caps and len(candidate.split()) <= 6

    trimmed_lines = (raw_line.strip() for raw_line in text.split("\n"))
    return [entry for entry in trimmed_lines if entry and _is_heading(entry)]
tools/file_tools.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """File parsing utilities for contract documents."""
2
+
3
+ import io
4
+ from typing import Union
5
+
6
+ import chardet
7
+
8
+
9
def read_pdf(file_bytes: bytes) -> str:
    """Extract text from a PDF file using PyMuPDF.

    Falls back to pdfplumber if PyMuPDF extraction returns empty.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        Extracted text content as a string.

    Raises:
        ValueError: If the PDF cannot be opened or contains no extractable text.
    """
    import fitz

    try:
        doc = fitz.open(stream=file_bytes, filetype="pdf")
    except Exception as e:
        raise ValueError(f"Unable to open PDF file: {e}") from e

    # Close the document even when a page fails to extract, so the handle
    # is not leaked on error.
    try:
        result = "\n".join(page.get_text() for page in doc).strip()
    finally:
        doc.close()

    if result:
        return result

    # PyMuPDF produced nothing; give the second extractor a chance.
    result = _read_pdf_with_pdfplumber(file_bytes)
    if result:
        return result

    raise ValueError("PDF file contains no extractable text — try pasting the text directly")
45
+
46
+
47
def _read_pdf_with_pdfplumber(file_bytes: bytes) -> str:
    """Best-effort PDF text extraction via pdfplumber.

    Returns the newline-joined text of every page that yields text, or an
    empty string when pdfplumber is not installed or extraction fails.
    """
    try:
        import pdfplumber
    except ImportError:
        return ""

    try:
        with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
            per_page = (page.extract_text() for page in pdf.pages)
            # Pages with no text come back falsy and are skipped.
            return "\n".join(chunk for chunk in per_page if chunk).strip()
    except Exception:
        # Deliberate fallback: the caller treats "" as "nothing extracted".
        return ""
64
+
65
+
66
def read_docx(file_bytes: bytes) -> str:
    """Extract paragraph text from a DOCX file using python-docx.

    Args:
        file_bytes: Raw bytes of the DOCX file.

    Returns:
        The non-blank paragraphs joined with newlines.

    Raises:
        ValueError: If the DOCX cannot be opened or yields no text.
    """
    from docx import Document

    try:
        document = Document(io.BytesIO(file_bytes))
    except Exception as e:
        raise ValueError(f"Unable to open DOCX file: {e}") from e

    # Keep only paragraphs that contain something besides whitespace.
    body = "\n".join(
        para.text for para in document.paragraphs if para.text.strip()
    ).strip()

    if body:
        return body

    raise ValueError("DOCX file contains no extractable text")
96
+
97
+
98
def detect_encoding(file_bytes: bytes) -> str:
    """Guess the character encoding of a byte string with chardet.

    Args:
        file_bytes: Raw bytes to inspect.

    Returns:
        The encoding name reported by chardet, or 'utf-8' when chardet
        reports no encoding.
    """
    guessed = chardet.detect(file_bytes).get("encoding")
    if guessed:
        return guessed
    return "utf-8"
109
+
110
+
111
def read_txt(file_bytes: bytes) -> str:
    """Decode a plain text file, detecting its encoding automatically.

    Args:
        file_bytes: Raw bytes of the text file.

    Returns:
        The decoded text with surrounding whitespace removed.

    Raises:
        ValueError: If the decoded text is empty after stripping.
    """
    codec = detect_encoding(file_bytes)

    try:
        decoded = file_bytes.decode(codec)
    except (UnicodeDecodeError, LookupError):
        # Wrong or unknown codec: fall back to UTF-8 with replacement
        # characters instead of failing.
        decoded = file_bytes.decode("utf-8", errors="replace")

    content = decoded.strip()
    if content:
        return content

    raise ValueError("Text file is empty or contains no readable content")
136
+
137
+
138
# Maps a lower-case file extension (with leading dot) to its reader function.
READER_MAP = {
    ".pdf": read_pdf,
    ".txt": read_txt,
    ".docx": read_docx,
}
# Derived from READER_MAP so the advertised extensions can never drift from
# the readers that actually exist.
SUPPORTED_EXTENSIONS = frozenset(READER_MAP)
144
+
145
+
146
def extract_text(file_bytes: bytes, filename: str) -> str:
    """Dispatch a file to the reader registered for its extension.

    Args:
        file_bytes: Raw bytes of the file.
        filename: Original filename; its extension selects the reader.

    Returns:
        Extracted text content as a string.

    Raises:
        ValueError: If the filename is empty, has no usable extension, the
            extension is not supported, or the chosen reader rejects the file.
    """
    if not filename:
        raise ValueError("Filename is required to determine file type")

    ext = _get_extension(filename)
    reader = READER_MAP.get(ext)

    if reader is None:
        supported = ', '.join(sorted(SUPPORTED_EXTENSIONS))
        raise ValueError(f"Unsupported file type: {ext}. Supported types: {supported}")

    return reader(file_bytes)
171
+
172
+
173
def _get_extension(filename: str) -> str:
    """Return the lower-cased extension of *filename*, including the dot.

    Raises:
        ValueError: If the name has no dot or ends with one.
    """
    _, dot, suffix = filename.rpartition(".")
    # No dot at all, or nothing after the last dot.
    if not dot or not suffix:
        raise ValueError(f"Cannot determine file type from filename: {filename}")
    return dot + suffix.lower()
tools/report_tools.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Report formatting utilities for ClauseGuard."""
2
+
3
+ from clauseguard.models.findings import Severity
4
+ from clauseguard.models.report import FinalReport
5
+
6
# Emoji shown for each severity level; read by severity_badge().
SEVERITY_EMOJI_MAP = {
    Severity.CRITICAL: "🔴",
    Severity.HIGH: "🟠",
    Severity.MEDIUM: "🟡",
    Severity.LOW: "🟢",
    Severity.INFO: "ℹ️",
}

# Hex color code for each severity level; read by risk_color().
SEVERITY_COLOR_MAP = {
    Severity.CRITICAL: "#FF0000",
    Severity.HIGH: "#FF8C00",
    Severity.MEDIUM: "#FFD700",
    Severity.LOW: "#32CD32",
    Severity.INFO: "#1E90FF",
}
21
+
22
+
23
def severity_badge(severity: Severity) -> str:
    """Look up the emoji badge for a severity level.

    Args:
        severity: The Severity enum value.

    Returns:
        The emoji from SEVERITY_EMOJI_MAP, or "⚪" when the value has no entry.
    """
    if severity in SEVERITY_EMOJI_MAP:
        return SEVERITY_EMOJI_MAP[severity]
    return "⚪"
35
+
36
+
37
def severity_emoji(severity: Severity) -> str:
    """Return the emoji representation for a severity level.

    Thin alias that delegates to severity_badge().

    Args:
        severity: The Severity enum value.

    Returns:
        The emoji string produced by severity_badge() for this value.
    """
    return severity_badge(severity)
47
+
48
+
49
def risk_color(severity: Severity) -> str:
    """Look up the hex color code for a severity level.

    Args:
        severity: The Severity enum value.

    Returns:
        The color from SEVERITY_COLOR_MAP, or "#808080" when the value has
        no entry.
    """
    if severity in SEVERITY_COLOR_MAP:
        return SEVERITY_COLOR_MAP[severity]
    return "#808080"
61
+
62
+
63
def severity_color(severity: Severity) -> str:
    """Return the hex color code for a severity level.

    Thin alias that delegates to risk_color().

    Args:
        severity: The Severity enum value.

    Returns:
        The hex color string produced by risk_color() for this value.
    """
    return risk_color(severity)
73
+
74
+
75
def format_markdown(report: FinalReport) -> str:
    """Return the markdown text carried by a FinalReport.

    No formatting is done here; the function simply returns the report's
    ``markdown_report`` attribute.

    Args:
        report: The FinalReport to read from.

    Returns:
        The value of ``report.markdown_report``.
    """
    return report.markdown_report