File size: 5,158 Bytes
3552405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""Tests for the Extractor agent."""

import json
import os
from pathlib import Path

import pytest

from clauseguard.agents.extractor import _parse_response, _validate_clause_list
from clauseguard.models.clause import Clause, ClauseList

# Sample NDA fixture: "sample_contracts/sample_nda.txt" under the parent of
# the directory that contains this file.
SAMPLE_NDA_PATH = Path(__file__).parent.parent.joinpath("sample_contracts", "sample_nda.txt")


def load_sample_nda() -> str:
    """Return the full contents of the sample NDA fixture as a string.

    The file at ``SAMPLE_NDA_PATH`` is read in text mode and decoded as UTF-8.
    """
    return SAMPLE_NDA_PATH.read_text(encoding="utf-8")


def test_sample_nda_produces_at_least_6_clauses() -> None:
    """Check that the sample NDA is long enough and splits into 6+ clauses."""
    nda_text = load_sample_nda()

    # Sanity check on the fixture itself: it must span more than 20
    # newline-separated lines. (The original author's note says the document
    # has 10 numbered sections; not verified here.)
    line_count = nda_text.count("\n") + 1
    assert line_count > 20

    # The splitter is imported only after the fixture check has passed.
    from clauseguard.tools.clause_tools import split_into_clauses

    clause_count = len(split_into_clauses(nda_text))
    assert clause_count >= 6, f"Expected at least 6 clauses, got {clause_count}"


def test_short_text_raises_value_error() -> None:
    """Check that a two-clause result is rejected by ``_validate_clause_list``.

    A mock JSON response holding only two clauses is parsed with
    ``_parse_response``; validating the parsed result must raise a
    ``ValueError`` whose message matches "minimum 3 clauses required".
    """
    sentences = (
        "This is a short agreement.",
        "Parties agree to the above.",
    )
    payload = {
        "clauses": [
            {
                "id": number,
                "raw_text": sentence,
                "plain_english": None,
                "clause_type": "OTHER",
                "section_heading": None,
                "position": number,
            }
            for number, sentence in enumerate(sentences, start=1)
        ],
        "contract_type": "Other",
        "total_clauses": 2,
    }

    parsed = _parse_response(json.dumps(payload))

    with pytest.raises(ValueError, match="minimum 3 clauses required"):
        _validate_clause_list(parsed)


def test_output_matches_clause_list_schema() -> None:
    """Check that a three-clause mock response parses into a ``ClauseList``.

    The parsed object must report the expected contract type and clause
    count, and every entry must be a ``Clause`` with a positive id and
    non-empty raw text.
    """
    # (section heading, clause text) pairs; ids and positions are 1-based.
    sections = (
        ("CONFIDENTIALITY", "Employee shall maintain confidentiality of all trade secrets."),
        ("GOVERNING LAW", "This Agreement is governed by Delaware law."),
        ("TERMINATION", "Either party may terminate for convenience."),
    )
    payload = {
        "clauses": [
            {
                "id": number,
                "raw_text": body,
                "plain_english": None,
                "clause_type": "OTHER",
                "section_heading": heading,
                "position": number,
            }
            for number, (heading, body) in enumerate(sections, start=1)
        ],
        "contract_type": "NDA",
        "total_clauses": 3,
    }

    parsed = _parse_response(json.dumps(payload))

    # Top-level shape of the parsed result.
    assert isinstance(parsed, ClauseList)
    assert parsed.total_clauses == 3
    assert parsed.contract_type == "NDA"
    assert len(parsed.clauses) == 3

    # Per-clause checks, in the same order as the type/id/text requirements.
    assert all(isinstance(entry, Clause) for entry in parsed.clauses)
    assert all(entry.id > 0 for entry in parsed.clauses)
    assert all(entry.raw_text for entry in parsed.clauses)


def test_parse_response_handles_list_input() -> None:
    """Check that ``_parse_response`` accepts a bare JSON list of clauses.

    The response here is a top-level JSON array of three clause objects, with
    no wrapping object; the parsed result must report three total clauses.
    """
    entries = [
        {
            "id": number,
            "raw_text": f"Test clause {word}.",
            "plain_english": None,
            "clause_type": "OTHER",
            "section_heading": None,
            "position": number,
        }
        for number, word in enumerate(("one", "two", "three"), start=1)
    ]

    parsed = _parse_response(json.dumps(entries))

    assert parsed.total_clauses == 3


def test_parse_response_handles_markdown_fences() -> None:
    """Check that a response wrapped in a ```json code fence still parses.

    The payload is a three-clause JSON object enclosed in Markdown fence
    lines; the parsed result must report three clauses and keep the first
    clause's raw text intact.
    """
    # Adjacent literals concatenate into one string: the opening fence, the
    # JSON document line by line, and the closing fence.
    wrapped_json = (
        '```json\n'
        '{\n'
        '  "clauses": [\n'
        '    {"id": 1, "raw_text": "Test one.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 1},\n'
        '    {"id": 2, "raw_text": "Test two.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 2},\n'
        '    {"id": 3, "raw_text": "Test three.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 3}\n'
        '  ],\n'
        '  "contract_type": "Other",\n'
        '  "total_clauses": 3\n'
        '}\n'
        '```'
    )

    parsed = _parse_response(wrapped_json)

    assert parsed.total_clauses == 3
    assert parsed.clauses[0].raw_text == "Test one."