File size: 5,158 Bytes
"""Tests for the Extractor agent."""
import json
import os
from pathlib import Path
import pytest
from clauseguard.agents.extractor import _parse_response, _validate_clause_list
from clauseguard.models.clause import Clause, ClauseList
SAMPLE_NDA_PATH = Path(__file__).parent.parent / "sample_contracts" / "sample_nda.txt"
def load_sample_nda() -> str:
    """Return the contents of the sample NDA fixture as a string.

    The file located at ``SAMPLE_NDA_PATH`` is read as UTF-8 text.
    """
    return SAMPLE_NDA_PATH.read_text(encoding="utf-8")
def test_sample_nda_produces_at_least_6_clauses() -> None:
    """Check that splitting the sample NDA yields six or more clauses."""
    nda_text = load_sample_nda()

    # Sanity check on the fixture itself: it must span more than 20 lines.
    line_count = nda_text.count("\n") + 1
    assert line_count > 20

    # Imported here rather than at module level, so the import only runs
    # when this test executes.
    from clauseguard.tools.clause_tools import split_into_clauses

    clauses = split_into_clauses(nda_text)
    assert len(clauses) >= 6, f"Expected at least 6 clauses, got {len(clauses)}"
def test_short_text_raises_value_error() -> None:
    """A two-clause payload must be rejected by ``_validate_clause_list``.

    The payload is parsed first (outside the ``pytest.raises`` context, so
    parsing itself must succeed); validation is then expected to raise
    ``ValueError`` whose message matches "minimum 3 clauses required".
    """
    sentences = ("This is a short agreement.", "Parties agree to the above.")
    clause_dicts = [
        {
            "id": number,
            "raw_text": sentence,
            "plain_english": None,
            "clause_type": "OTHER",
            "section_heading": None,
            "position": number,
        }
        for number, sentence in enumerate(sentences, start=1)
    ]
    payload = {
        "clauses": clause_dicts,
        "contract_type": "Other",
        "total_clauses": 2,
    }

    parsed = _parse_response(json.dumps(payload))

    with pytest.raises(ValueError, match="minimum 3 clauses required"):
        _validate_clause_list(parsed)
def test_output_matches_clause_list_schema() -> None:
    """Check that a three-clause dict payload parses into a ``ClauseList``.

    Verifies the container type, the top-level fields, and that every
    parsed entry is a ``Clause`` with a positive id and non-empty raw text.
    """
    # (section heading, raw text) for each clause, in document order.
    sections = (
        (
            "CONFIDENTIALITY",
            "Employee shall maintain confidentiality of all trade secrets.",
        ),
        ("GOVERNING LAW", "This Agreement is governed by Delaware law."),
        ("TERMINATION", "Either party may terminate for convenience."),
    )
    clause_dicts = [
        {
            "id": number,
            "raw_text": body,
            "plain_english": None,
            "clause_type": "OTHER",
            "section_heading": heading,
            "position": number,
        }
        for number, (heading, body) in enumerate(sections, start=1)
    ]
    payload = {
        "clauses": clause_dicts,
        "contract_type": "NDA",
        "total_clauses": 3,
    }

    clause_list = _parse_response(json.dumps(payload))

    # Top-level shape.
    assert isinstance(clause_list, ClauseList)
    assert clause_list.total_clauses == 3
    assert clause_list.contract_type == "NDA"
    assert len(clause_list.clauses) == 3

    # Per-clause invariants.
    assert all(isinstance(item, Clause) for item in clause_list.clauses)
    assert all(item.id > 0 for item in clause_list.clauses)
    assert all(item.raw_text for item in clause_list.clauses)
def test_parse_response_handles_list_input() -> None:
    """Check that ``_parse_response`` accepts a bare JSON list of clauses.

    The payload is a top-level list (no wrapping dict); the parsed result
    is expected to report ``total_clauses == 3``.
    """
    ordinals = ("one", "two", "three")
    clause_dicts = [
        {
            "id": number,
            "raw_text": f"Test clause {word}.",
            "plain_english": None,
            "clause_type": "OTHER",
            "section_heading": None,
            "position": number,
        }
        for number, word in enumerate(ordinals, start=1)
    ]

    parsed = _parse_response(json.dumps(clause_dicts))

    assert parsed.total_clauses == 3
def test_parse_response_handles_markdown_fences() -> None:
    """Check that a response wrapped in a Markdown ``json`` fence still parses.

    The fenced payload holds three clauses; the test checks the reported
    clause count and the raw text of the first parsed clause.
    """
    # Adjacent literals concatenate into the exact fenced response string.
    wrapped_json = (
        '```json\n'
        '{\n'
        ' "clauses": [\n'
        ' {"id": 1, "raw_text": "Test one.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 1},\n'
        ' {"id": 2, "raw_text": "Test two.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 2},\n'
        ' {"id": 3, "raw_text": "Test three.", "plain_english": null, "clause_type": "OTHER", "section_heading": null, "position": 3}\n'
        ' ],\n'
        ' "contract_type": "Other",\n'
        ' "total_clauses": 3\n'
        '}\n'
        '```'
    )

    parsed = _parse_response(wrapped_json)

    assert parsed.total_clauses == 3
    first_clause = parsed.clauses[0]
    assert first_clause.raw_text == "Test one."
|