gaurv007 committed on
Commit
11d6a4f
·
verified ·
1 Parent(s): a61dcf1

v4.2: Update obligations.py

Browse files
Files changed (1) hide show
  1. obligations.py +23 -8
obligations.py CHANGED
@@ -85,11 +85,26 @@ _PRIORITY_MAP = {
85
  "delivery": 1,
86
  }
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  def _is_false_positive(sentence):
90
  """Check if a sentence is a common false positive (definition/interpretation, not obligation)."""
91
- for fp in _FALSE_POSITIVE_PATTERNS:
92
- if re.search(fp, sentence, re.IGNORECASE):
93
  return True
94
  return False
95
 
@@ -111,9 +126,9 @@ def extract_obligations(text):
111
  continue
112
 
113
  found_types = set()
114
- for otype, patterns in OBLIGATION_PATTERNS.items():
115
  for pat in patterns:
116
- if re.search(pat, sentence, re.IGNORECASE):
117
  found_types.add(otype)
118
  break
119
 
@@ -128,8 +143,8 @@ def extract_obligations(text):
128
  party = obligation_direction
129
  else:
130
  # Fallback to pattern matching within the sentence
131
- for pp in PARTY_PATTERNS:
132
- m = re.search(pp, sentence)
133
  if m:
134
  candidate = m.group(0).strip()
135
  # Fix 8: Reject party strings >40 chars (header bleed-through)
@@ -140,8 +155,8 @@ def extract_obligations(text):
140
  # Extract timeframe
141
  deadline = "Not specified"
142
  deadline_urgency = 0
143
- for pat, ptype in TIME_PATTERNS:
144
- m = re.search(pat, sentence, re.IGNORECASE)
145
  if m:
146
  if ptype == "relative":
147
  num = m.group(1)
 
85
  "delivery": 1,
86
  }
87
 
88
+ # FIX v4.2: Pre-compile obligation patterns once at module level (previously every sentence called re.search with the raw pattern strings, relying on re's internal pattern cache)
89
+ _OBLIGATION_PATTERNS_COMPILED = {
90
+ otype: [re.compile(p, re.IGNORECASE) for p in patterns]
91
+ for otype, patterns in OBLIGATION_PATTERNS.items()
92
+ }
93
+
94
+ # FIX v4.2: Pre-compile false positive patterns
95
+ _FALSE_POSITIVE_PATTERNS_COMPILED = [re.compile(p, re.IGNORECASE) for p in _FALSE_POSITIVE_PATTERNS]
96
+
97
+ # FIX v4.2: Pre-compile time patterns
98
+ _TIME_PATTERNS_COMPILED = [(re.compile(p, re.IGNORECASE), ptype) for p, ptype in TIME_PATTERNS]
99
+
100
+ # FIX v4.2: Pre-compile party patterns
101
+ _PARTY_PATTERNS_COMPILED = [re.compile(p) for p in PARTY_PATTERNS]
102
+
103
 
104
  def _is_false_positive(sentence):
105
  """Check if a sentence is a common false positive (definition/interpretation, not obligation)."""
106
+ for fp in _FALSE_POSITIVE_PATTERNS_COMPILED:
107
+ if fp.search(sentence):
108
  return True
109
  return False
110
 
 
126
  continue
127
 
128
  found_types = set()
129
+ for otype, patterns in _OBLIGATION_PATTERNS_COMPILED.items():
130
  for pat in patterns:
131
+ if pat.search(sentence):
132
  found_types.add(otype)
133
  break
134
 
 
143
  party = obligation_direction
144
  else:
145
  # Fallback to pattern matching within the sentence
146
+ for pp in _PARTY_PATTERNS_COMPILED:
147
+ m = pp.search(sentence)
148
  if m:
149
  candidate = m.group(0).strip()
150
  # Fix 8: Reject party strings >40 chars (header bleed-through)
 
155
  # Extract timeframe
156
  deadline = "Not specified"
157
  deadline_urgency = 0
158
+ for pat, ptype in _TIME_PATTERNS_COMPILED:
159
+ m = pat.search(sentence)
160
  if m:
161
  if ptype == "relative":
162
  num = m.group(1)