File size: 26,699 Bytes
4052d84
 
 
 
 
d76d092
4052d84
 
 
 
d8f8a45
4052d84
 
 
b00feb0
 
 
 
 
 
 
 
 
 
4052d84
 
b00feb0
4052d84
 
 
b00feb0
4052d84
 
a1b1513
ce6728e
 
 
 
 
898bc18
bf8f1ff
ce6728e
4052d84
 
 
 
 
 
b00feb0
 
 
 
 
4052d84
 
b00feb0
 
4052d84
 
 
 
 
 
 
33279ea
4052d84
 
 
 
 
 
 
 
 
 
 
 
 
b00feb0
 
 
 
 
 
 
4052d84
 
2bc545f
4855450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bc545f
 
 
 
 
 
4855450
 
4052d84
 
b00feb0
 
 
 
 
 
4052d84
 
 
 
 
 
bf8f1ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4052d84
 
898bc18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bc545f
d76d092
4052d84
 
 
 
 
 
 
2bc545f
6218d9a
ca62faa
 
 
 
4052d84
d76d092
 
 
 
4052d84
 
 
 
 
 
 
 
 
 
 
898bc18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d76d092
2bc545f
898bc18
2bc545f
d76d092
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4052d84
6218d9a
 
 
 
4052d84
 
 
 
 
 
 
b00feb0
4052d84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8f8a45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4052d84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a085ad1
4052d84
 
 
 
a085ad1
4052d84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce6728e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898bc18
 
 
bf8f1ff
898bc18
bf8f1ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898bc18
bf8f1ff
 
 
 
 
 
 
898bc18
 
 
 
bf8f1ff
 
898bc18
bf8f1ff
 
 
 
 
 
 
898bc18
 
4052d84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6218d9a
4052d84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
"""
UndertriAI — Core OpenEnv Environment (Server-Side)
Implements the bail assessment RL training environment.
"""

import concurrent.futures
import uuid
from typing import Any, Dict, List, Optional

from .dataset import BailDataset
from .reward import compute_reward, _is_ndps_case
from .schema_drift import maybe_apply_drift

# Resolve the OpenEnv base class. Two import paths are tried in order
# (`openenv.core`, then `openenv_core`); if neither is importable an empty
# stand-in class is defined so this module can still be imported.
try:
    from openenv.core import Environment  # type: ignore
except ImportError:
    try:
        from openenv_core import Environment  # type: ignore
    except ImportError:
        # Minimal placeholder: provides no behaviour, only a base to inherit from.
        class Environment:  # type: ignore
            pass

# Resolve the step-result container. When OpenEnv's model is unavailable a
# pydantic model with the same four fields used by this module is defined.
try:
    from openenv.core.models import StepResult  # type: ignore
except ImportError:
    from pydantic import BaseModel
    class StepResult(BaseModel):  # type: ignore
        # Observation payload returned to the agent (a CaseObservation in this module).
        observation: Any
        # Scalar reward for the step; defaults to 0.0.
        reward: float = 0.0
        # True once the episode has ended.
        done: bool = False
        # Free-form diagnostics (reward breakdown, block/cache/timeout markers).
        info: dict = {}

from ..models import (
    AccusedProfile, CaseObservation,
    BailAction,
    RequestDocumentAction, FlagInconsistencyAction, CrossReferencePrecedentAction,
    ComputeStatutoryEligibilityAction, AssessSuretyAction, ClassifyBailTypeAction,
    ReadSubmissionsAction, AssessFlightRiskAction, CheckCaseFactorsAction,
    ApplyProportionalityAction,
    PullCriminalHistoryAction,
    IssueOrderAction,    # Block 4.3: issue_order(grant|deny|conditional) alias
    SubmitMemoAction,
)
from .precedent_db import PrecedentDB


class UndertriAIEnvironment(Environment):
    """
    Bail Assessment Environment — OpenEnv compliant.

    The agent reads a bail case and iteratively calls legal tools before
    submitting a structured bail recommendation memo. Reward is computed
    deterministically against the real High Court decision (ground_truth).
    """

    # Concurrent sessions are safe: each instance is independent (session_id isolation)
    SUPPORTS_CONCURRENT_SESSIONS: bool = True
    MAX_STEPS = 10  # Maximum tool calls before forcing memo submission

    def __init__(
        self,
        episodes_dir: Optional[str] = None,
        initial_stage: int = 1,
    ):
        """Create an environment with no active episode.

        Args:
            episodes_dir: Forwarded to ``BailDataset(episodes_dir=...)``.
            initial_stage: Curriculum stage that ``reset()`` samples from when
                it is not given an explicit ``stage``.
        """
        super().__init__()  # Sets self.rubric = None and self.transform = None
        self.dataset = BailDataset(episodes_dir=episodes_dir)
        self.precedents = PrecedentDB()
        self._current_stage: int = initial_stage

        # Per-episode state. reset() re-creates every one of these; they are
        # also initialised here so the instance is fully formed before the
        # first reset() and no attribute is defined only as a side effect of
        # another method.
        self._episode: Optional[Dict[str, Any]] = None
        self._episode_id: str = ""
        self._step_count: int = 0
        self._flags: List[str] = []
        self._retrieved_precedents: List[str] = []
        self._action_history: List[str] = []
        self._statutory_tool_called: bool = False
        self._tools_called: set = set()

    # ------------------------------------------------------------------
    # OpenEnv API
    # ------------------------------------------------------------------

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        stage: Optional[int] = None,
        **kwargs,
    ) -> CaseObservation:
        """Start a new episode. Returns initial case observation."""
        s = stage or self._current_stage

        # A8 fix: if episode_id is given, look up that specific case by case_id.
        # Previously episode_id was stored but episode was always sampled randomly.
        found_episode = None
        if episode_id is not None:
            for stage_eps in self.dataset._episodes.values():
                for ep in stage_eps:
                    if ep.get("case_id") == episode_id:
                        found_episode = ep
                        break
                if found_episode:
                    break

        if found_episode:
            self._episode = found_episode
        else:
            # Timeout guard — prevent infinite hang on slow dataset.sample_episode()
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex:
                fut = ex.submit(self.dataset.sample_episode, s, True, seed)
                try:
                    self._episode = fut.result(timeout=5.0)
                except concurrent.futures.TimeoutError:
                    raise RuntimeError("reset() timed out after 5s — check dataset loading.")

        self._episode_id    = episode_id or str(uuid.uuid4())
        self._step_count    = 0
        self._flags         = []
        self._retrieved_precedents  = []
        self._action_history: List[str] = []
        self._statutory_tool_called: bool = False
        self._tools_called: set = set()
        return self._make_observation(action_result=None)

    def step(
        self,
        action: BailAction,
        timeout_s: Optional[float] = None,
        **kwargs,
    ) -> StepResult:
        """Execute one agent action.

        Every call increments the step counter. An ``IssueOrderAction`` is
        converted to a ``SubmitMemoAction``. Outcomes:

        * Memo submitted before any tool was called: blocked, reward -0.15,
          episode continues.
        * Memo submitted after at least one tool: reward from
          ``compute_reward`` (minus the skip penalty), ``done=True``.
        * Tool type already called this episode: not re-executed, reward -0.05.
        * Tool call exceeding ``timeout_s``: reward -0.05, episode continues.
        * Otherwise the tool runs; reward is 0.0, or -0.1 with ``done=True``
          once ``MAX_STEPS`` is reached.

        Args:
            action: The agent's action.
            timeout_s: Optional wall-clock limit in seconds for a tool call.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A ``StepResult`` carrying the new observation, reward, done flag
            and an ``info`` dict.

        Raises:
            RuntimeError: If called before ``reset()``.
        """
        if self._episode is None:
            raise RuntimeError("Call reset() before step().")

        self._step_count += 1

        # ---- Block 4.3: issue_order alias — convert to SubmitMemoAction ----
        if isinstance(action, IssueOrderAction):
            _order_map = {
                "grant":       "Bail Granted",
                "deny":        "Bail Denied",
                "conditional": "Bail Granted",  # conditional = granted with conditions
            }
            action = SubmitMemoAction(
                flight_risk=action.flight_risk,
                flight_risk_justification=action.flight_risk_justification,
                statutory_eligible=action.statutory_eligible,
                statutory_computation=action.statutory_computation,
                grounds_for_bail=action.grounds_for_bail,
                grounds_against_bail=action.grounds_against_bail,
                recommended_outcome=_order_map[action.order_type],
                recommended_conditions=action.recommended_conditions,
                confidence=action.confidence,
            )

        # ---- Terminal action: submit memo ----
        if isinstance(action, SubmitMemoAction):
            # 4.5 Hard minimum: agent must have called at least 1 distinct tool before submitting.
            # This is a structural gate — even a skip-penalty can't compensate for zero information.
            if not self._tools_called:
                obs = self._make_observation(
                    action_result=(
                        "[BLOCKED] You must call at least one legal tool before submitting a memo. "
                        "Use tools such as compute_statutory_eligibility, assess_flight_risk, "
                        "read_submissions, or check_case_factors first."
                    ),
                    memo_submitted=False,
                )
                return StepResult(
                    observation=obs,
                    reward=-0.15,  # Stronger signal than just a penalty post-submission
                    done=False,
                    info={"blocked": "minimum_tools_not_met", "tools_called": 0},
                )

            # Skip penalty only if submitted on step 1 despite having called a tool.
            # NOTE(review): reset() zeroes the step counter and empties
            # _tools_called, and a tool is only recorded by an earlier step, so
            # this condition looks unreachable after the gate above — confirm
            # whether the penalty is still wanted.
            no_tool_penalty = 0.40 if self._step_count == 1 else 0.0

            reward_dict = compute_reward(
                agent_outcome     = action.recommended_outcome,
                agent_flight_risk = action.flight_risk,
                agent_eligible    = action.statutory_eligible,
                agent_computation = action.statutory_computation,
                agent_conditions  = action.recommended_conditions or [],
                episode           = self._episode,
                step_count        = self._step_count,
                max_steps         = self.MAX_STEPS,
                statutory_tool_used              = self._statutory_tool_called,
                agent_flight_risk_justification  = action.flight_risk_justification,
                agent_grounds_for                = action.grounds_for_bail,
                agent_grounds_against            = action.grounds_against_bail,
            )
            # Apply skip penalty (can push total legitimately negative)
            reward_dict["total_reward"] = round(reward_dict["total_reward"] - no_tool_penalty, 4)
            reward_dict["tool_skip_penalty"] = no_tool_penalty

            obs = self._make_observation(
                action_result=self._format_memo_result(action, reward_dict),
                memo_submitted=True,
            )
            return StepResult(
                observation=obs,
                reward=reward_dict["total_reward"],
                done=True,
                info=reward_dict,
            )

        # ---- Repeat-action deduplication (5B.2) ----
        # Deduplication is per action *class*, not per argument set.
        # NOTE(review): this early return (like the blocked and timeout ones)
        # skips the MAX_STEPS check at the bottom, so such steps never end the
        # episode on their own — confirm callers cap the loop.
        tool_key = type(action).__name__
        if tool_key in self._tools_called:
            # Return cached note — no re-execution, no reward gaming
            obs = self._make_observation(
                action_result=(
                    f"[CACHED] {tool_key} was already called this episode. "
                    "The result is already in your action history above. "
                    "Use a different tool or submit your memo."
                ),
                memo_submitted=False,
            )
            return StepResult(observation=obs, reward=-0.05, done=False,
                              info={"cached": True, "tool": tool_key})

        # Recorded before execution, so a tool that times out below still
        # counts as called for the rest of the episode.
        self._tools_called.add(tool_key)

        # ---- Tool actions with optional timeout enforcement ----
        if isinstance(action, ComputeStatutoryEligibilityAction):
            self._statutory_tool_called = True  # track for process reward

        if timeout_s is not None:
            # Not a `with` block: its exit would call shutdown(wait=True) and
            # wait for the slow tool, which is exactly what the timeout is
            # meant to avoid.
            executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            try:
                future = executor.submit(self._dispatch_tool, action)
                try:
                    result = future.result(timeout=timeout_s)
                except concurrent.futures.TimeoutError:
                    obs = self._make_observation(
                        action_result=f"TIMEOUT: tool call exceeded {timeout_s}s limit",
                        memo_submitted=False,
                    )
                    return StepResult(
                        observation=obs,
                        reward=-0.05,
                        done=False,
                        info={"timeout": True, "tool": tool_key},
                    )
            finally:
                # Do not block on an abandoned worker; it cannot be cancelled
                # once running and may still finish in the background.
                executor.shutdown(wait=False)
        else:
            result = self._dispatch_tool(action)

        # Accumulate action history (Gap 4): first 120 characters of the result.
        summary = f"[Step {self._step_count}] {tool_key}: {result[:120]}..."
        self._action_history.append(summary)

        # Force submit if max steps reached
        done = (self._step_count >= self.MAX_STEPS)
        reward = -0.1 if done else 0.0  # Small penalty for exhausting budget

        obs = self._make_observation(action_result=result, memo_submitted=done)
        return StepResult(observation=obs, reward=reward, done=done, info={})

    @property
    def state(self):
        """Return episode metadata (OpenEnv State interface)."""
        return {
            "episode_id": self._episode_id,
            "step_count": self._step_count,
            "stage": self._current_stage,
            "case_id": self._episode.get("case_id", "") if self._episode else "",
        }

    def set_stage(self, stage: int) -> None:
        """Advance the curriculum stage."""
        self._current_stage = stage
        self.dataset.set_stage(stage)

    # ------------------------------------------------------------------
    # Tool dispatch
    # ------------------------------------------------------------------

    def _dispatch_tool(self, action: BailAction) -> str:
        """Run one non-terminal tool action and return its textual result.

        The action's class selects the branch. Most tools only read the
        current episode and the action's own fields; two of them also mutate
        environment state: ``FlagInconsistencyAction`` appends to
        ``self._flags`` and ``CrossReferencePrecedentAction`` extends
        ``self._retrieved_precedents``.

        Args:
            action: A tool action (anything other than a memo submission).

        Returns:
            A human-readable report string, or an ``Unknown action type``
            message when the class is not handled here.
        """
        ep = self._episode

        if isinstance(action, RequestDocumentAction):
            if action.document_type in ep.get("documents_available", []):
                return f"✓ Document retrieved: {action.document_type}. Review attached."
            return f"✗ Document '{action.document_type}' not available in this case record."

        elif isinstance(action, FlagInconsistencyAction):
            flag_msg = f"[{action.severity.upper()}] {action.inconsistency} (at: {action.location})"
            self._flags.append(flag_msg)
            return f"Inconsistency flagged ({action.severity}): {action.inconsistency}"

        elif isinstance(action, CrossReferencePrecedentAction):
            results = self.precedents.search(
                query=action.query,
                jurisdiction=action.jurisdiction,
                crime_category=action.crime_category,
            )
            self._retrieved_precedents.extend(results)
            if results:
                return "Precedents found:\n" + "\n".join(f"  • {r}" for r in results)
            return "No directly applicable precedents found in database."

        elif isinstance(action, ComputeStatutoryEligibilityAction):
            # B9: NDPS cases get Section 37 response instead of threshold arithmetic
            if _is_ndps_case(ep):
                return (
                    f"Statutory Eligibility Analysis:\n"
                    f"  Sections: {', '.join(action.sections_invoked)}\n"
                    f"  Special Law: NDPS Act applies\n"
                    f"  Section: Section 37 NDPS Act\n"
                    f"  Message: NDPS Section 37 applies. Standard custody threshold not applicable. "
                    f"Bail requires twin conditions under Section 37(1)(b): "
                    f"(i) reasonable grounds to believe accused is not guilty, "
                    f"(ii) no reasonable opportunity to commit offence if released. "
                    f"These are matters for judicial discretion, not statutory calculation.\n"
                    f"  → ELIGIBLE FOR DEFAULT BAIL: NOT APPLICABLE (NDPS twin conditions govern)"
                )

            # Default-bail test: custody of at least half the maximum sentence,
            # and no special law in play.
            # NOTE(review): a zero max_sentence_years yields a 0-month
            # threshold, so any custody counts as eligible — confirm the
            # action schema rejects zero.
            half_months = (action.max_sentence_years * 12) / 2
            eligible = action.custody_months >= half_months and not action.special_law_applicable
            pct = round((action.custody_months / (action.max_sentence_years * 12)) * 100, 1) if action.max_sentence_years else 0
            return (
                f"Statutory Eligibility Analysis:\n"
                f"  Sections: {', '.join(action.sections_invoked)}\n"
                f"  Max Sentence: {action.max_sentence_years} years ({action.max_sentence_years*12:.0f} months)\n"
                f"  Threshold (50%): {half_months:.1f} months\n"
                f"  Time Served: {action.custody_months} months ({pct}%)\n"
                f"  Special Law: {'Yes — default bail restricted' if action.special_law_applicable else 'No'}\n"
                f"  → ELIGIBLE FOR DEFAULT BAIL: {'YES ✓' if eligible else 'NO ✗'}"
            )

        elif isinstance(action, AssessSuretyAction):
            # Feasible when the amount is at most three times the income
            # estimate; a missing (or zero) estimate falls back to 50000.
            feasible = action.proposed_amount <= (action.income_estimate or 50000) * 3
            income_str = f"₹{action.income_estimate:,}/month" if action.income_estimate is not None else "Not provided"
            return (
                f"Surety Assessment:\n"
                f"  Proposed Amount: ₹{action.proposed_amount:,}\n"
                f"  Accused Occupation: {action.accused_occupation}\n"
                f"  Income Estimate: {income_str}\n"
                f"  → {'FINANCIALLY FEASIBLE ✓' if feasible else 'AMOUNT MAY BE EXCESSIVE — consider reduction'}"
            )

        elif isinstance(action, ClassifyBailTypeAction):
            # Purely a count comparison of the two ground lists; at most the
            # first three grounds of each side are echoed back.
            pros_count = len(action.grounds_against)
            def_count  = len(action.grounds_for)
            if def_count > pros_count:
                suggestion = "Conditional Bail (grounds for bail outweigh grounds against)"
            elif pros_count > def_count:
                suggestion = "Bail Denial (grounds against outweigh grounds for bail)"
            else:
                suggestion = "Contested — full assessment required"
            return (
                f"Bail Type Classification:\n"
                f"  Grounds FOR bail ({def_count}): {'; '.join(action.grounds_for[:3])}\n"
                f"  Grounds AGAINST bail ({pros_count}): {'; '.join(action.grounds_against[:3])}\n"
                f"  → Preliminary classification: {suggestion}"
            )

        elif isinstance(action, ReadSubmissionsAction):
            lines = []
            if action.party in ("prosecution", "both"):
                pros = ep.get("prosecution_arguments", [])
                lines.append("── Prosecution Submissions ──")
                lines += [f"  {i+1}. {a}" for i, a in enumerate(pros)] or ["  (none on record)"]
            if action.party in ("defence", "both"):
                defence = ep.get("defence_arguments", [])
                lines.append("── Defence Submissions ──")
                lines += [f"  {i+1}. {a}" for i, a in enumerate(defence)] or ["  (none on record)"]
            if action.focus:
                # The focus string is only echoed; no filtering is applied.
                lines.append(f"\n[Focus filter: '{action.focus}' — review above for relevance]")
            return "\n".join(lines)

        elif isinstance(action, AssessFlightRiskAction):
            # Additive score: severity (0-3, unknown labels count as 1),
            # +3 prior absconding, +2 passport not surrendered/impounded,
            # -1 community roots. <=1 Low, <=3 Medium, otherwise High.
            score = 0
            reasons = []
            severity_map = {"minor": 0, "moderate": 1, "serious": 2, "heinous": 3}
            severity_points = severity_map.get(action.severity_of_offence, 1)
            score += severity_points
            reasons.append(f"Offence severity ({action.severity_of_offence}): +{severity_points}")
            if action.prior_absconding:
                score += 3
                reasons.append("Prior absconding on record: +3")
            if action.passport_status and action.passport_status not in ("surrendered", "impounded"):
                score += 2
                reasons.append(f"Passport status ({action.passport_status}): +2")
            if action.roots_in_community:
                score -= 1
                reasons.append("Community roots present: −1")
            if score <= 1:
                verdict = "Low"
            elif score <= 3:
                verdict = "Medium"
            else:
                verdict = "High"
            return (
                "Flight Risk Assessment:\n"
                + "\n".join(f"  {r}" for r in reasons)
                + f"\n  Total score: {score}"
                + f"\n  → FLIGHT RISK: {verdict}"
            )

        elif isinstance(action, CheckCaseFactorsAction):
            gt = ep.get("ground_truth", {})
            results = []
            # Each requested factor is matched by substring, first hit wins.
            for factor in action.factors_to_check:
                f = factor.lower()
                if "offence" in f or "nature" in f:
                    results.append(f"nature_of_offence: {ep.get('crime_type', 'unknown')} — sections: {', '.join(ep.get('ipc_sections', ['n/a']))}")
                elif "prior" in f or "history" in f or "criminal" in f:
                    results.append(f"criminal_history: {ep.get('accused_profile', {}).get('prior_cases', 'no prior cases on record')}")
                elif "co_accused" in f or "parity" in f:
                    parity = gt.get("parity_argument_used", False)
                    results.append(f"co_accused_parity_argument: {'YES — HC relied on parity reasoning' if parity else 'Not applicable in this case'}")
                elif "evidence" in f or "tampering" in f:
                    results.append(f"evidence_tampering_risk: {'flagged' if self._flags else 'no inconsistencies flagged yet — run flag_inconsistency if needed'}")
                elif "victim" in f or "vulnerability" in f:
                    results.append(f"victim_vulnerability: assess from charge sheet — crime_type is {ep.get('crime_type', 'unknown')}")
                else:
                    results.append(f"{factor}: case record does not have a structured field for this — review charge sheet.")
            return "Case Factors Examined:\n" + "\n".join(f"  • {r}" for r in results)

        elif isinstance(action, ApplyProportionalityAction):
            max_months = action.max_sentence_years * 12
            pct = round((action.custody_months / max_months) * 100, 1) if max_months else 0
            bnss_threshold = max_months / 2
            over_threshold = action.custody_months >= bnss_threshold
            lines = [
                "Proportionality Analysis (BNSS 479 / former CrPC 436A):",
                f"  Custody to date:    {action.custody_months:.1f} months",
                f"  Max sentence:       {action.max_sentence_years} years ({max_months:.0f} months)",
                f"  BNSS 479 threshold: {bnss_threshold:.1f} months (50%)",
                f"  Time served:        {pct}% of maximum sentence",
                f"  → Threshold crossed: {'YES — default bail right accrued' if over_threshold else 'NO — threshold not yet met'}",
            ]
            if action.expected_trial_months:
                remaining = action.expected_trial_months
                lines.append(f"  Estimated trial completion: {remaining:.0f} more months")
                # Custody so far plus the expected remaining trial time would
                # exceed the maximum sentence.
                if remaining > (max_months - action.custody_months):
                    lines.append("  ⚠️  Projected total custody exceeds maximum sentence — strong proportionality argument for bail")
            return "\n".join(lines)

        elif isinstance(action, PullCriminalHistoryAction):
            import re as _re

            profile = ep.get("accused_profile", {})
            # The record is normally free text, but coerce it so a numeric or
            # list-valued entry cannot crash the .lower() call below.
            prior = str(profile.get("prior_cases") or "None")
            bail_type = profile.get("bail_type", "Unknown")

            # 5C.5 fix: parse unstructured prior_cases text into structured output
            prior_lower = prior.lower().strip()
            is_clean = prior_lower in ("none", "nil", "no prior", "no prior record", "none.", "")

            # Infer case count from text: the first "<number> prior/previous/case"
            # match wins; otherwise 0 for a clean record and 1 for anything else.
            nums = _re.findall(r'\b(\d+)\s+(?:prior|previous|case)', prior_lower)
            prior_count = int(nums[0]) if nums else (0 if is_clean else 1)

            # Infer conviction vs acquittal (keyword search, no negation handling)
            convicted = any(kw in prior_lower for kw in ("convicted", "conviction", "sentenced"))
            acquitted = any(kw in prior_lower for kw in ("acquitted", "acquittal", "discharged"))
            bail_viol = any(kw in prior_lower for kw in ("absconded", "jumped bail", "bail cancelled"))

            lines = [
                "Criminal History Report (5C.5 — Structured):",
                f"  Raw record:         {prior}",
                f"  Prior cases:        {prior_count} {'(none)' if is_clean else '(see above)'}",
                f"  Conviction record:  {'YES' if convicted else 'NO (no conviction on record)'}",
                f"  Acquittal record:   {'YES' if acquitted else 'NO'}",
                f"  Bail violation:     {'YES ⚠️' if bail_viol else 'NO'}",
                f"  Bail type context:  {bail_type}",
            ]
            if action.include_bail_history:
                parity = ep.get("ground_truth", {}).get("parity_argument_used", False)
                lines.append(
                    f"  Prior bail history: "
                    f"{'Co-accused parity argument on record' if parity else 'No co-accused parity argument on record'}"
                )

            if is_clean:
                classification = "FIRST-TIME OFFENDER ✓"
            else:
                classification = "REPEAT OFFENDER — has prior record" + (
                    " | BAIL VIOLATION on record ⚠️" if bail_viol else ""
                )
            lines.append(f"  → Classification: {classification}")
            return "\n".join(lines)

        return f"Unknown action type: {type(action).__name__}"

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _make_observation(
        self,
        action_result: Optional[str] = None,
        memo_submitted: bool = False,
    ) -> CaseObservation:
        """Assemble the observation for the current episode state.

        Static case fields are read from the active episode dict (missing keys
        fall back to empty strings/lists); dynamic fields come from the
        environment's per-episode state.

        Args:
            action_result: Text result of the action just executed, if any.
            memo_submitted: Value for the observation's ``memo_submitted`` flag.

        Returns:
            A populated ``CaseObservation``.
        """
        episode = self._episode

        # Accused profile: name and gender default to "Unknown", the remaining
        # fields are passed through as None when absent.
        raw_profile = episode.get("accused_profile", {})
        profile_fields = {
            "name": raw_profile.get("name", "Unknown"),
            "gender": raw_profile.get("gender", "Unknown"),
        }
        for optional_key in ("occupation", "region", "prior_cases", "bail_type"):
            profile_fields[optional_key] = raw_profile.get(optional_key)
        accused = AccusedProfile(**profile_fields)

        seeded_precedents = self.precedents.get_initial_precedents(episode)
        retrieved = self._retrieved_precedents

        return CaseObservation(
            case_id=episode.get("case_id", ""),
            case_title=episode.get("case_title", ""),
            charge_sheet=episode.get("charge_sheet", ""),
            ipc_sections=episode.get("ipc_sections", []),
            crime_type=episode.get("crime_type", ""),
            court=episode.get("court", ""),
            date=episode.get("date", ""),
            accused_profile=accused,
            prosecution_arguments=episode.get("prosecution_arguments", []),
            defence_arguments=episode.get("defence_arguments", []),
            # The observation's legal_issues come from the episode's
            # "legal_principles" key.
            legal_issues=episode.get("legal_principles", []),
            # Initial precedents followed by everything retrieved so far.
            cited_precedents=seeded_precedents + retrieved,
            documents_available=episode.get("documents_available", []),
            action_result=action_result,
            # Copies, so the observation is not affected by later mutation.
            action_history=list(self._action_history),  # Gap 4
            flags_raised=list(self._flags),
            precedents_retrieved=list(retrieved),
            memo_submitted=memo_submitted,
            step_count=self._step_count,
            schema_variant=episode.get("schema_variant", "standard"),
        )

    def _format_memo_result(self, memo: SubmitMemoAction, reward: Dict[str, Any]) -> str:
        lines = [
            "═══ BAIL ASSESSMENT MEMO SUBMITTED ═══",
            f"Recommended Outcome:  {memo.recommended_outcome}",
            f"Flight Risk:          {memo.flight_risk}",
            f"Statutory Eligible:   {'Yes' if memo.statutory_eligible else 'No'}",
            f"Confidence:           {memo.confidence}",
            "",
            "── Reward Breakdown ──",
            f"  Outcome Match:        {reward['outcome_match']:.2f} × 0.40",
            f"  Flight Risk Accuracy: {reward['flight_risk_accuracy']:.2f} × 0.20",
            f"  Statutory Accuracy:   {reward['statutory_accuracy']:.2f} × 0.20",
            f"  Condition Score:      {reward['condition_appropriateness']:.2f} × 0.20",
            f"  Bias Penalty:       − {reward['bias_penalty']:.2f} × 0.30",
            f"  ─────────────────────────────────",
            f"  TOTAL REWARD:         {reward['total_reward']:.4f}",
            "",
            f"Ground Truth: {reward['ground_truth_outcome']}",
        ]
        return "\n".join(lines)