File size: 1,477 Bytes
6a82282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""doc_id helpers for specialist outputs.

Specialists emit chat messages with `role="document <doc_id>"` and a
content body. Both Granite paths (Ollama Modelfile + vLLM HF template
via app/llm.py) consume that shape. These helpers keep doc_id strings
consistent across experiments so the reconciler's `[doc_id]` regex
finds them.
"""

from __future__ import annotations

import re
from typing import Any

# doc_id syntax mirrors the existing production layers: lowercase, snake
# case, alphanumerics + underscores. The Mellea citations regex is
# `\[(?P<id>[a-z][a-z0-9_]*)\]` — anything that doesn't match is invisible
# to validation.
_VALID = re.compile(r"^[a-z][a-z0-9_]*$")


def make_doc(doc_id: str, body: str) -> dict[str, str]:
    """Build a `document <doc_id>` chat message carrying *body*.

    Args:
        doc_id: Identifier for the document. Must start with a lowercase
            letter and contain only lowercase letters, digits and
            underscores.
        body: Text used verbatim as the message content.

    Returns:
        A dict with ``role`` set to ``"document <doc_id>"`` and
        ``content`` set to *body*.

    Raises:
        ValueError: If *doc_id* does not match ``[a-z][a-z0-9_]*``.
    """
    # fullmatch, not match: `$` also matches just before a trailing
    # newline, so match() would let "abc\n" through and emit a role
    # string the citations regex can never find.
    if not _VALID.fullmatch(doc_id):
        raise ValueError(
            f"doc_id {doc_id!r} must match [a-z][a-z0-9_]* "
            "to be visible to the Mellea citations check"
        )
    return {"role": f"document {doc_id}", "content": body}


def render_kv_body(rows: list[tuple[str, Any]]) -> str:
    """Render (label, value) pairs as a compact ``label: value`` body.

    The result is suitable for a `document <doc_id>` content payload.
    Granite grounds well against this format.

    Rows whose value is None or the empty string are skipped. Floats are
    formatted to three decimals with trailing zeros (and a dangling
    decimal point) removed; a float that rounds to zero is rendered as
    ``0`` rather than ``-0``. Every other value is rendered with its
    normal string conversion.

    Args:
        rows: (label, value) pairs, rendered in the given order.

    Returns:
        One ``label: value`` line per kept row, joined with newlines;
        the empty string when no row is kept.
    """
    lines = []
    for label, val in rows:
        if val is None or val == "":
            continue
        if isinstance(val, float):
            # Strip zeros first, then the point, so "100.000" becomes
            # "100" and not "1".
            val = f"{val:.3f}".rstrip("0").rstrip(".")
            # -0.0 and tiny negatives format as "-0.000" and would
            # otherwise surface as a meaningless "-0".
            if val == "-0":
                val = "0"
        lines.append(f"{label}: {val}")
    return "\n".join(lines)