File size: 1,862 Bytes
54627d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd61817
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""Cache layout for B1 dataset loaders.

One subdir per dataset under ``CI_TRIAGE_DATA_CACHE`` (default
``data_artifacts/datasets_cache``); each ``FailureRecord`` is written as
``<record_id>.json``. Re-loading is content-addressable: same ``record_id``
overwrites the same file, so re-running ``cli load`` is idempotent.

The cache dir is gitignored (see ``.gitignore`` for ``data_artifacts/*``);
only the *generated scenarios* (Phase B5) are committed / published.
"""

from __future__ import annotations

import os
from collections.abc import Iterator
from pathlib import Path

# Fallback cache location (a relative path, so it resolves against the
# process's working directory); used when ``CI_TRIAGE_DATA_CACHE`` is unset
# or empty.
DEFAULT_CACHE_ROOT = Path("data_artifacts/datasets_cache")


def cache_root() -> Path:
    """Return the root directory that holds the per-dataset cache sub-directories.

    The ``CI_TRIAGE_DATA_CACHE`` environment variable overrides
    ``DEFAULT_CACHE_ROOT``. It is read on every call, so changes made to the
    environment at runtime take effect immediately.

    An empty value is treated the same as an unset variable: ``Path("")``
    is the current working directory, which would make ``load_all_cached``
    recursively scan the whole working tree for ``*.json`` files.
    """
    override = os.environ.get("CI_TRIAGE_DATA_CACHE")
    if override:
        return Path(override)
    return Path(DEFAULT_CACHE_ROOT)


def cache_dir_for(dataset_name: str) -> Path:
    """Return the cache sub-directory for *dataset_name* under the cache root.

    This only builds the path; the directory is neither created nor checked
    for existence.
    """
    root = cache_root()
    return root.joinpath(dataset_name)


def is_cached(record_id: str, dataset_name: str) -> bool:
    """Report whether ``<record_id>.json`` exists in the dataset's cache directory."""
    record_file = cache_dir_for(dataset_name) / f"{record_id}.json"
    return record_file.exists()


def load_cached(dataset_name: str) -> Iterator:
    """Lazily yield the cached ``FailureRecord``s of one dataset.

    Records come from the ``*.json`` files directly inside the dataset's
    cache directory, visited in sorted path order. Nothing is yielded when
    that directory does not exist. Read or validation errors are not caught
    here; they propagate to the consumer of the generator.
    """
    # Imported at call time to avoid a circular import with the datasets package.
    from ci_triage_env.data.datasets._base import FailureRecord

    dataset_dir = cache_dir_for(dataset_name)
    if dataset_dir.exists():
        json_files = sorted(dataset_dir.glob("*.json"))
        for json_file in json_files:
            raw = json_file.read_text()
            yield FailureRecord.model_validate_json(raw)


def load_all_cached() -> list:
    """Return all cached ``FailureRecord``s across every dataset sub-directory.

    Every ``*.json`` file found recursively under the cache root is parsed,
    in sorted path order. Loading is best-effort: a file that cannot be read
    or validated is skipped, and a warning naming the file is logged, so one
    corrupt record does not abort the whole load. Returns an empty list when
    the cache root does not exist.
    """
    import logging  # local import: the module header does not import logging

    from ci_triage_env.data.datasets._base import FailureRecord  # circular guard

    root = cache_root()
    if not root.exists():
        return []

    log = logging.getLogger(__name__)
    records = []
    for json_path in sorted(root.rglob("*.json")):
        try:
            records.append(FailureRecord.model_validate_json(json_path.read_text()))
        except Exception as exc:
            # Deliberately broad to keep the load best-effort (I/O, decoding
            # and validation errors all mean "skip this file"), but the skip
            # is reported instead of being silently swallowed.
            log.warning("Skipping unreadable cache record %s: %s", json_path, exc)
    return records