File size: 3,800 Bytes
83bce40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import argparse
import json
import shutil
from pathlib import Path


def resolve_image_path(image_value: str, image_dir: str | None) -> Path | None:
    if not image_value:
        return None

    path = Path(image_value)
    if path.is_absolute():
        return path

    if image_dir:
        candidate = Path(image_dir) / path
        if candidate.exists():
            return candidate

    return path


def build_layers(bboxes: list) -> list:
    """Build the per-layer records from a list of bounding boxes.

    Each usable box contributes one dict with the keys ``layer_idx``, ``box``,
    ``width_dst`` and ``height_dst``. Only the first four values of a box are
    read; they are converted with ``int(float(value))`` (so fractions are
    truncated toward zero) and reordered so that ``x0 <= x1`` and ``y0 <= y1``.

    Malformed entries are skipped rather than aborting the conversion: entries
    that are not a list/tuple, have fewer than four values, or contain a value
    that cannot be converted to an integer (non-numeric, NaN, infinity).
    ``layer_idx`` is the position of the box in ``bboxes``, so skipped entries
    leave gaps in the indices.

    Args:
        bboxes: Sequence of ``[x0, y0, x1, y1, ...]`` boxes. ``None`` is
            treated like an empty list.

    Returns:
        List of layer dicts in input order.
    """
    layers = []
    # `or []` covers a JSON null in place of the list.
    for i, bbox in enumerate(bboxes or []):
        if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
            continue

        try:
            x0, y0, x1, y1 = (int(float(value)) for value in bbox[:4])
        except (TypeError, ValueError, OverflowError):
            # TypeError/ValueError: non-numeric or NaN coordinate;
            # OverflowError: infinite coordinate. Skip like other bad boxes.
            continue

        # Normalise corner order so width/height are never negative.
        x0, x1 = min(x0, x1), max(x0, x1)
        y0, y1 = min(y0, y1), max(y0, y1)
        layers.append({
            "layer_idx": i,
            "box": [x0, y0, x1, y1],
            "width_dst": x1 - x0,
            "height_dst": y1 - y0,
        })
    return layers


def convert(
    input_path: str,
    output_path: str,
    canvas_size: int = 1024,
    image_dir: str | None = None,
    materialize_data_dir: str | None = None,
):
    """Convert a VLM JSONL file into inference-compatible JSONL.

    Every non-blank input line is parsed as one JSON object and written out as
    one JSON object with the keys ``sample_dir``, ``whole_caption``,
    ``layer_count``, ``width``, ``height``, ``layers`` and, when an image path
    is known, ``blend_path``.

    The sample name is taken from ``sample_or_stem``, then ``sample_dir``,
    then the stem of ``image``; when none of those yields a non-empty name,
    ``sample_NNNNNN`` (the zero-padded count of records converted so far) is
    used.

    Args:
        input_path: Path of the JSONL file to read.
        output_path: Path of the JSONL file to write (overwritten).
        canvas_size: Value written to both ``width`` and ``height``.
        image_dir: Optional directory relative ``image`` paths are tried
            against.
        materialize_data_dir: Optional directory. When given and the image
            exists, it is copied to ``<dir>/<sample name>/whole_image.png``
            and ``blend_path`` points at that copy.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        OSError: If a file cannot be opened or an image cannot be copied.
    """
    converted_count = 0
    materialize_root = Path(materialize_data_dir) if materialize_data_dir else None

    with open(input_path, "r", encoding="utf-8") as fin, \
         open(output_path, "w", encoding="utf-8") as fout:
        for line in fin:
            line = line.strip()
            if not line:
                continue
            vlm = json.loads(line)

            # A missing, null or empty "image" all mean "no image".
            image_value = vlm.get("image") or ""
            # The generated name is the last resort, so an empty image value
            # can no longer produce an empty sample name.
            sample_name = (
                vlm.get("sample_or_stem")
                or vlm.get("sample_dir")
                or Path(image_value).stem
                or f"sample_{converted_count:06d}"
            )
            image_path = resolve_image_path(image_value, image_dir)
            # `or []` tolerates a JSON null in place of the bbox list.
            layers = build_layers(vlm.get("bboxes") or [])

            blend_path = str(image_path) if image_path else ""

            if (
                materialize_root is not None
                and image_path is not None
                and image_path.exists()
            ):
                sample_path = materialize_root / sample_name
                sample_path.mkdir(parents=True, exist_ok=True)
                whole_image_path = sample_path / "whole_image.png"
                try:
                    shutil.copyfile(image_path, whole_image_path)
                except shutil.SameFileError:
                    # The source already is the materialized copy.
                    pass
                blend_path = str(whole_image_path)

            record = {
                "sample_dir": sample_name,
                "whole_caption": vlm.get("whole_caption", ""),
                "layer_count": len(layers),
                "width": canvas_size,
                "height": canvas_size,
                "layers": layers,
            }
            if blend_path:
                # prism_infer.py falls back to blend_path when sample_dir/whole_image.png is absent.
                record["blend_path"] = blend_path

            fout.write(json.dumps(record, ensure_ascii=False) + "\n")
            converted_count += 1

    print(f"Converted {converted_count} samples: {input_path} -> {output_path}")


if __name__ == "__main__":
    # Command-line entry point: parse the options, then run one conversion.
    cli = argparse.ArgumentParser(description="Convert VLM JSONL to inference-compatible format")

    # Required input/output JSONL paths.
    cli.add_argument("--input", "-i", type=str, required=True)
    cli.add_argument("--output", "-o", type=str, required=True)

    # Optional settings; the defaults mirror those of convert().
    cli.add_argument("--canvas_size", type=int, default=1024)
    cli.add_argument("--image_dir", type=str, default=None)
    cli.add_argument(
        "--materialize_data_dir",
        type=str,
        default=None,
        help="Optional output data dir. Copies each VLM image to sample_dir/whole_image.png for infer.py.",
    )

    opts = cli.parse_args()
    convert(
        input_path=opts.input,
        output_path=opts.output,
        canvas_size=opts.canvas_size,
        image_dir=opts.image_dir,
        materialize_data_dir=opts.materialize_data_dir,
    )