# synlayers/infer/convert_vlm_jsonl.py
# SynLayers's picture
# Upload infer/convert_vlm_jsonl.py with huggingface_hub
# 83bce40 verified
import argparse
import json
import shutil
from pathlib import Path
def resolve_image_path(image_value: str, image_dir: str | None) -> Path | None:
if not image_value:
return None
path = Path(image_value)
if path.is_absolute():
return path
if image_dir:
candidate = Path(image_dir) / path
if candidate.exists():
return candidate
return path
def build_layers(bboxes: list) -> list:
layers = []
for i, bbox in enumerate(bboxes):
if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
continue
x0, y0, x1, y1 = [int(float(value)) for value in bbox[:4]]
x0, x1 = min(x0, x1), max(x0, x1)
y0, y1 = min(y0, y1), max(y0, y1)
layers.append({
"layer_idx": i,
"box": [x0, y0, x1, y1],
"width_dst": x1 - x0,
"height_dst": y1 - y0,
})
return layers
def convert(
    input_path: str,
    output_path: str,
    canvas_size: int = 1024,
    image_dir: str | None = None,
    materialize_data_dir: str | None = None,
) -> None:
    """Convert a VLM JSONL file into the inference JSONL format.

    Each non-blank input line is parsed as one JSON object and written out as
    one record with the keys ``sample_dir``, ``whole_caption``,
    ``layer_count``, ``width``, ``height`` and ``layers``, plus ``blend_path``
    when an image path is known.

    Args:
        input_path: VLM JSONL file to read.
        output_path: JSONL file to write (overwritten).
        canvas_size: Value stored as both ``width`` and ``height``.
        image_dir: Optional directory that relative ``image`` values are
            resolved against.
        materialize_data_dir: Optional directory. When given and the source
            image exists, it is copied to
            ``<materialize_data_dir>/<sample name>/whole_image.png`` and
            ``blend_path`` points at that copy.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If a file cannot be opened or an image cannot be copied.
    """
    converted_count = 0
    materialize_root = Path(materialize_data_dir) if materialize_data_dir else None

    with open(input_path, "r", encoding="utf-8") as fin, \
            open(output_path, "w", encoding="utf-8") as fout:
        for line in fin:
            line = line.strip()
            if not line:
                continue
            vlm = json.loads(line)

            # dict.get() only applies its default when the key is missing, so
            # an explicit "" or null must be normalised here; otherwise the
            # sample name would be empty, or Path(None) would raise.
            image_value = vlm.get("image") or ""
            sample_name = (
                vlm.get("sample_or_stem")
                or vlm.get("sample_dir")
                or Path(image_value).stem
                or f"sample_{converted_count:06d}"
            )

            image_path = resolve_image_path(image_value, image_dir)
            # A null "bboxes" value is treated as "no layers".
            layers = build_layers(vlm.get("bboxes") or [])

            blend_path = str(image_path) if image_path is not None else ""
            if materialize_root and image_path is not None and image_path.exists():
                sample_path = materialize_root / sample_name
                sample_path.mkdir(parents=True, exist_ok=True)
                whole_image_path = sample_path / "whole_image.png"
                shutil.copyfile(image_path, whole_image_path)
                blend_path = str(whole_image_path)

            record = {
                "sample_dir": sample_name,
                "whole_caption": vlm.get("whole_caption", ""),
                "layer_count": len(layers),
                "width": canvas_size,
                "height": canvas_size,
                "layers": layers,
            }
            if blend_path:
                # prism_infer.py falls back to blend_path when sample_dir/whole_image.png is absent.
                record["blend_path"] = blend_path

            fout.write(json.dumps(record, ensure_ascii=False) + "\n")
            converted_count += 1

    print(f"Converted {converted_count} samples: {input_path} -> {output_path}")
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, and run a
    # single conversion with the parsed values.
    cli = argparse.ArgumentParser(description="Convert VLM JSONL to inference-compatible format")

    # (flags, add_argument keyword settings) for every supported option.
    option_specs = [
        (("--input", "-i"), {"type": str, "required": True}),
        (("--output", "-o"), {"type": str, "required": True}),
        (("--canvas_size",), {"type": int, "default": 1024}),
        (("--image_dir",), {"type": str, "default": None}),
        (
            ("--materialize_data_dir",),
            {
                "type": str,
                "default": None,
                "help": "Optional output data dir. Copies each VLM image to sample_dir/whole_image.png for infer.py.",
            },
        ),
    ]
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()
    convert(
        input_path=options.input,
        output_path=options.output,
        canvas_size=options.canvas_size,
        image_dir=options.image_dir,
        materialize_data_dir=options.materialize_data_dir,
    )