synlayers / infer / convert_vlm_jsonl.py

Upload infer/convert_vlm_jsonl.py with huggingface_hub

83bce40 verified about 17 hours ago

3.8 kB

	import argparse
	import json
	import shutil
	from pathlib import Path


	def resolve_image_path(image_value: str, image_dir: str \| None) -> Path \| None:
	if not image_value:
	return None

	path = Path(image_value)
	if path.is_absolute():
	return path

	if image_dir:
	candidate = Path(image_dir) / path
	if candidate.exists():
	return candidate

	return path


	def build_layers(bboxes: list) -> list:
	    """Convert raw bounding boxes into layer dictionaries.

	    The first four values of each usable box are read as ``x0, y0, x1, y1``,
	    truncated to integers and reordered so that ``x0 <= x1`` and ``y0 <= y1``.

	    Args:
	        bboxes: Sequence of boxes. ``None`` (for example a JSON ``null``) is
	            treated as an empty sequence.

	    Returns:
	        One dict per usable box with the keys ``layer_idx``, ``box``,
	        ``width_dst`` and ``height_dst``. ``layer_idx`` is the position of the
	        box in ``bboxes``, so skipped entries leave gaps in the numbering.
	    """
	    if bboxes is None:
	        return []

	    layers = []
	    for i, bbox in enumerate(bboxes):
	        if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
	            continue

	        try:
	            x0, y0, x1, y1 = (int(float(value)) for value in bbox[:4])
	        except (TypeError, ValueError, OverflowError):
	            # Non-numeric, NaN or infinite coordinates: skip the box like any
	            # other malformed entry instead of aborting the whole conversion.
	            continue

	        # Normalise the corner order so width and height are never negative.
	        x0, x1 = min(x0, x1), max(x0, x1)
	        y0, y1 = min(y0, y1), max(y0, y1)
	        layers.append({
	            "layer_idx": i,
	            "box": [x0, y0, x1, y1],
	            "width_dst": x1 - x0,
	            "height_dst": y1 - y0,
	        })
	    return layers


	def convert(
	    input_path: str,
	    output_path: str,
	    canvas_size: int = 1024,
	    image_dir: str | None = None,
	    materialize_data_dir: str | None = None,
	):
	    """Convert a VLM JSONL file into one output record per non-blank input line.

	    From every input object the keys ``sample_or_stem``, ``sample_dir``,
	    ``image``, ``bboxes`` and ``whole_caption`` are read. Every output record
	    holds ``sample_dir``, ``whole_caption``, ``layer_count``, ``width``,
	    ``height`` and ``layers``, plus ``blend_path`` when an image path is known.

	    Args:
	        input_path: JSONL file to read (UTF-8).
	        output_path: JSONL file to write (UTF-8); overwritten if it exists.
	        canvas_size: Value stored as both ``width`` and ``height``.
	        image_dir: Optional directory used to resolve relative ``image`` paths.
	        materialize_data_dir: Optional directory. When set and the image
	            exists, it is copied to ``<dir>/<sample name>/whole_image.png``
	            and ``blend_path`` points at that copy.

	    Raises:
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	        OSError: If a file cannot be opened, created or copied.
	    """
	    converted_count = 0
	    materialize_root = Path(materialize_data_dir) if materialize_data_dir else None

	    with open(input_path, "r", encoding="utf-8") as fin, \
	            open(output_path, "w", encoding="utf-8") as fout:
	        for line in fin:
	            line = line.strip()
	            if not line:
	                continue
	            vlm = json.loads(line)

	            # ``or ""`` also covers an explicit null or empty ``image`` value,
	            # which would otherwise crash Path() or yield an empty sample name.
	            image_value = vlm.get("image") or ""
	            sample_name = (
	                vlm.get("sample_or_stem")
	                or vlm.get("sample_dir")
	                or Path(image_value).stem
	                or f"sample_{converted_count:06d}"
	            )
	            image_path = resolve_image_path(image_value, image_dir)
	            # A JSON null for ``bboxes`` is handled like a missing key.
	            layers = build_layers(vlm.get("bboxes") or [])

	            blend_path = str(image_path) if image_path else ""

	            if materialize_root is not None and image_path and image_path.exists():
	                sample_path = materialize_root / sample_name
	                sample_path.mkdir(parents=True, exist_ok=True)
	                whole_image_path = sample_path / "whole_image.png"
	                # Byte-for-byte copy: the file is not re-encoded even though
	                # the target name always ends in ".png".
	                shutil.copyfile(image_path, whole_image_path)
	                blend_path = str(whole_image_path)

	            record = {
	                "sample_dir": sample_name,
	                "whole_caption": vlm.get("whole_caption", ""),
	                "layer_count": len(layers),
	                "width": canvas_size,
	                "height": canvas_size,
	                "layers": layers,
	            }
	            if blend_path:
	                # prism_infer.py falls back to blend_path when sample_dir/whole_image.png is absent.
	                record["blend_path"] = blend_path

	            fout.write(json.dumps(record, ensure_ascii=False) + "\n")
	            converted_count += 1

	    print(f"Converted {converted_count} samples: {input_path} -> {output_path}")


	if __name__ == "__main__":
	    # Command-line entry point: parse the options, then run the conversion.
	    cli = argparse.ArgumentParser(description="Convert VLM JSONL to inference-compatible format")
	    cli.add_argument("--input", "-i", required=True, type=str)
	    cli.add_argument("--output", "-o", required=True, type=str)
	    cli.add_argument("--canvas_size", default=1024, type=int)
	    cli.add_argument("--image_dir", default=None, type=str)
	    cli.add_argument(
	        "--materialize_data_dir",
	        default=None,
	        type=str,
	        help="Optional output data dir. Copies each VLM image to sample_dir/whole_image.png for infer.py.",
	    )
	    opts = cli.parse_args()

	    convert(
	        input_path=opts.input,
	        output_path=opts.output,
	        canvas_size=opts.canvas_size,
	        image_dir=opts.image_dir,
	        materialize_data_dir=opts.materialize_data_dir,
	    )