# synlayers/tools/dataset.py
import json
import os
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset
from datasets import load_dataset, concatenate_datasets
import torchvision.transforms as T
from collections import defaultdict
def collate_fn(batch):
    """Stack each sample's list of layer tensors and batch them into [B, L, C, H, W]."""
    pixels_RGBA = [torch.stack(item["pixel_RGBA"]) for item in batch]  # [L, C, H, W]
    pixels_RGB = [torch.stack(item["pixel_RGB"]) for item in batch]  # [L, C, H, W]
    pixels_RGBA = torch.stack(pixels_RGBA)  # [B, L, C, H, W]
    pixels_RGB = torch.stack(pixels_RGB)  # [B, L, C, H, W]
return {
"pixel_RGBA": pixels_RGBA,
"pixel_RGB": pixels_RGB,
"whole_img": [item["whole_img"] for item in batch],
"caption": [item["caption"] for item in batch],
"height": [item["height"] for item in batch],
"width": [item["width"] for item in batch],
"layout": [item["layout"] for item in batch],
}
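
# A minimal usage sketch (an assumption about how this collate function is wired
# up; the actual training script may differ). Note that torch.stack requires every
# sample in a batch to have the same number of layers and the same spatial size,
# so variable layer counts call for batch_size=1 or padding before collating.
#
#   from torch.utils.data import DataLoader
#   loader = DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)
#   batch = next(iter(loader))
#   batch["pixel_RGBA"].shape  # [1, L, 4, H, W]
#   batch["pixel_RGB"].shape   # [1, L, 3, H, W]
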
class LayoutTrainDataset(Dataset):
    """PrismLayersPro layers with a per-style-category 90/5/5 train/test/val split."""

    def __init__(self, data_dir, split="train"):
full_dataset = load_dataset(
"artplus/PrismLayersPro",
cache_dir=data_dir,
)
full_dataset = concatenate_datasets(list(full_dataset.values()))
if "style_category" not in full_dataset.column_names:
raise ValueError("Dataset must contain a 'style_category' field to split by class.")
categories = np.array(full_dataset["style_category"])
category_to_indices = defaultdict(list)
for i, cat in enumerate(categories):
category_to_indices[cat].append(i)
subsets = []
for cat, indices in category_to_indices.items():
total_len = len(indices)
idx_90 = int(total_len * 0.9)
idx_95 = int(total_len * 0.95)
if split == "train":
selected_idx = indices[:idx_90]
elif split == "test":
selected_idx = indices[idx_90:idx_95]
elif split == "val":
selected_idx = indices[idx_95:]
else:
raise ValueError("split must be 'train', 'val', or 'test'")
subsets.append(full_dataset.select(selected_idx))
self.dataset = concatenate_datasets(subsets)
self.to_tensor = T.ToTensor()
def __len__(self):
return len(self.dataset)
def __getitem__(self, idx):
item = self.dataset[idx]
def rgba2rgb(img_RGBA):
img_RGB = Image.new("RGB", img_RGBA.size, (128, 128, 128))
img_RGB.paste(img_RGBA, mask=img_RGBA.split()[3])
return img_RGB
def get_img(x):
if isinstance(x, str):
img_RGBA = Image.open(x).convert("RGBA")
img_RGB = rgba2rgb(img_RGBA)
else:
img_RGBA = x.convert("RGBA")
img_RGB = rgba2rgb(img_RGBA)
return img_RGBA, img_RGB
whole_img_RGBA, whole_img_RGB = get_img(item["whole_image"])
whole_cap = item["whole_caption"]
W, H = whole_img_RGBA.size
base_layout = [0, 0, W, H] # xyxy with exclusive end coordinates
layer_image_RGBA = [self.to_tensor(whole_img_RGBA)]
layer_image_RGB = [self.to_tensor(whole_img_RGB)]
layout = [base_layout]
base_img_RGBA, base_img_RGB = get_img(item["base_image"])
layer_image_RGBA.append(self.to_tensor(base_img_RGBA))
layer_image_RGB.append(self.to_tensor(base_img_RGB))
layout.append(base_layout)
        # Remaining layers: paste each layer crop onto a full-size canvas at its xyxy box.
        layer_count = item["layer_count"]
        for i in range(layer_count):
key = f"layer_{i:02d}"
img_RGBA, img_RGB = get_img(item[key])
w0, h0, w1, h1 = item[f"{key}_box"]
canvas_RGBA = Image.new("RGBA", (W, H), (0, 0, 0, 0))
canvas_RGB = Image.new("RGB", (W, H), (128, 128, 128))
W_img, H_img = w1 - w0, h1 - h0
if img_RGBA.size != (W_img, H_img):
img_RGBA = img_RGBA.resize((W_img, H_img), Image.BILINEAR)
img_RGB = img_RGB.resize((W_img, H_img), Image.BILINEAR)
canvas_RGBA.paste(img_RGBA, (w0, h0), img_RGBA)
canvas_RGB.paste(img_RGB, (w0, h0))
layer_image_RGBA.append(self.to_tensor(canvas_RGBA))
layer_image_RGB.append(self.to_tensor(canvas_RGB))
layout.append([w0, h0, w1, h1])
return {
"pixel_RGBA": layer_image_RGBA,
"pixel_RGB": layer_image_RGB,
"whole_img": whole_img_RGB,
"caption": whole_cap,
"height": H,
"width": W,
"layout": layout,
}
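
# Usage sketch for the per-category split above (the cache path and batch size are
# illustrative, not taken from the original training code):
#
#   train_set = LayoutTrainDataset(data_dir="./hf_cache", split="train")
#   val_set = LayoutTrainDataset(data_dir="./hf_cache", split="val")
#   loader = torch.utils.data.DataLoader(
#       train_set, batch_size=1, shuffle=True, collate_fn=collate_fn
#   )
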
class LayoutDatasetFixedSplit(Dataset):
"""
HuggingFace PrismLayersPro with a fixed index-based split.
Total 20,000 samples: train = [0, 19500), test = [19500, 20000).
    For the test split, use start_index and max_samples to select a sub-range:
start_index=200, max_samples=100 -> samples 019700-019799
start_index=0, max_samples=100 -> samples 019500-019599
"""
TRAIN_END = 19500
TOTAL = 20000
def __init__(self, data_dir, split="train", start_index=0, max_samples=None):
full_dataset = load_dataset(
"artplus/PrismLayersPro",
cache_dir=data_dir,
)
full_dataset = concatenate_datasets(list(full_dataset.values()))
if split == "train":
self.dataset = full_dataset.select(range(self.TRAIN_END))
self.global_offset = 0
elif split == "test":
self.dataset = full_dataset.select(range(self.TRAIN_END, self.TOTAL))
self.global_offset = self.TRAIN_END
else:
raise ValueError("split must be 'train' or 'test'")
end_index = len(self.dataset)
if max_samples is not None:
end_index = min(start_index + max_samples, len(self.dataset))
self.dataset = self.dataset.select(range(start_index, end_index))
self.global_offset += start_index
self.to_tensor = T.ToTensor()
print(f"[INFO] LayoutDatasetFixedSplit: split={split}, "
f"global range=[{self.global_offset}, {self.global_offset + len(self.dataset)}), "
f"samples={len(self.dataset)}")
def __len__(self):
return len(self.dataset)
def __getitem__(self, idx):
item = self.dataset[idx]
def rgba2rgb(img_RGBA):
img_RGB = Image.new("RGB", img_RGBA.size, (128, 128, 128))
img_RGB.paste(img_RGBA, mask=img_RGBA.split()[3])
return img_RGB
def get_img(x):
if isinstance(x, str):
img_RGBA = Image.open(x).convert("RGBA")
else:
img_RGBA = x.convert("RGBA")
return img_RGBA, rgba2rgb(img_RGBA)
whole_img_RGBA, whole_img_RGB = get_img(item["whole_image"])
whole_cap = item["whole_caption"]
W, H = whole_img_RGBA.size
base_layout = [0, 0, W, H]
layer_image_RGBA = [self.to_tensor(whole_img_RGBA)]
layer_image_RGB = [self.to_tensor(whole_img_RGB)]
layout = [base_layout]
base_img_RGBA, base_img_RGB = get_img(item["base_image"])
layer_image_RGBA.append(self.to_tensor(base_img_RGBA))
layer_image_RGB.append(self.to_tensor(base_img_RGB))
layout.append(base_layout)
layer_count = item["layer_count"]
for i in range(layer_count):
key = f"layer_{i:02d}"
img_RGBA, img_RGB = get_img(item[key])
w0, h0, w1, h1 = item[f"{key}_box"]
canvas_RGBA = Image.new("RGBA", (W, H), (0, 0, 0, 0))
canvas_RGB = Image.new("RGB", (W, H), (128, 128, 128))
W_img, H_img = w1 - w0, h1 - h0
if img_RGBA.size != (W_img, H_img):
img_RGBA = img_RGBA.resize((W_img, H_img), Image.BILINEAR)
img_RGB = img_RGB.resize((W_img, H_img), Image.BILINEAR)
canvas_RGBA.paste(img_RGBA, (w0, h0), img_RGBA)
canvas_RGB.paste(img_RGB, (w0, h0))
layer_image_RGBA.append(self.to_tensor(canvas_RGBA))
layer_image_RGB.append(self.to_tensor(canvas_RGB))
layout.append([w0, h0, w1, h1])
return {
"pixel_RGBA": layer_image_RGBA,
"pixel_RGB": layer_image_RGB,
"whole_img": whole_img_RGB,
"caption": whole_cap,
"height": H,
"width": W,
"layout": layout,
}
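
# Usage sketch for the fixed index split (mirrors the docstring examples; the
# data_dir value is a placeholder):
#
#   # global samples 019500-019599, i.e. the first 100 test samples
#   test_set = LayoutDatasetFixedSplit("./hf_cache", split="test", start_index=0, max_samples=100)
#   # global samples 019700-019799
#   test_set = LayoutDatasetFixedSplit("./hf_cache", split="test", start_index=200, max_samples=100)
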
def prism_collate_fn(batch):
"""Collate function for PrismBlendDataset."""
pixels_RGBA = [torch.stack(item["pixel_RGBA"]) for item in batch]
pixels_RGB = [torch.stack(item["pixel_RGB"]) for item in batch]
pixels_RGBA = torch.stack(pixels_RGBA)
pixels_RGB = torch.stack(pixels_RGB)
return {
"pixel_RGBA": pixels_RGBA,
"pixel_RGB": pixels_RGB,
"whole_img": [item["whole_img"] for item in batch],
"caption": [item["caption"] for item in batch],
"height": [item["height"] for item in batch],
"width": [item["width"] for item in batch],
"layout": [item["layout"] for item in batch],
}
class PrismBlendDataset(Dataset):
"""
Dataset for PrismLayersPro blended data.
Loads from local directory structure (following PrismLayersPro convention):
- data_dir/sample_XXXXXX/metadata.json
- data_dir/sample_XXXXXX/whole_image.png
- data_dir/sample_XXXXXX/base_image.png
- data_dir/sample_XXXXXX/layer_00.png, layer_01.png, ...
Boxes are in xyxy format: [x0, y0, x1, y1]
All layer images have transparent backgrounds.
"""
def __init__(self, data_dir: str, jsonl_path: str = None, target_size: int = 512, split: str = "all", max_layer_num: int = None):
self.data_dir = data_dir
self.target_size = target_size
self.max_layer_num = max_layer_num
self.to_tensor = T.ToTensor()
# Load samples
if jsonl_path and os.path.exists(jsonl_path):
self.samples = self._load_from_jsonl(jsonl_path)
else:
self.samples = self._load_from_directory(data_dir)
# Filter samples exceeding max_layer_num (if specified)
# Total layers = 2 (whole_image + base_image) + layer_count
if max_layer_num is not None:
original_count = len(self.samples)
self.samples = [
s for s in self.samples
if (2 + s.get('layer_count', 0)) <= max_layer_num
]
filtered_count = original_count - len(self.samples)
if filtered_count > 0:
print(f"[INFO] Filtered {filtered_count} samples exceeding max_layer_num={max_layer_num}")
# Split dataset (only if explicitly requested, default is "all" = use all samples)
# Usually you have separate train/test datasets, so no splitting needed
if split == "train_split":
self.samples = self.samples[:int(len(self.samples) * 0.9)]
elif split == "test_split":
self.samples = self.samples[int(len(self.samples) * 0.9):int(len(self.samples) * 0.95)]
elif split == "val_split":
self.samples = self.samples[int(len(self.samples) * 0.95):]
# "all", "train", "test" -> use all samples from the provided jsonl/directory
def _load_from_jsonl(self, jsonl_path: str):
"""Load samples from JSONL file."""
samples = []
with open(jsonl_path, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line:
samples.append(json.loads(line))
return samples
def _load_from_directory(self, data_dir: str):
"""Load samples from directory structure."""
samples = []
for name in sorted(os.listdir(data_dir)):
sample_dir = os.path.join(data_dir, name)
if os.path.isdir(sample_dir) and name.startswith('sample_'):
metadata_path = os.path.join(sample_dir, 'metadata.json')
                # metadata_path = os.path.join(sample_dir, 'metadata_old.json')  # old metadata for the original_1024 version
if os.path.exists(metadata_path):
with open(metadata_path, 'r', encoding='utf-8') as f:
samples.append(json.load(f))
return samples
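
    # Illustrative metadata.json contents, inferred from the fields read in
    # __getitem__ and in the max_layer_num filter; all values below are made up:
    #
    #   {
    #     "sample_dir": "sample_000042",
    #     "width": 1024,
    #     "layer_count": 2,
    #     "whole_caption": "a poster with two text layers",
    #     "layers": [
    #       {"image_path": "layer_00.png", "box": [100, 120, 400, 300]},
    #       {"image_path": "layer_01.png", "box": [150, 500, 800, 700]}
    #     ]
    #   }
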
def __len__(self):
return len(self.samples)
def _rgba2rgb(self, img_RGBA):
"""Convert RGBA to RGB with gray background."""
img_RGB = Image.new("RGB", img_RGBA.size, (128, 128, 128))
img_RGB.paste(img_RGBA, mask=img_RGBA.split()[3])
return img_RGB
def _get_sample_dir(self, sample):
"""Get the directory for a sample."""
        # Resolve the directory from the metadata's 'sample_dir' field; return None if it is missing or absent on disk
sample_dir = sample.get('sample_dir', '')
if sample_dir:
full_path = os.path.join(self.data_dir, sample_dir)
if os.path.exists(full_path):
return full_path
return None
def __getitem__(self, idx):
sample = self.samples[idx]
sample_dir = self._get_sample_dir(sample)
if not sample_dir:
raise ValueError(f"Could not find sample directory for index {idx}")
source_size = sample.get('width', self.target_size)
caption = sample.get('whole_caption', '')
# Scale factor (source -> target)
scale = self.target_size / source_size
# Load whole_image (composite)
whole_img_path = os.path.join(sample_dir, 'whole_image.png')
if os.path.exists(whole_img_path):
whole_img = Image.open(whole_img_path).convert('RGBA')
else:
whole_img = Image.new('RGBA', (source_size, source_size), (128, 128, 128, 255))
# Resize if needed
if whole_img.size != (self.target_size, self.target_size):
whole_img = whole_img.resize((self.target_size, self.target_size), Image.LANCZOS)
whole_img_RGB = self._rgba2rgb(whole_img)
# Initialize layer lists with whole_image first
layer_image_RGBA = [self.to_tensor(whole_img)]
layer_image_RGB = [self.to_tensor(whole_img_RGB)]
# Base layout (whole image) in xyxy format [x0, y0, x1, y1]
W, H = self.target_size, self.target_size
base_layout = [0, 0, W, H] # xyxy with exclusive end coordinates
layout = [base_layout]
# Load base_image (background) as second layer
base_img_path = os.path.join(sample_dir, 'base_image.png')
if os.path.exists(base_img_path):
base_img = Image.open(base_img_path).convert('RGBA')
if base_img.size != (self.target_size, self.target_size):
base_img = base_img.resize((self.target_size, self.target_size), Image.LANCZOS)
else:
base_img = Image.new('RGBA', (self.target_size, self.target_size), (0, 0, 0, 0))
base_img_RGB = self._rgba2rgb(base_img)
layer_image_RGBA.append(self.to_tensor(base_img))
layer_image_RGB.append(self.to_tensor(base_img_RGB))
layout.append(base_layout) # background covers whole image
# Load layers from metadata
layers = sample.get('layers', [])
for layer_info in layers:
image_path = layer_info.get('image_path', '')
box = layer_info.get('box', [0, 0, source_size, source_size])
# Scale box (xyxy format)
x0, y0, x1, y1 = box
scaled_box = [
int(x0 * scale),
int(y0 * scale),
int(x1 * scale),
int(y1 * scale)
]
# Load layer image
# Handles two formats:
# 1. Full-canvas (target_size x target_size) — use as-is
# 2. Cropped (smaller than canvas) — place at bbox position on transparent canvas
layer_path = os.path.join(sample_dir, image_path)
if os.path.exists(layer_path):
layer_img = Image.open(layer_path).convert('RGBA')
if layer_img.size == (self.target_size, self.target_size):
# Already full-canvas, use directly
pass
elif layer_img.size == (source_size, source_size) and source_size != self.target_size:
# Full-canvas at source resolution, just resize
layer_img = layer_img.resize((self.target_size, self.target_size), Image.LANCZOS)
else:
# Cropped layer — resize to fit the scaled bbox and place on canvas
bw = max(1, scaled_box[2] - scaled_box[0])
bh = max(1, scaled_box[3] - scaled_box[1])
layer_resized = layer_img.resize((bw, bh), Image.LANCZOS)
layer_img = Image.new('RGBA', (self.target_size, self.target_size), (0, 0, 0, 0))
layer_img.paste(layer_resized, (scaled_box[0], scaled_box[1]), layer_resized)
else:
layer_img = Image.new('RGBA', (self.target_size, self.target_size), (0, 0, 0, 0))
layer_img_RGB = self._rgba2rgb(layer_img)
layer_image_RGBA.append(self.to_tensor(layer_img))
layer_image_RGB.append(self.to_tensor(layer_img_RGB))
layout.append(scaled_box)
return {
"pixel_RGBA": layer_image_RGBA,
"pixel_RGB": layer_image_RGB,
"whole_img": whole_img_RGB,
"caption": caption,
"height": H,
"width": W,
"layout": layout,
}
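

if __name__ == "__main__":
    # Minimal smoke test, assuming a local directory laid out as described in the
    # PrismBlendDataset docstring; the path below is a placeholder.
    from torch.utils.data import DataLoader

    dataset = PrismBlendDataset(data_dir="./prism_blend_data", target_size=512)
    loader = DataLoader(dataset, batch_size=1, shuffle=False, collate_fn=prism_collate_fn)
    batch = next(iter(loader))
    print(batch["pixel_RGBA"].shape)  # [1, 2 + layer_count, 4, 512, 512]
    print(batch["pixel_RGB"].shape)   # [1, 2 + layer_count, 3, 512, 512]
    print(batch["layout"][0])         # per-layer xyxy boxes at target resolution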