"""IRIS Training utilities: synthetic dataset and scheduler.""" import torch import torch.nn as nn import torch.nn.functional as F from torch.utils.data import Dataset, DataLoader import math import time import os from .model import IRIS from .flow_matching import flow_matching_loss, euler_sample, DCAE_F32C32_SCALE class SyntheticLatentDataset(Dataset): """Generates synthetic latent/text pairs for testing training stability.""" def __init__(self, num_samples=10000, latent_channels=32, latent_size=16, text_dim=512, text_length=32, seed=42): self.num_samples = num_samples gen = torch.Generator().manual_seed(seed) self.latents = torch.randn(num_samples, latent_channels, latent_size, latent_size, generator=gen) * 2.5 self.text_embeds = torch.randn(num_samples, text_length, text_dim, generator=gen) self.text_embeds = F.normalize(self.text_embeds, dim=-1) * math.sqrt(text_dim) def __len__(self): return self.num_samples def __getitem__(self, idx): return {"latent": self.latents[idx], "text_embed": self.text_embeds[idx]} class CosineWarmupScheduler: """Cosine decay with linear warmup.""" def __init__(self, optimizer, warmup_steps, total_steps, min_lr_ratio=0.1): self.optimizer = optimizer self.warmup_steps = warmup_steps self.total_steps = total_steps self.min_lr_ratio = min_lr_ratio self.base_lrs = [pg["lr"] for pg in optimizer.param_groups] self.step_count = 0 def step(self): self.step_count += 1 if self.step_count <= self.warmup_steps: scale = self.step_count / max(1, self.warmup_steps) else: progress = (self.step_count - self.warmup_steps) / max(1, self.total_steps - self.warmup_steps) scale = self.min_lr_ratio + (1 - self.min_lr_ratio) * 0.5 * (1 + math.cos(math.pi * progress)) for pg, base_lr in zip(self.optimizer.param_groups, self.base_lrs): pg["lr"] = base_lr * scale def get_lr(self): return [pg["lr"] for pg in self.optimizer.param_groups]