| model: |
| transport: |
| target: tim.schedulers.transports.OT_FM |
| params: |
| P_mean: 0.0 |
| P_std: 1.6 |
| sigma_d: 1.0 |
| unified_dcm_loss: |
| diffusion_ratio: 0.5 |
| consistency_ratio: 0.1 |
| derivative_type: dde |
| differential_epsilon: 0.005 |
| weight_time_type: sqrt |
| weight_time_tangent: True |
| network: |
| target: tim.models.t2i.tim_model.TiM |
| params: |
| input_size: 16 |
| patch_size: 1 |
| in_channels: 32 |
| depth: 28 |
| hidden_size: 1152 |
| cap_feat_dim: 1152 |
| num_heads: 16 |
| encoder_depth: 8 |
| qk_norm: True |
| z_dim: 768 |
| new_condition: t-r |
| use_new_embed: True |
| distance_aware: True |
| lora_hidden_size: 384 |
| |
| vae_dir: mit-han-lab/dc-ae-f32c32-sana-1.1-diffusers |
| |
| text_encoder_dir: google/gemma-3-1b-it |
| proportion_empty_prompts: 0.1 |
| use_last_hidden_state: True |
| max_seq_length: 256 |
| |
| enc_dir: checkpoints/radio/radio-v2.5-b_half.pth.tar |
| proj_coeff: 1.0 |
| |
| use_ema: True |
| ema_decay: 0.9999 |
| |
| data: |
| data_type: image_ms |
| dataset: |
| root_dir: datasets/t2i_toy_dataset |
| packed_json: datasets/t2i_toy_dataset/bucket_sampler.json |
| jsonl_dir: datasets/t2i_toy_dataset/data_info.jsonl |
| dataloader: |
| num_workers: 4 |
| batch_size: 128 |
|
|
| |
| training: |
| tracker: null |
| max_train_steps: 500000 |
| checkpointing_steps: 1000 |
| checkpoints_total_limit: 2 |
| resume_from_checkpoint: latest |
| learning_rate: 1.0e-4 |
| learning_rate_base_batch_size: 512 |
| scale_lr: True |
| lr_scheduler: constant |
| lr_warmup_steps: 0 |
| gradient_accumulation_steps: 1 |
| optimizer: |
| target: torch.optim.AdamW |
| params: |
| |
| betas: [0.9, 0.95] |
| weight_decay: 1.0e-2 |
| eps: 1.0e-6 |
| max_grad_norm: 1.0 |
| proportion_empty_prompts: 0.0 |
| mixed_precision: bf16 |
| allow_tf32: True |
| validation_steps: 500 |
| checkpoint_list: [100000, 200000, 300000, 400000] |
|
|