---
# Byte Dream Configuration
#
# NOTE(review): this file was recovered from a whitespace-mangled paste; the
# nesting below was reconstructed from the section comments. In particular,
# unet/scheduler/vae/text_encoder are placed under `model:` — confirm against
# the config loader before relying on this layout.
model:
  name: "Byte Dream"
  version: "1.0.0"

  # Model architecture parameters (optimized for <10GB)
  unet:
    in_channels: 4
    out_channels: 4
    block_out_channels: [128, 256, 512, 512]
    layers_per_block: 1
    attention_head_dim: 4
    cross_attention_dim: 512  # Match CLIP ViT-B/32 output dimension
    use_linear_projection: false

  scheduler:
    name: "DDIM"  # Options: DDIM, PNDM, LMSDiscrete, EulerDiscrete
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    clip_sample: false
    set_alpha_to_one: false

  vae:
    in_channels: 3
    out_channels: 3
    down_block_types:
      - "DownEncoderBlock2D"
      - "DownEncoderBlock2D"
      - "DownEncoderBlock2D"
      - "DownEncoderBlock2D"
    up_block_types:
      - "UpDecoderBlock2D"
      - "UpDecoderBlock2D"
      - "UpDecoderBlock2D"
      - "UpDecoderBlock2D"
    latent_channels: 4
    sample_size: 512
    # Reduced channels for smaller model
    block_out_channels: [64, 128, 256, 256]

  text_encoder:
    model: "openai/clip-vit-base-patch32"
    max_length: 77

# Generation parameters
generation:
  width: 512
  height: 512
  num_inference_steps: 50
  guidance_scale: 7.5
  negative_prompt: "ugly, blurry, low quality, distorted, deformed"
  seed: null  # null for random, or set integer

# CPU Optimization
cpu_optimization:
  use_openvino: false
  use_onnx: false
  precision: "fp32"  # fp32 or fp16
  threads: -1  # -1 for all available threads
  memory_limit: null  # null for auto, or MB value

# Memory optimization (12GB target)
memory_optimization:
  use_gradient_checkpointing: true
  mixed_precision: "fp16"  # Use fp16 for reduced memory
  attention_slicing: true  # Slice attention to reduce peak memory

# Training parameters
training:
  dataset_path: "./dataset"
  output_dir: "./models/bytedream"
  epochs: 100
  batch_size: 1  # Reduced from 4 for 12GB memory constraint
  gradient_accumulation_steps: 4  # Accumulate to maintain effective batch size
  learning_rate: 0.00001
  lr_scheduler: "constant_with_warmup"
  lr_warmup_steps: 500
  max_grad_norm: 1.0
  # NOTE(review): "no" here disables AMP during training while
  # memory_optimization.mixed_precision is "fp16" (inference) — looks
  # intentional but confirm the two settings feed different code paths.
  mixed_precision: "no"  # no, fp16, bf16

  # Data augmentation
  random_flip: true
  random_crop: false
  center_crop: true

  # Logging
  logging_dir: "./logs"
  log_every_n_steps: 10

# Hugging Face
huggingface:
  organization: ""  # Your HF username/organization
  private: false
  push_to_hub: true