File size: 3,161 Bytes
7a5e28a
 
 
 
 
 
 
 
 
 
 
 
2bcc764
7b0cd98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bcc764
 
7a5e28a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
{
  "source_model": "stable-diffusion-v1-5/stable-diffusion-v1-5",
  "format": "LiteRT / TFLite diffusion submodels",
  "image_size": [
    512,
    512
  ],
  "tokenizer_max_length": 77,
  "vae_scaling_factor": 0.18215,
  "variants": [
    "fp32",
    "int8"
  ],
  "profiles": {
    "android-qnn-npu": {
      "platform": "android",
      "preferred_accelerator": "NPU",
      "delegate": "LiteRT Qualcomm AI Engine Direct (QNN)",
      "notes": [
        "Mixed deployment profile for the Qualcomm NPU path through LiteRT CompiledModel.",
        "This notebook still exports LiteRT/TFLite submodels, not Qualcomm-specific AOT context binaries.",
        "Android packaging still needs Qualcomm LiteRT runtime libraries and arm64-v8a delivery."
      ],
      "source_variant": "int8",
      "files": {
        "text_encoder": "fp32/text_encoder.tflite",
        "unet": "int8/unet.tflite",
        "vae_decoder": "fp32/vae_decoder.tflite"
      },
      "quantization": "fp32 text encoder + dynamic int8 UNet + fp32 VAE"
    },
    "android-cpu": {
      "platform": "android",
      "preferred_accelerator": "CPU",
      "delegate": "LiteRT CPU/XNNPACK",
      "notes": [
        "Conservative fallback profile for Android when GPU/NPU compilation is unavailable.",
        "Reuses the mixed int8 UNet path for smaller downloads and lower RAM pressure."
      ],
      "source_variant": "int8",
      "files": {
        "text_encoder": "fp32/text_encoder.tflite",
        "unet": "int8/unet.tflite",
        "vae_decoder": "fp32/vae_decoder.tflite"
      },
      "quantization": "fp32 text encoder + dynamic int8 UNet + fp32 VAE"
    },
    "android-gpu": {
      "platform": "android",
      "preferred_accelerator": "GPU",
      "delegate": "LiteRT GPU delegate",
      "notes": [
        "Uses the float export path because LiteRT GPU delegates are the most predictable there.",
        "The text encoder still prefers INT32 token ids to avoid delegate-hostile INT64 input graphs."
      ],
      "source_variant": "fp32",
      "files": {
        "text_encoder": "fp32/text_encoder.tflite",
        "unet": "fp32/unet.tflite",
        "vae_decoder": "fp32/vae_decoder.tflite"
      },
      "quantization": "fp32"
    },
    "ios-coreml": {
      "platform": "ios",
      "preferred_accelerator": "CORE_ML",
      "delegate": "LiteRT Core ML delegate",
      "notes": [
        "Core ML delegate currently supports float models, so this profile stays on the float export path.",
        "This notebook exports LiteRT/TFLite artifacts for the LiteRT Core ML delegate, not native `.mlmodel` files."
      ],
      "source_variant": "fp32",
      "files": {
        "text_encoder": "fp32/text_encoder.tflite",
        "unet": "fp32/unet.tflite",
        "vae_decoder": "fp32/vae_decoder.tflite"
      },
      "quantization": "fp32",
      "minimum_os": "iOS 12"
    }
  },
  "android_profile_priority": {
    "GPU": "android-gpu",
    "NPU": "android-qnn-npu",
    "CPU": "android-cpu"
  },
  "legacy_default_variant": "int8",
  "preferred_text_encoder_token_dtype": "int32",
  "text_encoder_runtime_config": "configs/text_encoder_runtime_config.json"
}