kelseye committed on
Commit
0c4cb0e
·
verified ·
1 Parent(s): e954b1c

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/apartment_Aesthetic_1.0.jpg filter=lfs diff=lfs merge=lfs -text
37
+ assets/apartment_Aesthetic_2.5.jpg filter=lfs diff=lfs merge=lfs -text
38
+ assets/apartment_base.jpg filter=lfs diff=lfs merge=lfs -text
39
+ assets/cat_Aesthetic_1.0.jpg filter=lfs diff=lfs merge=lfs -text
40
+ assets/cat_Aesthetic_2.5.jpg filter=lfs diff=lfs merge=lfs -text
41
+ assets/cat_base.jpg filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ # Templates - Aesthetic Alignment (FLUX.2-klein-base-4B)
5
+
6
+ This model is one of the open-source Diffusion Templates series models from [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio). It is an aesthetic-alignment model: the strength of the aesthetic alignment in generated images is controlled by the `lora_scales` value passed in `template_inputs` (labelled `scale` in the showcase below).
7
+
8
+ ## Results Showcase
9
+
10
+ > **Prompt:** A cat is sitting on a stone.
11
+
12
+ | base model | scale=1.0 | scale=2.5 |
13
+ |:---:|:---:|:---:|
14
+ | ![](./assets/cat_base.jpg) | ![](./assets/cat_Aesthetic_1.0.jpg) | ![](./assets/cat_Aesthetic_2.5.jpg) |
15
+
16
+ ---
17
+
18
+ > **Prompt:** A cute anime girl with pink hair and cat ears, pastel colors.
19
+
20
+ | base model | scale=1.0 | scale=2.5 |
21
+ |:---:|:---:|:---:|
22
+ | ![](./assets/girl_base.jpg) | ![](./assets/girl_Aesthetic_1.0.jpg) | ![](./assets/girl_Aesthetic_2.5.jpg) |
23
+
24
+ ---
25
+
26
+ > **Prompt:** A cyberpunk apartment with a view of neon lights.
27
+
28
+ | base model | scale=1.0 | scale=2.5 |
29
+ |:---:|:---:|:---:|
30
+ | ![](./assets/apartment_base.jpg) | ![](./assets/apartment_Aesthetic_1.0.jpg) | ![](./assets/apartment_Aesthetic_2.5.jpg) |
31
+
32
+ ## Inference Code
33
+
34
+ * Install [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio)
35
+
36
+ ```
37
+ git clone https://github.com/modelscope/DiffSynth-Studio.git
38
+ cd DiffSynth-Studio
39
+ pip install -e .
40
+ ```
41
+
42
+ * Direct inference (requires 40GB GPU memory)
43
+
44
+ ```python
45
+ from diffsynth.diffusion.template import TemplatePipeline
46
+ from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
47
+ import torch
48
+
49
+ pipe = Flux2ImagePipeline.from_pretrained(
50
+ torch_dtype=torch.bfloat16,
51
+ device="cuda",
52
+ model_configs=[
53
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
54
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
55
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
56
+ ],
57
+ tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
58
+ )
59
+ pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important!
60
+ template = TemplatePipeline.from_pretrained(
61
+ torch_dtype=torch.bfloat16,
62
+ device="cuda",
63
+ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")],
64
+ )
65
+ image = template(
66
+ pipe,
67
+ prompt="A cat is sitting on a stone.",
68
+ seed=0, cfg_scale=4, num_inference_steps=50,
69
+ template_inputs = [{
70
+ "lora_ids": list(range(1, 180, 2)),
71
+ "lora_scales": 1.0,
72
+ "merge_type": "mean",
73
+ }],
74
+ negative_template_inputs = [{
75
+ "lora_ids": list(range(1, 180, 2)),
76
+ "lora_scales": 1.0,
77
+ "merge_type": "mean",
78
+ }],
79
+ )
80
+ image.save("image_Aesthetic_1.0.jpg")
81
+ image = template(
82
+ pipe,
83
+ prompt="A cat is sitting on a stone.",
84
+ seed=0, cfg_scale=4, num_inference_steps=50,
85
+ template_inputs = [{
86
+ "lora_ids": list(range(1, 180, 2)),
87
+ "lora_scales": 2.5,
88
+ "merge_type": "mean",
89
+ }],
90
+ negative_template_inputs = [{
91
+ "lora_ids": list(range(1, 180, 2)),
92
+ "lora_scales": 2.5,
93
+ "merge_type": "mean",
94
+ }],
95
+ )
96
+ image.save("image_Aesthetic_2.5.jpg")
97
+ ```
98
+
99
+ * Inference with lazy loading and memory management enabled (requires 24GB GPU memory)
100
+
101
+ ```python
102
+ from diffsynth.diffusion.template import TemplatePipeline
103
+ from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
104
+ import torch
105
+
106
+ vram_config = {
107
+ "offload_dtype": "disk",
108
+ "offload_device": "disk",
109
+ "onload_dtype": torch.float8_e4m3fn,
110
+ "onload_device": "cpu",
111
+ "preparing_dtype": torch.float8_e4m3fn,
112
+ "preparing_device": "cuda",
113
+ "computation_dtype": torch.bfloat16,
114
+ "computation_device": "cuda",
115
+ }
116
+ pipe = Flux2ImagePipeline.from_pretrained(
117
+ torch_dtype=torch.bfloat16,
118
+ device="cuda",
119
+ model_configs=[
120
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
121
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
122
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
123
+ ],
124
+ tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
125
+ vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
126
+ )
127
+ template = TemplatePipeline.from_pretrained(
128
+ torch_dtype=torch.bfloat16,
129
+ device="cuda",
130
+ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")],
131
+ lazy_loading=True,
132
+ )
133
+ image = template(
134
+ pipe,
135
+ prompt="A cat is sitting on a stone.",
136
+ seed=0, cfg_scale=4, num_inference_steps=50,
137
+ template_inputs = [{
138
+ "lora_ids": list(range(1, 180, 2)),
139
+ "lora_scales": 1.0,
140
+ "merge_type": "mean",
141
+ }],
142
+ negative_template_inputs = [{
143
+ "lora_ids": list(range(1, 180, 2)),
144
+ "lora_scales": 1.0,
145
+ "merge_type": "mean",
146
+ }],
147
+ )
148
+ image.save("image_Aesthetic_1.0.jpg")
149
+ image = template(
150
+ pipe,
151
+ prompt="A cat is sitting on a stone.",
152
+ seed=0, cfg_scale=4, num_inference_steps=50,
153
+ template_inputs = [{
154
+ "lora_ids": list(range(1, 180, 2)),
155
+ "lora_scales": 2.5,
156
+ "merge_type": "mean",
157
+ }],
158
+ negative_template_inputs = [{
159
+ "lora_ids": list(range(1, 180, 2)),
160
+ "lora_scales": 2.5,
161
+ "merge_type": "mean",
162
+ }],
163
+ )
164
+ image.save("image_Aesthetic_2.5.jpg")
165
+
166
+ ```
167
+
168
+ ## Training Code
169
+
170
+ After installing DiffSynth-Studio, use the following script to start training. For more information, please refer to the [DiffSynth-Studio Documentation](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/).
171
+
172
+ ```shell
173
+ modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Aesthetic/*" --local_dir ./data/diffsynth_example_dataset
174
+
175
+ accelerate launch examples/flux2/model_training/train.py \
176
+ --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic \
177
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic/metadata.jsonl \
178
+ --extra_inputs "template_inputs" \
179
+ --max_pixels 1048576 \
180
+ --dataset_repeat 50 \
181
+ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
182
+ --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Aesthetic:" \
183
+ --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
184
+ --learning_rate 1e-4 \
185
+ --num_epochs 2 \
186
+ --remove_prefix_in_ckpt "pipe.template_model." \
187
+ --output_path "./models/train/Template-KleinBase4B-Aesthetic_full" \
188
+ --trainable_models "template_model" \
189
+ --use_gradient_checkpointing \
190
+ --find_unused_parameters \
191
+ --enable_lora_hot_loading
192
+ ```
README_from_modelscope.md ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ frameworks:
3
+ - Pytorch
4
+ license: Apache License 2.0
5
+ tags: []
6
+ tasks:
7
+ - text-to-image-synthesis
8
+ ---
9
+
10
+ # Templates-美学对齐(FLUX.2-klein-base-4B)
11
+
12
+ 本模型是 [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) 开源的 Diffusion Templates 系列模型之一。该模型为 Aesthetic(美学)对齐模型,能够通过修改 `template_inputs` 中的 `lora_scales` 参数(即下文效果展示中的 `scale`)来调整图像的美学对齐程度。
13
+
14
+ ## 效果展示
15
+
16
+ > **Prompt:** A cat is sitting on a stone.
17
+
18
+ | base model | scale=1.0 | scale=2.5 |
19
+ |:---:|:---:|:---:|
20
+ | ![](./assets/cat_base.jpg) | ![](./assets/cat_Aesthetic_1.0.jpg) | ![](./assets/cat_Aesthetic_2.5.jpg) |
21
+
22
+ ---
23
+
24
+ > **Prompt:** A cute anime girl with pink hair and cat ears, pastel colors.
25
+
26
+ | base model | scale=1.0 | scale=2.5 |
27
+ |:---:|:---:|:---:|
28
+ | ![](./assets/girl_base.jpg) | ![](./assets/girl_Aesthetic_1.0.jpg) | ![](./assets/girl_Aesthetic_2.5.jpg) |
29
+
30
+ ---
31
+
32
+ > **Prompt:** A cyberpunk apartment with a view of neon lights.
33
+
34
+ | base model | scale=1.0 | scale=2.5 |
35
+ |:---:|:---:|:---:|
36
+ | ![](./assets/apartment_base.jpg) | ![](./assets/apartment_Aesthetic_1.0.jpg) | ![](./assets/apartment_Aesthetic_2.5.jpg) |
37
+
38
+ ## 推理代码
39
+
40
+ * 安装 [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio)
41
+
42
+ ```
43
+ git clone https://github.com/modelscope/DiffSynth-Studio.git
44
+ cd DiffSynth-Studio
45
+ pip install -e .
46
+ ```
47
+
48
+ * 直接推理,需 40G 显存
49
+
50
+ ```python
51
+ from diffsynth.diffusion.template import TemplatePipeline
52
+ from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
53
+ import torch
54
+
55
+ pipe = Flux2ImagePipeline.from_pretrained(
56
+ torch_dtype=torch.bfloat16,
57
+ device="cuda",
58
+ model_configs=[
59
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
60
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
61
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
62
+ ],
63
+ tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
64
+ )
65
+ pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important!
66
+ template = TemplatePipeline.from_pretrained(
67
+ torch_dtype=torch.bfloat16,
68
+ device="cuda",
69
+ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")],
70
+ )
71
+ image = template(
72
+ pipe,
73
+ prompt="A cat is sitting on a stone.",
74
+ seed=0, cfg_scale=4, num_inference_steps=50,
75
+ template_inputs = [{
76
+ "lora_ids": list(range(1, 180, 2)),
77
+ "lora_scales": 1.0,
78
+ "merge_type": "mean",
79
+ }],
80
+ negative_template_inputs = [{
81
+ "lora_ids": list(range(1, 180, 2)),
82
+ "lora_scales": 1.0,
83
+ "merge_type": "mean",
84
+ }],
85
+ )
86
+ image.save("image_Aesthetic_1.0.jpg")
87
+ image = template(
88
+ pipe,
89
+ prompt="A cat is sitting on a stone.",
90
+ seed=0, cfg_scale=4, num_inference_steps=50,
91
+ template_inputs = [{
92
+ "lora_ids": list(range(1, 180, 2)),
93
+ "lora_scales": 2.5,
94
+ "merge_type": "mean",
95
+ }],
96
+ negative_template_inputs = [{
97
+ "lora_ids": list(range(1, 180, 2)),
98
+ "lora_scales": 2.5,
99
+ "merge_type": "mean",
100
+ }],
101
+ )
102
+ image.save("image_Aesthetic_2.5.jpg")
103
+ ```
104
+
105
+ * 开启惰性加载和显存管理,需 24G 显存
106
+
107
+ ```python
108
+ from diffsynth.diffusion.template import TemplatePipeline
109
+ from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
110
+ import torch
111
+
112
+ vram_config = {
113
+ "offload_dtype": "disk",
114
+ "offload_device": "disk",
115
+ "onload_dtype": torch.float8_e4m3fn,
116
+ "onload_device": "cpu",
117
+ "preparing_dtype": torch.float8_e4m3fn,
118
+ "preparing_device": "cuda",
119
+ "computation_dtype": torch.bfloat16,
120
+ "computation_device": "cuda",
121
+ }
122
+ pipe = Flux2ImagePipeline.from_pretrained(
123
+ torch_dtype=torch.bfloat16,
124
+ device="cuda",
125
+ model_configs=[
126
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
127
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
128
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
129
+ ],
130
+ tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
131
+ vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
132
+ )
133
+ template = TemplatePipeline.from_pretrained(
134
+ torch_dtype=torch.bfloat16,
135
+ device="cuda",
136
+ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")],
137
+ lazy_loading=True,
138
+ )
139
+ image = template(
140
+ pipe,
141
+ prompt="A cat is sitting on a stone.",
142
+ seed=0, cfg_scale=4, num_inference_steps=50,
143
+ template_inputs = [{
144
+ "lora_ids": list(range(1, 180, 2)),
145
+ "lora_scales": 1.0,
146
+ "merge_type": "mean",
147
+ }],
148
+ negative_template_inputs = [{
149
+ "lora_ids": list(range(1, 180, 2)),
150
+ "lora_scales": 1.0,
151
+ "merge_type": "mean",
152
+ }],
153
+ )
154
+ image.save("image_Aesthetic_1.0.jpg")
155
+ image = template(
156
+ pipe,
157
+ prompt="A cat is sitting on a stone.",
158
+ seed=0, cfg_scale=4, num_inference_steps=50,
159
+ template_inputs = [{
160
+ "lora_ids": list(range(1, 180, 2)),
161
+ "lora_scales": 2.5,
162
+ "merge_type": "mean",
163
+ }],
164
+ negative_template_inputs = [{
165
+ "lora_ids": list(range(1, 180, 2)),
166
+ "lora_scales": 2.5,
167
+ "merge_type": "mean",
168
+ }],
169
+ )
170
+ image.save("image_Aesthetic_2.5.jpg")
171
+
172
+ ```
173
+
174
+ ## 训练代码
175
+
176
+ 安装 DiffSynth-Studio 后,使用以下脚本可开启训练,更多信息请参考 [DiffSynth-Studio 文档](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/)。
177
+
178
+ ```shell
179
+ modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Aesthetic/*" --local_dir ./data/diffsynth_example_dataset
180
+
181
+ accelerate launch examples/flux2/model_training/train.py \
182
+ --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic \
183
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic/metadata.jsonl \
184
+ --extra_inputs "template_inputs" \
185
+ --max_pixels 1048576 \
186
+ --dataset_repeat 50 \
187
+ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
188
+ --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Aesthetic:" \
189
+ --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
190
+ --learning_rate 1e-4 \
191
+ --num_epochs 2 \
192
+ --remove_prefix_in_ckpt "pipe.template_model." \
193
+ --output_path "./models/train/Template-KleinBase4B-Aesthetic_full" \
194
+ --trainable_models "template_model" \
195
+ --use_gradient_checkpointing \
196
+ --find_unused_parameters \
197
+ --enable_lora_hot_loading
198
+
199
+ ```
assets/apartment_Aesthetic_1.0.jpg ADDED

Git LFS Details

  • SHA256: 1cb2226bb1a752825e811fe7893c4fc3532b945dcfb9177d589250b97324702b
  • Pointer size: 131 Bytes
  • Size of remote file: 107 kB
assets/apartment_Aesthetic_2.5.jpg ADDED

Git LFS Details

  • SHA256: fffba261e2ca494c90fae9e16cad9af26267f467c40a1d26e5a32ba9c51408a7
  • Pointer size: 131 Bytes
  • Size of remote file: 110 kB
assets/apartment_base.jpg ADDED

Git LFS Details

  • SHA256: b291ac8f72eaee51a63bb24ed91402b0ccba822582d46ea631cabeb6c8653ecb
  • Pointer size: 131 Bytes
  • Size of remote file: 116 kB
assets/cat_Aesthetic_1.0.jpg ADDED

Git LFS Details

  • SHA256: f55f0d030465fbc4a5dcf60d5f182a41f37eb5b9bced5cf7b6f1233e95ac7ea4
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
assets/cat_Aesthetic_2.5.jpg ADDED

Git LFS Details

  • SHA256: c08aae28d1420acf71334a38bebc9e1d7beb26b0acbd1f08286ab75c2b2cdead
  • Pointer size: 131 Bytes
  • Size of remote file: 119 kB
assets/cat_base.jpg ADDED

Git LFS Details

  • SHA256: 54dc4dd97f84e8c136c8182718edb6b43454429b20b00366fedef7717b88e51a
  • Pointer size: 131 Bytes
  • Size of remote file: 157 kB
assets/girl_Aesthetic_1.0.jpg ADDED
assets/girl_Aesthetic_2.5.jpg ADDED
assets/girl_base.jpg ADDED
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"text-to-image-synthesis"}
model.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+
4
class LoRALayer(torch.nn.Module):
    """Holder for one pair of low-rank factors, ``lora_A`` and ``lora_B``.

    Args:
        dim_in: Number of columns of ``lora_A``.
        dim_out: Number of rows of ``lora_B``.
        rank: Number of rows of ``lora_A`` and columns of ``lora_B``.
        initialize: When true, ``lora_A`` is drawn uniformly from
            ``[-sqrt(1 / dim_in), sqrt(1 / dim_in))`` and ``lora_B`` is all
            zeros. When false, both tensors are left uninitialized
            (``torch.empty``); presumably they are overwritten by a
            checkpoint load afterwards - confirm against the loader.
    """

    def __init__(self, dim_in, dim_out, rank, initialize=False):
        super().__init__()
        shape_a = (rank, dim_in)
        shape_b = (dim_out, rank)
        if not initialize:
            factor_a = torch.empty(shape_a)
            factor_b = torch.empty(shape_b)
        else:
            bound = (1 / dim_in) ** 0.5
            # Map U[0, 1) samples onto [-bound, bound).
            factor_a = torch.rand(shape_a) * (bound * 2) - bound
            factor_b = torch.zeros(shape_b)
        # Registration order (A, then B) is kept so parameter ordering is unchanged.
        self.lora_A = torch.nn.Parameter(factor_a)
        self.lora_B = torch.nn.Parameter(factor_b)
14
+
15
+
16
class LoRA(torch.nn.Module):
    """One LoRA adapter: a ``LoRALayer`` for every targeted module name.

    Two module-name patterns are covered, each expanded over 20 block ids.
    Calling the module returns a flat dict of its factors keyed as
    ``"<module name>.lora_A.default.weight"`` and
    ``"<module name>.lora_B.default.weight"``.

    Args:
        rank: Rank used for every layer of this adapter.
    """

    def __init__(self, rank):
        super().__init__()
        # (module-name template, dim_in, dim_out) for each targeted projection.
        targets = (
            ("single_transformer_blocks.{block_id}.attn.to_qkv_mlp_proj", 3072, 27648),
            ("single_transformer_blocks.{block_id}.attn.to_out", 12288, 3072),
        )
        self.lora_patterns = [
            {
                "name": template,
                "num_blocks": 20,
                "dim_in": dim_in,
                "dim_out": dim_out,
                "rank": rank,
            }
            for template, dim_in, dim_out in targets
        ]
        self.parse_lora_layers(self.lora_patterns)

    def parse_lora_layers(self, lora_patterns):
        """Expand each pattern over its block ids into ``self.names`` / ``self.layers``.

        Layers are created without initialization (``LoRALayer`` default).
        """
        expanded = [
            (
                pattern["name"].format(block_id=block_id),
                LoRALayer(pattern["dim_in"], pattern["dim_out"], pattern["rank"]),
            )
            for pattern in lora_patterns
            for block_id in range(pattern["num_blocks"])
        ]
        self.names = [name for name, _ in expanded]
        self.layers = torch.nn.ModuleList([layer for _, layer in expanded])

    def forward(self):
        """Return a dict mapping weight keys to this adapter's parameters."""
        weights = {}
        for name, layer in zip(self.names, self.layers):
            weights.update(
                {
                    f"{name}.lora_A.default.weight": layer.lora_A,
                    f"{name}.lora_B.default.weight": layer.lora_B,
                }
            )
        return weights
55
+
56
+
57
class DualLoRA(torch.nn.Module):
    """Bank of LoRA adapters that are selected, scaled and merged per call.

    Args:
        num_loras: Number of rank-4 ``LoRA`` adapters held in the bank.
    """

    def __init__(self, num_loras=180):
        super().__init__()
        self.loras = torch.nn.ModuleList([LoRA(rank=4) for _ in range(num_loras)])

    @torch.no_grad()
    def process_inputs(self, lora_ids, lora_scales, require_grads=None, merge_type="concat", **kwargs):
        """Repackage the recognised arguments as a dict; extra keyword arguments are dropped."""
        return {"lora_ids": lora_ids, "lora_scales": lora_scales, "require_grads": require_grads, "merge_type": merge_type}

    def _scaled_lora(self, lora_id, lora_scale):
        """Return adapter ``lora_id``'s weights with ``lora_A`` multiplied by ``lora_scale``.

        ``lora_B`` is multiplied by 1 so that it, too, becomes a fresh tensor
        created under the caller's current grad mode.
        """
        weights = self.loras[lora_id]()
        return {key: value * (lora_scale if "lora_A" in key else 1) for key, value in weights.items()}

    def forward(self, lora_ids, lora_scales, require_grads=None, merge_type="concat", **kwargs):
        """Merge the selected adapters into a single LoRA state dict.

        Args:
            lora_ids: Indices into ``self.loras`` of the adapters to merge.
            lora_scales: One scale per id, or a single ``int``/``float``
                applied to every id. The scale is applied to ``lora_A`` only.
            require_grads: Optional per-id flags. Adapters flagged false
                contribute their values but receive no gradient. Defaults to
                all true.
            merge_type: ``"concat"`` stacks adapters along the rank axis
                (``lora_A`` on dim 0, ``lora_B`` on dim 1); ``"sum"`` adds
                both factors element-wise; ``"mean"`` averages ``lora_A`` and
                sums ``lora_B``.
            **kwargs: Ignored.

        Returns:
            ``{"lora": merged}`` where ``merged`` maps weight keys to tensors.

        Raises:
            ValueError: If ``merge_type`` is not one of the supported values.
        """
        # Accept ints as well as floats: previously `lora_scales=1` skipped
        # the broadcast and crashed on `len(lora_scales)` below.
        if isinstance(lora_scales, (int, float)):
            lora_scales = [lora_scales] * len(lora_ids)
        if require_grads is None:
            require_grads = [True] * len(lora_scales)

        loras = []
        for lora_id, lora_scale, require_grad in zip(lora_ids, lora_scales, require_grads):
            if require_grad:
                loras.append(self._scaled_lora(lora_id, lora_scale))
            else:
                # The scaling must happen inside no_grad: merely fetching the
                # Parameters under no_grad does not detach them, so scaling
                # them afterwards would still be recorded by autograd.
                with torch.no_grad():
                    loras.append(self._scaled_lora(lora_id, lora_scale))

        lora = {}
        if merge_type == "concat":
            for key in loras[0]:
                # lora_A is (rank, dim_in) and lora_B is (dim_out, rank), so
                # the rank axis is dim 0 for A and dim 1 for B.
                rank_dim = 0 if "lora_A" in key else 1
                lora[key] = torch.concat([lora_[key] for lora_ in loras], dim=rank_dim)
        elif merge_type == "sum":
            for key in loras[0]:
                lora[key] = torch.stack([lora_[key] for lora_ in loras]).sum(dim=0)
        elif merge_type == "mean":
            for key in loras[0]:
                stacked = torch.stack([lora_[key] for lora_ in loras])
                # Only lora_A is averaged; lora_B is summed.
                lora[key] = stacked.mean(dim=0) if "lora_A" in key else stacked.sum(dim=0)
        else:
            raise ValueError(f"Unsupported merge_type: {merge_type}")
        return {"lora": lora}
99
+
100
+
101
class DataAnnotator:
    """Pass-through data processor: returns its keyword arguments unchanged."""

    def __call__(self, **kwargs):
        """Return the received keyword arguments as a dict."""
        return kwargs
104
+
105
+
106
# Module-level hooks. NOTE(review): presumably looked up by name by the
# DiffSynth-Studio template loader - confirm against the loader.
TEMPLATE_MODEL = DualLoRA  # model class for this template
TEMPLATE_MODEL_PATH = "model.safetensors"  # weights file name
TEMPLATE_DATA_PROCESSOR = DataAnnotator  # data-processor class for this template
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6323dbbbcabdeb7ea9c203e0f6ed0d61a094edde0e1a8f7c134c132474ff7485
3
+ size 1328543560