lxazjk committed on
Commit
0062bb1
·
verified ·
1 Parent(s): 68e8459

Upload LlamaGen FlashAR checkpoints

Browse files
B/FlashAR-LlamaGen-B.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "B",
3
+ "dst": "modelscope_upload/FlashAR-LlamaGen-B/FlashAR-LlamaGen-B.pt",
4
+ "source_checkpoint": "cloud_ckpt_vbranch_last2_hvgate_reg0p1_gptb/2026-05-01-17-46-47/000-GPT-B/checkpoints/last_version.pt",
5
+ "steps": 75000,
6
+ "metrics": {
7
+ "epoch": 29,
8
+ "step": 75000,
9
+ "npz_path": "results/000-GPT-B/./samples_vbranch_last2_hvgate_reg0p1_eval10_gptb/latest.npz",
10
+ "txt_path": "results/000-GPT-B/./samples_vbranch_last2_hvgate_reg0p1_eval10_gptb/latest.txt",
11
+ "inception_score": 208.30068969726562,
12
+ "fid": 4.680193238979371,
13
+ "sfid": 6.680051826699128,
14
+ "precision": 0.83106,
15
+ "recall": 0.4761
16
+ },
17
+ "original_model_key_count": 126,
18
+ "saved_model_key_count": 100,
19
+ "removed_cache_key_count": 26,
20
+ "removed_cache_keys": [
21
+ "layers.0.attention.kv_cache.k_cache",
22
+ "layers.0.attention.kv_cache.v_cache",
23
+ "layers.1.attention.kv_cache.k_cache",
24
+ "layers.1.attention.kv_cache.v_cache",
25
+ "layers.2.attention.kv_cache.k_cache",
26
+ "layers.2.attention.kv_cache.v_cache",
27
+ "layers.3.attention.kv_cache.k_cache",
28
+ "layers.3.attention.kv_cache.v_cache",
29
+ "layers.4.attention.kv_cache.k_cache",
30
+ "layers.4.attention.kv_cache.v_cache",
31
+ "layers.5.attention.kv_cache.k_cache",
32
+ "layers.5.attention.kv_cache.v_cache",
33
+ "layers.6.attention.kv_cache.k_cache",
34
+ "layers.6.attention.kv_cache.v_cache",
35
+ "layers.7.attention.kv_cache.k_cache",
36
+ "layers.7.attention.kv_cache.v_cache",
37
+ "layers.8.attention.kv_cache.k_cache",
38
+ "layers.8.attention.kv_cache.v_cache",
39
+ "layers.9.attention.kv_cache.k_cache",
40
+ "layers.9.attention.kv_cache.v_cache",
41
+ "layers.10.attention.kv_cache.k_cache",
42
+ "layers.10.attention.kv_cache.v_cache",
43
+ "layers.11.attention.kv_cache.k_cache",
44
+ "layers.11.attention.kv_cache.v_cache",
45
+ "layers.12.attention.kv_cache.k_cache",
46
+ "layers.12.attention.kv_cache.v_cache"
47
+ ],
48
+ "has_optimizer": false,
49
+ "has_scheduler": false,
50
+ "modelscope_repo": "lxazjk/FlashAR-LlamaGen-B"
51
+ }
B/FlashAR-LlamaGen-B.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f756dc013daa3bd8241b2106f2a998e0caa60b998841ca1043c45554cd029b66
3
+ size 526436890
B/README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FlashAR-LlamaGen-B
2
+
3
+ Best GPT-B checkpoint for FlashAR / LlamaGen NAR ImageNet-256 evaluation.
4
+
5
+ ## Files
6
+
7
+ - `FlashAR-LlamaGen-B.pt`: clean inference checkpoint.
8
+ - `FlashAR-LlamaGen-B.json`: metric and provenance sidecar.
9
+
10
+ ## Metrics
11
+
12
+ - Dataset/eval: ImageNet-256
13
+ - Step: 75,000
14
+ - FID: 4.680193238979371
15
+ - sFID: 6.680051826699128
16
+ - Inception Score: 208.30068969726562
17
+ - Precision: 0.83106
18
+ - Recall: 0.4761
19
+
20
+ ## Checkpoint format
21
+
22
+ The `.pt` file contains:
23
+
24
+ - `model`: model state dict only
25
+ - `args`: original training args
26
+ - `steps`: training step
27
+ - `metrics`: best eval metrics
28
+
29
+ It does not contain optimizer state, scheduler state, or KV-cache buffers. The source training checkpoint contained 26 `kv_cache` buffers; they were explicitly removed before upload.
B/configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}
L/FlashAR-LlamaGen-L.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208060f6fca77c03bee97457498603cc3b4a62ed02a9f7b07f9a29f4b2a0b051
3
+ size 1494418211
L/README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FlashAR-LlamaGen-L
2
+
3
+ FlashAR checkpoint for the LlamaGen-L (`GPT-L`) model.
4
+
5
+ - Run: `008-GPT-L`
6
+ - Variant: `GPT-L 256 vbranch_last4_hvgate_reg0p1`
7
+ - FID: `3.174558946842069`
8
+ - Step: `75000`
9
+ - IS: `285.8968200683594`
10
+ - sFID: `6.364996221369893`
11
+ - Precision / Recall: `0.833` / `0.5349`
12
+ - Checkpoint file: `FlashAR-LlamaGen-L.pt`
13
+ - Note: available final checkpoint; selected by best available final-step FID among GPT-L candidates
14
+
L/configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}
L/metrics.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "repo": "FlashAR-LlamaGen-L",
3
+ "run": "008-GPT-L",
4
+ "variant": "GPT-L 256 vbranch_last4_hvgate_reg0p1",
5
+ "fid": 3.174558946842069,
6
+ "step": 75000,
7
+ "inception_score": 285.8968200683594,
8
+ "sfid": 6.364996221369893,
9
+ "precision": 0.833,
10
+ "recall": 0.5349,
11
+ "source_checkpoint": "cloud_ckpt_vbranch_last4_hvgate_reg0p1/2026-05-02-08-36-54/008-GPT-L/checkpoints/last_version.pt",
12
+ "note": "available final checkpoint; selected by best available final-step FID among GPT-L candidates"
13
+ }
README.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - image-generation
5
+ - autoregressive
6
+ - flashar
7
+ - llamagen
8
+ ---
9
+
10
+ # LlamaGen-FlashAR
11
+
12
+ This repository collects FlashAR checkpoints for LlamaGen variants.
13
+
14
+ FlashAR is introduced in **"FlashAR: Efficient Post-Training Acceleration for Autoregressive Image Generation"**. It accelerates raster-scan autoregressive image generation with a vertical prediction branch and a learnable fusion gate, enabling anti-diagonal decoding.
15
+
16
+ - Paper: [arXiv:2605.09430](https://arxiv.org/abs/2605.09430)
17
+ - Project page: [FlashAR](https://lxazjk.github.io/FlashAR/)
18
+ - Code: [Emu3.5-NAR](https://github.com/lxazjk/Emu3.5-NAR)
19
+
20
+ ## Checkpoints
21
+
22
+ | Variant | Directory | Checkpoint | Extra files |
23
+ | --- | --- | --- | --- |
24
+ | LlamaGen-B | `B/` | `B/FlashAR-LlamaGen-B.pt` | `B/FlashAR-LlamaGen-B.json` |
25
+ | LlamaGen-L | `L/` | `L/FlashAR-LlamaGen-L.pt` | `L/metrics.json` |
26
+ | LlamaGen-XL | `XL/` | `XL/FlashAR-LlamaGen-XL.pt` | `XL/FlashAR-LlamaGen-XL.json` |
27
+ | LlamaGen-XXL | `XXL/` | `XXL/FlashAR-LlamaGen-XXL.pt` | `XXL/metrics.json` |
28
+
29
+ Each subdirectory also includes the source model card (`README.md`) and `configuration.json` from the corresponding ModelScope checkpoint.
30
+
31
+ ## Layout
32
+
33
+ ```text
34
+ .
35
+ ├── B/
36
+ │ ├── FlashAR-LlamaGen-B.pt
37
+ │ ├── FlashAR-LlamaGen-B.json
38
+ │ ├── configuration.json
39
+ │ └── README.md
40
+ ├── L/
41
+ │ ├── FlashAR-LlamaGen-L.pt
42
+ │ ├── metrics.json
43
+ │ ├── configuration.json
44
+ │ └── README.md
45
+ ├── XL/
46
+ │ ├── FlashAR-LlamaGen-XL.pt
47
+ │ ├── FlashAR-LlamaGen-XL.json
48
+ │ ├── configuration.json
49
+ │ └── README.md
50
+ └── XXL/
51
+ ├── FlashAR-LlamaGen-XXL.pt
52
+ ├── metrics.json
53
+ ├── configuration.json
54
+ └── README.md
55
+ ```
56
+
57
+ ## Citation
58
+
59
+ ```bibtex
60
+ @article{zhou2026flashar,
61
+ title={FlashAR: Efficient Post-Training Acceleration for Autoregressive Image Generation},
62
+ author={Zhou, Junkang and He, Yefei and Chen, Feng and Wang, Weijie and Zhuang, Bohan},
63
+ journal={arXiv preprint arXiv:2605.09430},
64
+ year={2026}
65
+ }
66
+ ```
XL/FlashAR-LlamaGen-XL.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "XL",
3
+ "dst": "modelscope_upload/FlashAR-LlamaGen-XL/FlashAR-LlamaGen-XL.pt",
4
+ "source_checkpoint": "cloud_ckpt_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/2026-05-07-14-18-19/024-GPT-XL/checkpoints/last_version.pt",
5
+ "steps": 107500,
6
+ "metrics": {
7
+ "epoch": 42,
8
+ "step": 107500,
9
+ "npz_path": "./results/024-GPT-XL/./samples_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/latest.npz",
10
+ "txt_path": "./results/024-GPT-XL/./samples_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/latest.txt",
11
+ "inception_score": 259.35772705078125,
12
+ "fid": 3.054045162945613,
13
+ "sfid": 6.683951400631031,
14
+ "precision": 0.80102,
15
+ "recall": 0.5758
16
+ },
17
+ "original_model_key_count": 268,
18
+ "saved_model_key_count": 268,
19
+ "removed_cache_key_count": 0,
20
+ "removed_cache_keys": [],
21
+ "has_optimizer": false,
22
+ "has_scheduler": false,
23
+ "modelscope_repo": "lxazjk/FlashAR-LlamaGen-XL"
24
+ }
XL/FlashAR-LlamaGen-XL.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b2202b3856e31e67a4dbd76a8e537fa4aa71f0726bbd36f682913d13d0347b8
3
+ size 3268249674
XL/README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FlashAR-LlamaGen-XL
2
+
3
+ Best GPT-XL checkpoint for FlashAR / LlamaGen NAR ImageNet-256 evaluation.
4
+
5
+ ## Files
6
+
7
+ - `FlashAR-LlamaGen-XL.pt`: clean inference checkpoint.
8
+ - `FlashAR-LlamaGen-XL.json`: metric and provenance sidecar.
9
+
10
+ ## Metrics
11
+
12
+ - Dataset/eval: ImageNet-256
13
+ - Step: 107,500
14
+ - FID: 3.054045162945613
15
+ - sFID: 6.683951400631031
16
+ - Inception Score: 259.35772705078125
17
+ - Precision: 0.80102
18
+ - Recall: 0.5758
19
+
20
+ ## Checkpoint format
21
+
22
+ The `.pt` file contains:
23
+
24
+ - `model`: model state dict only
25
+ - `args`: original training args
26
+ - `steps`: training step
27
+ - `metrics`: best eval metrics
28
+
29
+ It does not contain optimizer state, scheduler state, or KV-cache buffers. Verification found zero `kv_cache`, `k_cache`, or `v_cache` keys in this uploaded checkpoint.
XL/configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}
XXL/FlashAR-LlamaGen-XXL.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30cea896a09b64bd771f1e0ead47333caadbf713eeb3c316eb8a4f557b6f07f2
3
+ size 5862212399
XXL/README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FlashAR-LlamaGen-XXL
2
+
3
+ FlashAR checkpoint for the LlamaGen-XXL (`GPT-XXL`) model.
4
+
5
+ - Run: `025-GPT-XXL`
6
+ - Variant: `GPT-XXL 256 clean_bestfid_nogatecol`
7
+ - FID: `3.163676372982593`
8
+ - Step: `75000`
9
+ - IS: `250.9591064453125`
10
+ - sFID: `7.060702650024382`
11
+ - Precision / Recall: `0.77906` / `0.6022`
12
+ - Checkpoint file: `FlashAR-LlamaGen-XXL.pt`
13
+ - Note: exact best-step checkpoint; KV cache removed
14
+
XXL/configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}
XXL/metrics.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "repo": "FlashAR-LlamaGen-XXL",
3
+ "run": "025-GPT-XXL",
4
+ "variant": "GPT-XXL 256 clean_bestfid_nogatecol",
5
+ "fid": 3.163676372982593,
6
+ "step": 75000,
7
+ "inception_score": 250.9591064453125,
8
+ "sfid": 7.060702650024382,
9
+ "precision": 0.77906,
10
+ "recall": 0.6022,
11
+ "source_checkpoint": "cloud_ckpt_xxl256_nar_fsdp_clean_bestfid/2026-05-06-19-00-04/020-GPT-XXL/checkpoints/step_00075000_fid_3p163676_clean_no_kvcache.pt",
12
+ "note": "exact best-step checkpoint; KV cache removed"
13
+ }