Upload LlamaGen FlashAR checkpoints
Browse files
- B/FlashAR-LlamaGen-B.json +51 -0
- B/FlashAR-LlamaGen-B.pt +3 -0
- B/README.md +29 -0
- B/configuration.json +1 -0
- L/FlashAR-LlamaGen-L.pt +3 -0
- L/README.md +14 -0
- L/configuration.json +1 -0
- L/metrics.json +13 -0
- README.md +66 -0
- XL/FlashAR-LlamaGen-XL.json +24 -0
- XL/FlashAR-LlamaGen-XL.pt +3 -0
- XL/README.md +29 -0
- XL/configuration.json +1 -0
- XXL/FlashAR-LlamaGen-XXL.pt +3 -0
- XXL/README.md +14 -0
- XXL/configuration.json +1 -0
- XXL/metrics.json +13 -0
B/FlashAR-LlamaGen-B.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "B",
|
| 3 |
+
"dst": "modelscope_upload/FlashAR-LlamaGen-B/FlashAR-LlamaGen-B.pt",
|
| 4 |
+
"source_checkpoint": "cloud_ckpt_vbranch_last2_hvgate_reg0p1_gptb/2026-05-01-17-46-47/000-GPT-B/checkpoints/last_version.pt",
|
| 5 |
+
"steps": 75000,
|
| 6 |
+
"metrics": {
|
| 7 |
+
"epoch": 29,
|
| 8 |
+
"step": 75000,
|
| 9 |
+
"npz_path": "results/000-GPT-B/./samples_vbranch_last2_hvgate_reg0p1_eval10_gptb/latest.npz",
|
| 10 |
+
"txt_path": "results/000-GPT-B/./samples_vbranch_last2_hvgate_reg0p1_eval10_gptb/latest.txt",
|
| 11 |
+
"inception_score": 208.30068969726562,
|
| 12 |
+
"fid": 4.680193238979371,
|
| 13 |
+
"sfid": 6.680051826699128,
|
| 14 |
+
"precision": 0.83106,
|
| 15 |
+
"recall": 0.4761
|
| 16 |
+
},
|
| 17 |
+
"original_model_key_count": 126,
|
| 18 |
+
"saved_model_key_count": 100,
|
| 19 |
+
"removed_cache_key_count": 26,
|
| 20 |
+
"removed_cache_keys": [
|
| 21 |
+
"layers.0.attention.kv_cache.k_cache",
|
| 22 |
+
"layers.0.attention.kv_cache.v_cache",
|
| 23 |
+
"layers.1.attention.kv_cache.k_cache",
|
| 24 |
+
"layers.1.attention.kv_cache.v_cache",
|
| 25 |
+
"layers.2.attention.kv_cache.k_cache",
|
| 26 |
+
"layers.2.attention.kv_cache.v_cache",
|
| 27 |
+
"layers.3.attention.kv_cache.k_cache",
|
| 28 |
+
"layers.3.attention.kv_cache.v_cache",
|
| 29 |
+
"layers.4.attention.kv_cache.k_cache",
|
| 30 |
+
"layers.4.attention.kv_cache.v_cache",
|
| 31 |
+
"layers.5.attention.kv_cache.k_cache",
|
| 32 |
+
"layers.5.attention.kv_cache.v_cache",
|
| 33 |
+
"layers.6.attention.kv_cache.k_cache",
|
| 34 |
+
"layers.6.attention.kv_cache.v_cache",
|
| 35 |
+
"layers.7.attention.kv_cache.k_cache",
|
| 36 |
+
"layers.7.attention.kv_cache.v_cache",
|
| 37 |
+
"layers.8.attention.kv_cache.k_cache",
|
| 38 |
+
"layers.8.attention.kv_cache.v_cache",
|
| 39 |
+
"layers.9.attention.kv_cache.k_cache",
|
| 40 |
+
"layers.9.attention.kv_cache.v_cache",
|
| 41 |
+
"layers.10.attention.kv_cache.k_cache",
|
| 42 |
+
"layers.10.attention.kv_cache.v_cache",
|
| 43 |
+
"layers.11.attention.kv_cache.k_cache",
|
| 44 |
+
"layers.11.attention.kv_cache.v_cache",
|
| 45 |
+
"layers.12.attention.kv_cache.k_cache",
|
| 46 |
+
"layers.12.attention.kv_cache.v_cache"
|
| 47 |
+
],
|
| 48 |
+
"has_optimizer": false,
|
| 49 |
+
"has_scheduler": false,
|
| 50 |
+
"modelscope_repo": "lxazjk/FlashAR-LlamaGen-B"
|
| 51 |
+
}
|
B/FlashAR-LlamaGen-B.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f756dc013daa3bd8241b2106f2a998e0caa60b998841ca1043c45554cd029b66
|
| 3 |
+
size 526436890
|
B/README.md
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FlashAR-LlamaGen-B
|
| 2 |
+
|
| 3 |
+
Best GPT-B checkpoint for FlashAR / LlamaGen NAR ImageNet-256 evaluation.
|
| 4 |
+
|
| 5 |
+
## Files
|
| 6 |
+
|
| 7 |
+
- `FlashAR-LlamaGen-B.pt`: clean inference checkpoint.
|
| 8 |
+
- `FlashAR-LlamaGen-B.json`: metric and provenance sidecar.
|
| 9 |
+
|
| 10 |
+
## Metrics
|
| 11 |
+
|
| 12 |
+
- Dataset/eval: ImageNet-256
|
| 13 |
+
- Step: 75,000
|
| 14 |
+
- FID: 4.680193238979371
|
| 15 |
+
- sFID: 6.680051826699128
|
| 16 |
+
- Inception Score: 208.30068969726562
|
| 17 |
+
- Precision: 0.83106
|
| 18 |
+
- Recall: 0.4761
|
| 19 |
+
|
| 20 |
+
## Checkpoint format
|
| 21 |
+
|
| 22 |
+
The `.pt` file contains:
|
| 23 |
+
|
| 24 |
+
- `model`: model state dict only
|
| 25 |
+
- `args`: original training args
|
| 26 |
+
- `steps`: training step
|
| 27 |
+
- `metrics`: best eval metrics
|
| 28 |
+
|
| 29 |
+
It does not contain optimizer state, scheduler state, or KV-cache buffers. The source training checkpoint contained 26 `kv_cache` buffers; they were explicitly removed before upload.
|
B/configuration.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
L/FlashAR-LlamaGen-L.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:208060f6fca77c03bee97457498603cc3b4a62ed02a9f7b07f9a29f4b2a0b051
|
| 3 |
+
size 1494418211
|
L/README.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FlashAR-LlamaGen-L
|
| 2 |
+
|
| 3 |
+
FlashAR checkpoint for the LlamaGen GPT-L variant.
|
| 4 |
+
|
| 5 |
+
- Run: `008-GPT-L`
|
| 6 |
+
- Variant: `GPT-L 256 vbranch_last4_hvgate_reg0p1`
|
| 7 |
+
- FID: `3.174558946842069`
|
| 8 |
+
- Step: `75000`
|
| 9 |
+
- IS: `285.8968200683594`
|
| 10 |
+
- sFID: `6.364996221369893`
|
| 11 |
+
- Precision / Recall: `0.833` / `0.5349`
|
| 12 |
+
- Checkpoint file: `FlashAR-LlamaGen-L.pt`
|
| 13 |
+
- Note: this is the final checkpoint available for this run; it was selected for having the best final-step FID among the available GPT-L candidates.
|
| 14 |
+
|
L/configuration.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
L/metrics.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"repo": "FlashAR-LlamaGen-L",
|
| 3 |
+
"run": "008-GPT-L",
|
| 4 |
+
"variant": "GPT-L 256 vbranch_last4_hvgate_reg0p1",
|
| 5 |
+
"fid": 3.174558946842069,
|
| 6 |
+
"step": 75000,
|
| 7 |
+
"inception_score": 285.8968200683594,
|
| 8 |
+
"sfid": 6.364996221369893,
|
| 9 |
+
"precision": 0.833,
|
| 10 |
+
"recall": 0.5349,
|
| 11 |
+
"source_checkpoint": "cloud_ckpt_vbranch_last4_hvgate_reg0p1/2026-05-02-08-36-54/008-GPT-L/checkpoints/last_version.pt",
|
| 12 |
+
"note": "available final checkpoint; selected by best available final-step FID among GPT-L candidates"
|
| 13 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
tags:
|
| 4 |
+
- image-generation
|
| 5 |
+
- autoregressive
|
| 6 |
+
- flashar
|
| 7 |
+
- llamagen
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# LlamaGen-FlashAR
|
| 11 |
+
|
| 12 |
+
This repository collects FlashAR checkpoints for LlamaGen variants.
|
| 13 |
+
|
| 14 |
+
FlashAR is introduced in **"FlashAR: Efficient Post-Training Acceleration for Autoregressive Image Generation"**. It accelerates raster-scan autoregressive image generation with a vertical prediction branch and a learnable fusion gate, enabling anti-diagonal decoding.
|
| 15 |
+
|
| 16 |
+
- Paper: [arXiv:2605.09430](https://arxiv.org/abs/2605.09430)
|
| 17 |
+
- Project page: [FlashAR](https://lxazjk.github.io/FlashAR/)
|
| 18 |
+
- Code: [Emu3.5-NAR](https://github.com/lxazjk/Emu3.5-NAR)
|
| 19 |
+
|
| 20 |
+
## Checkpoints
|
| 21 |
+
|
| 22 |
+
| Variant | Directory | Checkpoint | Extra files |
|
| 23 |
+
| --- | --- | --- | --- |
|
| 24 |
+
| LlamaGen-B | `B/` | `B/FlashAR-LlamaGen-B.pt` | `B/FlashAR-LlamaGen-B.json` |
|
| 25 |
+
| LlamaGen-L | `L/` | `L/FlashAR-LlamaGen-L.pt` | `L/metrics.json` |
|
| 26 |
+
| LlamaGen-XL | `XL/` | `XL/FlashAR-LlamaGen-XL.pt` | `XL/FlashAR-LlamaGen-XL.json` |
|
| 27 |
+
| LlamaGen-XXL | `XXL/` | `XXL/FlashAR-LlamaGen-XXL.pt` | `XXL/metrics.json` |
|
| 28 |
+
|
| 29 |
+
Each subdirectory also includes the model card (`README.md`) and `configuration.json` copied from the corresponding ModelScope repository.
|
| 30 |
+
|
| 31 |
+
## Layout
|
| 32 |
+
|
| 33 |
+
```text
|
| 34 |
+
.
|
| 35 |
+
├── B/
|
| 36 |
+
│ ├── FlashAR-LlamaGen-B.pt
|
| 37 |
+
│ ├── FlashAR-LlamaGen-B.json
|
| 38 |
+
│ ├── configuration.json
|
| 39 |
+
│ └── README.md
|
| 40 |
+
├── L/
|
| 41 |
+
│ ├── FlashAR-LlamaGen-L.pt
|
| 42 |
+
│ ├── metrics.json
|
| 43 |
+
│ ├── configuration.json
|
| 44 |
+
│ └── README.md
|
| 45 |
+
├── XL/
|
| 46 |
+
│ ├── FlashAR-LlamaGen-XL.pt
|
| 47 |
+
│ ├── FlashAR-LlamaGen-XL.json
|
| 48 |
+
│ ├── configuration.json
|
| 49 |
+
│ └── README.md
|
| 50 |
+
└── XXL/
|
| 51 |
+
  ├── FlashAR-LlamaGen-XXL.pt
|
| 52 |
+
  ├── metrics.json
|
| 53 |
+
  ├── configuration.json
|
| 54 |
+
  └── README.md
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
## Citation
|
| 58 |
+
|
| 59 |
+
```bibtex
|
| 60 |
+
@article{zhou2026flashar,
|
| 61 |
+
title={FlashAR: Efficient Post-Training Acceleration for Autoregressive Image Generation},
|
| 62 |
+
author={Zhou, Junkang and He, Yefei and Chen, Feng and Wang, Weijie and Zhuang, Bohan},
|
| 63 |
+
journal={arXiv preprint arXiv:2605.09430},
|
| 64 |
+
year={2026}
|
| 65 |
+
}
|
| 66 |
+
```
|
XL/FlashAR-LlamaGen-XL.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "XL",
|
| 3 |
+
"dst": "modelscope_upload/FlashAR-LlamaGen-XL/FlashAR-LlamaGen-XL.pt",
|
| 4 |
+
"source_checkpoint": "cloud_ckpt_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/2026-05-07-14-18-19/024-GPT-XL/checkpoints/last_version.pt",
|
| 5 |
+
"steps": 107500,
|
| 6 |
+
"metrics": {
|
| 7 |
+
"epoch": 42,
|
| 8 |
+
"step": 107500,
|
| 9 |
+
"npz_path": "./results/024-GPT-XL/./samples_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/latest.npz",
|
| 10 |
+
"txt_path": "./results/024-GPT-XL/./samples_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/latest.txt",
|
| 11 |
+
"inception_score": 259.35772705078125,
|
| 12 |
+
"fid": 3.054045162945613,
|
| 13 |
+
"sfid": 6.683951400631031,
|
| 14 |
+
"precision": 0.80102,
|
| 15 |
+
"recall": 0.5758
|
| 16 |
+
},
|
| 17 |
+
"original_model_key_count": 268,
|
| 18 |
+
"saved_model_key_count": 268,
|
| 19 |
+
"removed_cache_key_count": 0,
|
| 20 |
+
"removed_cache_keys": [],
|
| 21 |
+
"has_optimizer": false,
|
| 22 |
+
"has_scheduler": false,
|
| 23 |
+
"modelscope_repo": "lxazjk/FlashAR-LlamaGen-XL"
|
| 24 |
+
}
|
XL/FlashAR-LlamaGen-XL.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b2202b3856e31e67a4dbd76a8e537fa4aa71f0726bbd36f682913d13d0347b8
|
| 3 |
+
size 3268249674
|
XL/README.md
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FlashAR-LlamaGen-XL
|
| 2 |
+
|
| 3 |
+
Best GPT-XL checkpoint for FlashAR / LlamaGen NAR ImageNet-256 evaluation.
|
| 4 |
+
|
| 5 |
+
## Files
|
| 6 |
+
|
| 7 |
+
- `FlashAR-LlamaGen-XL.pt`: clean inference checkpoint.
|
| 8 |
+
- `FlashAR-LlamaGen-XL.json`: metric and provenance sidecar.
|
| 9 |
+
|
| 10 |
+
## Metrics
|
| 11 |
+
|
| 12 |
+
- Dataset/eval: ImageNet-256
|
| 13 |
+
- Step: 107,500
|
| 14 |
+
- FID: 3.054045162945613
|
| 15 |
+
- sFID: 6.683951400631031
|
| 16 |
+
- Inception Score: 259.35772705078125
|
| 17 |
+
- Precision: 0.80102
|
| 18 |
+
- Recall: 0.5758
|
| 19 |
+
|
| 20 |
+
## Checkpoint format
|
| 21 |
+
|
| 22 |
+
The `.pt` file contains:
|
| 23 |
+
|
| 24 |
+
- `model`: model state dict only
|
| 25 |
+
- `args`: original training args
|
| 26 |
+
- `steps`: training step
|
| 27 |
+
- `metrics`: best eval metrics
|
| 28 |
+
|
| 29 |
+
It does not contain optimizer state, scheduler state, or KV-cache buffers. Verification found zero `kv_cache`, `k_cache`, or `v_cache` keys in this uploaded checkpoint.
|
XL/configuration.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
XXL/FlashAR-LlamaGen-XXL.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30cea896a09b64bd771f1e0ead47333caadbf713eeb3c316eb8a4f557b6f07f2
|
| 3 |
+
size 5862212399
|
XXL/README.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FlashAR-LlamaGen-XXL
|
| 2 |
+
|
| 3 |
+
FlashAR checkpoint for the LlamaGen GPT-XXL variant.
|
| 4 |
+
|
| 5 |
+
- Run: `025-GPT-XXL`
|
| 6 |
+
- Variant: `GPT-XXL 256 clean_bestfid_nogatecol`
|
| 7 |
+
- FID: `3.163676372982593`
|
| 8 |
+
- Step: `75000`
|
| 9 |
+
- IS: `250.9591064453125`
|
| 10 |
+
- sFID: `7.060702650024382`
|
| 11 |
+
- Precision / Recall: `0.77906` / `0.6022`
|
| 12 |
+
- Checkpoint file: `FlashAR-LlamaGen-XXL.pt`
|
| 13 |
+
- Note: exact best-step checkpoint; KV cache removed
|
| 14 |
+
|
XXL/configuration.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
XXL/metrics.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"repo": "FlashAR-LlamaGen-XXL",
|
| 3 |
+
"run": "025-GPT-XXL",
|
| 4 |
+
"variant": "GPT-XXL 256 clean_bestfid_nogatecol",
|
| 5 |
+
"fid": 3.163676372982593,
|
| 6 |
+
"step": 75000,
|
| 7 |
+
"inception_score": 250.9591064453125,
|
| 8 |
+
"sfid": 7.060702650024382,
|
| 9 |
+
"precision": 0.77906,
|
| 10 |
+
"recall": 0.6022,
|
| 11 |
+
"source_checkpoint": "cloud_ckpt_xxl256_nar_fsdp_clean_bestfid/2026-05-06-19-00-04/020-GPT-XXL/checkpoints/step_00075000_fid_3p163676_clean_no_kvcache.pt",
|
| 12 |
+
"note": "exact best-step checkpoint; KV cache removed"
|
| 13 |
+
}
|