lxazjk committed on 7 days ago

Commit

0062bb1

verified ·

1 Parent(s): 68e8459

Upload LlamaGen FlashAR checkpoints

Browse files

Files changed (17) hide show

B/FlashAR-LlamaGen-B.json +51 -0
B/FlashAR-LlamaGen-B.pt +3 -0
B/README.md +29 -0
B/configuration.json +1 -0
L/FlashAR-LlamaGen-L.pt +3 -0
L/README.md +14 -0
L/configuration.json +1 -0
L/metrics.json +13 -0
README.md +66 -0
XL/FlashAR-LlamaGen-XL.json +24 -0
XL/FlashAR-LlamaGen-XL.pt +3 -0
XL/README.md +29 -0
XL/configuration.json +1 -0
XXL/FlashAR-LlamaGen-XXL.pt +3 -0
XXL/README.md +14 -0
XXL/configuration.json +1 -0
XXL/metrics.json +13 -0

B/FlashAR-LlamaGen-B.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "name": "B",
+  "dst": "modelscope_upload/FlashAR-LlamaGen-B/FlashAR-LlamaGen-B.pt",
+  "source_checkpoint": "cloud_ckpt_vbranch_last2_hvgate_reg0p1_gptb/2026-05-01-17-46-47/000-GPT-B/checkpoints/last_version.pt",
+  "steps": 75000,
+  "metrics": {
+    "epoch": 29,
+    "step": 75000,
+    "npz_path": "results/000-GPT-B/./samples_vbranch_last2_hvgate_reg0p1_eval10_gptb/latest.npz",
+    "txt_path": "results/000-GPT-B/./samples_vbranch_last2_hvgate_reg0p1_eval10_gptb/latest.txt",
+    "inception_score": 208.30068969726562,
+    "fid": 4.680193238979371,
+    "sfid": 6.680051826699128,
+    "precision": 0.83106,
+    "recall": 0.4761
+  },
+  "original_model_key_count": 126,
+  "saved_model_key_count": 100,
+  "removed_cache_key_count": 26,
+  "removed_cache_keys": [
+    "layers.0.attention.kv_cache.k_cache",
+    "layers.0.attention.kv_cache.v_cache",
+    "layers.1.attention.kv_cache.k_cache",
+    "layers.1.attention.kv_cache.v_cache",
+    "layers.2.attention.kv_cache.k_cache",
+    "layers.2.attention.kv_cache.v_cache",
+    "layers.3.attention.kv_cache.k_cache",
+    "layers.3.attention.kv_cache.v_cache",
+    "layers.4.attention.kv_cache.k_cache",
+    "layers.4.attention.kv_cache.v_cache",
+    "layers.5.attention.kv_cache.k_cache",
+    "layers.5.attention.kv_cache.v_cache",
+    "layers.6.attention.kv_cache.k_cache",
+    "layers.6.attention.kv_cache.v_cache",
+    "layers.7.attention.kv_cache.k_cache",
+    "layers.7.attention.kv_cache.v_cache",
+    "layers.8.attention.kv_cache.k_cache",
+    "layers.8.attention.kv_cache.v_cache",
+    "layers.9.attention.kv_cache.k_cache",
+    "layers.9.attention.kv_cache.v_cache",
+    "layers.10.attention.kv_cache.k_cache",
+    "layers.10.attention.kv_cache.v_cache",
+    "layers.11.attention.kv_cache.k_cache",
+    "layers.11.attention.kv_cache.v_cache",
+    "layers.12.attention.kv_cache.k_cache",
+    "layers.12.attention.kv_cache.v_cache"
+  ],
+  "has_optimizer": false,
+  "has_scheduler": false,
+  "modelscope_repo": "lxazjk/FlashAR-LlamaGen-B"
+}

B/FlashAR-LlamaGen-B.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f756dc013daa3bd8241b2106f2a998e0caa60b998841ca1043c45554cd029b66
+size 526436890

B/README.md ADDED Viewed

	@@ -0,0 +1,29 @@

+# FlashAR-LlamaGen-B
+Best GPT-B checkpoint for FlashAR / LlamaGen NAR ImageNet-256 evaluation.
+## Files
+- `FlashAR-LlamaGen-B.pt`: clean inference checkpoint.
+- `FlashAR-LlamaGen-B.json`: metric and provenance sidecar.
+## Metrics
+- Dataset/eval: ImageNet-256
+- Step: 75,000
+- FID: 4.680193238979371
+- sFID: 6.680051826699128
+- Inception Score: 208.30068969726562
+- Precision: 0.83106
+- Recall: 0.4761
+## Checkpoint format
+The `.pt` file contains:
+- `model`: model state dict only
+- `args`: original training args
+- `steps`: training step
+- `metrics`: best eval metrics
+It does not contain optimizer state, scheduler state, or KV-cache buffers. The source training checkpoint contained 26 `kv_cache` buffers; they were explicitly removed before upload.

B/configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}

L/FlashAR-LlamaGen-L.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:208060f6fca77c03bee97457498603cc3b4a62ed02a9f7b07f9a29f4b2a0b051
+size 1494418211

L/README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+# FlashAR-LlamaGen-L
+Generated FlashAR / LlamaGen checkpoint.
+- Run: `008-GPT-L`
+- Variant: `GPT-L 256 vbranch_last4_hvgate_reg0p1`
+- FID: `3.174558946842069`
+- Step: `75000`
+- IS: `285.8968200683594`
+- sFID: `6.364996221369893`
+- Precision / Recall: `0.833` / `0.5349`
+- Checkpoint file: `FlashAR-LlamaGen-L.pt`
+- Note: this is the final checkpoint available for the run; it was selected for having the best available final-step FID among the GPT-L candidates.

L/configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}

L/metrics.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "repo": "FlashAR-LlamaGen-L",
+  "run": "008-GPT-L",
+  "variant": "GPT-L 256 vbranch_last4_hvgate_reg0p1",
+  "fid": 3.174558946842069,
+  "step": 75000,
+  "inception_score": 285.8968200683594,
+  "sfid": 6.364996221369893,
+  "precision": 0.833,
+  "recall": 0.5349,
+  "source_checkpoint": "cloud_ckpt_vbranch_last4_hvgate_reg0p1/2026-05-02-08-36-54/008-GPT-L/checkpoints/last_version.pt",
+  "note": "available final checkpoint; selected by best available final-step FID among GPT-L candidates"
+}

README.md ADDED Viewed

	@@ -0,0 +1,66 @@

+---
+license: apache-2.0
+tags:
+- image-generation
+- autoregressive
+- flashar
+- llamagen
+---
+# LlamaGen-FlashAR
+This repository collects FlashAR checkpoints for LlamaGen variants.
+FlashAR is introduced in **"FlashAR: Efficient Post-Training Acceleration for Autoregressive Image Generation"**. It accelerates raster-scan autoregressive image generation with a vertical prediction branch and a learnable fusion gate, enabling anti-diagonal decoding.
+- Paper: [arXiv:2605.09430](https://arxiv.org/abs/2605.09430)
+- Project page: [FlashAR](https://lxazjk.github.io/FlashAR/)
+- Code: [Emu3.5-NAR](https://github.com/lxazjk/Emu3.5-NAR)
+## Checkpoints
+| Variant | Directory | Checkpoint | Extra files |
+| --- | --- | --- | --- |
+| LlamaGen-B | `B/` | `B/FlashAR-LlamaGen-B.pt` | `B/FlashAR-LlamaGen-B.json` |
+| LlamaGen-L | `L/` | `L/FlashAR-LlamaGen-L.pt` | `L/metrics.json` |
+| LlamaGen-XL | `XL/` | `XL/FlashAR-LlamaGen-XL.pt` | `XL/FlashAR-LlamaGen-XL.json` |
+| LlamaGen-XXL | `XXL/` | `XXL/FlashAR-LlamaGen-XXL.pt` | `XXL/metrics.json` |
+Each subdirectory also includes the source model card and `configuration.json` from the corresponding ModelScope checkpoint.
+## Layout
+```text
+.
+├── B/
+│   ├── FlashAR-LlamaGen-B.pt
+│   ├── FlashAR-LlamaGen-B.json
+│   ├── configuration.json
+│   └── README.md
+├── L/
+│   ├── FlashAR-LlamaGen-L.pt
+│   ├── metrics.json
+│   ├── configuration.json
+│   └── README.md
+├── XL/
+│   ├── FlashAR-LlamaGen-XL.pt
+│   ├── FlashAR-LlamaGen-XL.json
+│   ├── configuration.json
+│   └── README.md
+└── XXL/
+    ├── FlashAR-LlamaGen-XXL.pt
+    ├── metrics.json
+    ├── configuration.json
+    └── README.md
+```
+## Citation
+```bibtex
+@article{zhou2026flashar,
+  title={FlashAR: Efficient Post-Training Acceleration for Autoregressive Image Generation},
+  author={Zhou, Junkang and He, Yefei and Chen, Feng and Wang, Weijie and Zhuang, Bohan},
+  journal={arXiv preprint arXiv:2605.09430},
+  year={2026}
+}
+```

XL/FlashAR-LlamaGen-XL.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "name": "XL",
+  "dst": "modelscope_upload/FlashAR-LlamaGen-XL/FlashAR-LlamaGen-XL.pt",
+  "source_checkpoint": "cloud_ckpt_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/2026-05-07-14-18-19/024-GPT-XL/checkpoints/last_version.pt",
+  "steps": 107500,
+  "metrics": {
+    "epoch": 42,
+    "step": 107500,
+    "npz_path": "./results/024-GPT-XL/./samples_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/latest.npz",
+    "txt_path": "./results/024-GPT-XL/./samples_llamagen_xl_imagenet256_cfg2_nogatecol_105k_to107p5k_lr5e6/latest.txt",
+    "inception_score": 259.35772705078125,
+    "fid": 3.054045162945613,
+    "sfid": 6.683951400631031,
+    "precision": 0.80102,
+    "recall": 0.5758
+  },
+  "original_model_key_count": 268,
+  "saved_model_key_count": 268,
+  "removed_cache_key_count": 0,
+  "removed_cache_keys": [],
+  "has_optimizer": false,
+  "has_scheduler": false,
+  "modelscope_repo": "lxazjk/FlashAR-LlamaGen-XL"
+}

XL/FlashAR-LlamaGen-XL.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b2202b3856e31e67a4dbd76a8e537fa4aa71f0726bbd36f682913d13d0347b8
+size 3268249674

XL/README.md ADDED Viewed

	@@ -0,0 +1,29 @@

+# FlashAR-LlamaGen-XL
+Best GPT-XL checkpoint for FlashAR / LlamaGen NAR ImageNet-256 evaluation.
+## Files
+- `FlashAR-LlamaGen-XL.pt`: clean inference checkpoint.
+- `FlashAR-LlamaGen-XL.json`: metric and provenance sidecar.
+## Metrics
+- Dataset/eval: ImageNet-256
+- Step: 107,500
+- FID: 3.054045162945613
+- sFID: 6.683951400631031
+- Inception Score: 259.35772705078125
+- Precision: 0.80102
+- Recall: 0.5758
+## Checkpoint format
+The `.pt` file contains:
+- `model`: model state dict only
+- `args`: original training args
+- `steps`: training step
+- `metrics`: best eval metrics
+It does not contain optimizer state, scheduler state, or KV-cache buffers. Verification found zero `kv_cache`, `k_cache`, or `v_cache` keys in this uploaded checkpoint.

XL/configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}

XXL/FlashAR-LlamaGen-XXL.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30cea896a09b64bd771f1e0ead47333caadbf713eeb3c316eb8a4f557b6f07f2
+size 5862212399

XXL/README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+# FlashAR-LlamaGen-XXL
+Generated FlashAR / LlamaGen checkpoint.
+- Run: `025-GPT-XXL`
+- Variant: `GPT-XXL 256 clean_bestfid_nogatecol`
+- FID: `3.163676372982593`
+- Step: `75000`
+- IS: `250.9591064453125`
+- sFID: `7.060702650024382`
+- Precision / Recall: `0.77906` / `0.6022`
+- Checkpoint file: `FlashAR-LlamaGen-XXL.pt`
+- Note: exact best-step checkpoint; KV cache removed

XXL/configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}

XXL/metrics.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "repo": "FlashAR-LlamaGen-XXL",
+  "run": "025-GPT-XXL",
+  "variant": "GPT-XXL 256 clean_bestfid_nogatecol",
+  "fid": 3.163676372982593,
+  "step": 75000,
+  "inception_score": 250.9591064453125,
+  "sfid": 7.060702650024382,
+  "precision": 0.77906,
+  "recall": 0.6022,
+  "source_checkpoint": "cloud_ckpt_xxl256_nar_fsdp_clean_bestfid/2026-05-06-19-00-04/020-GPT-XXL/checkpoints/step_00075000_fid_3p163676_clean_no_kvcache.pt",
+  "note": "exact best-step checkpoint; KV cache removed"
+}