Xssama committed on
Commit
81dd8f1
·
verified ·
1 Parent(s): 489dde2

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
  *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
1
  *.ckpt filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Oussama Zekri
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: pytorch
4
+ tags:
5
+ - gdds
6
+ - discrete-diffusion
7
+ - language-modeling
8
+ - research
9
+ - pytorch
10
+ ---
11
+
12
+ # GDDS Checkpoints
13
+
14
+ Official checkpoint bundle for the paper **Generalized Discrete Diffusion from Snapshots**.
15
+
16
+ ## Model Sources
17
+
18
+ - Paper: https://huggingface.co/papers/2603.21342
19
+ - arXiv: https://arxiv.org/abs/2603.21342
20
+ - Code: https://github.com/ozekri/gdds
21
+ - Project page: https://oussamazekri.fr/gdds
22
+
23
+ ## Included checkpoints
24
+
25
+ | File | Method | Notes |
26
+ | --- | --- | --- |
27
+ | `checkpoints/gdds_gauss_500k.ckpt` | GDDS | 500k-step checkpoint with the Gaussian SIK forward process |
28
+ | `checkpoints/gdds_uniform_500k.ckpt` | GDDS | 500k-step checkpoint with the uniform forward process |
29
+ | `checkpoints/gdds_absorb_500k.ckpt` | GDDS | 500k-step checkpoint with the absorbing forward process |
30
+ | `checkpoints/mdlm_500k.ckpt` | MDLM | 500k-step baseline checkpoint |
31
+ | `checkpoints/udlm_500k.ckpt` | UDLM | 500k-step baseline checkpoint |
32
+ | `checkpoints/ar_500k.ckpt` | AR | 500k-step autoregressive baseline checkpoint |
33
+
34
+ `best.ckpt` is intentionally not included in this repository.
35
+
36
+ ## Usage
37
+
38
+ These files are PyTorch Lightning checkpoints intended to be used with the
39
+ `gdds` codebase.
40
+
41
+ ```bash
42
+ git clone https://github.com/ozekri/gdds.git
43
+ cd gdds
44
+ pip install -r requirements.txt
45
+ pip install -e .
46
+
47
+ PYTHONPATH=src python -m discrete_diffusion.evaluations.ppl_eval \
48
+ data=openwebtext \
49
+ model=small \
50
+ algo=mdlm \
51
+ eval.checkpoint_path=/path/to/checkpoints/mdlm_500k.ckpt
52
+ ```
53
+
54
+ For sampling and other evaluations, use the same repository and pass the
55
+ relevant checkpoint path through the evaluation config.
56
+
57
+ ## Citation
58
+
59
+ ```bibtex
60
+ @misc{zekri2026generalizeddiscretediffusionsnapshots,
61
+ title={Generalized Discrete Diffusion from Snapshots},
62
+ author={Oussama Zekri and Th{\'e}o Uscidda and Nicolas Boull{\'e} and Anna Korba},
63
+ year={2026},
64
+ eprint={2603.21342},
65
+ archivePrefix={arXiv},
66
+ primaryClass={stat.ML},
67
+ url={https://arxiv.org/abs/2603.21342},
68
+ }
69
+ ```
checkpoints/ar_500k.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021fbd60935147007f0683f1f32c2b4946b744780224e76747cd77c821ffd831
3
+ size 2597675780
checkpoints/gdds_absorb_500k.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e3d0b83777d44f90fb1446fdb6b950298ae46092ad35e0099e89ab40c1a2e70
3
+ size 2716020118
checkpoints/gdds_gauss_500k.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64e0bd726824cbad11d97776c7860c67902e879d5b36d1c90b293f0fbe44ddca
3
+ size 2716033172
checkpoints/gdds_uniform_500k.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:292f445835c288772dd8edf68c0978e37768c1f2cdeb5f9d1a6f917d0b8a7436
3
+ size 2716019798
checkpoints/mdlm_500k.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02075903d2fd238e460d6a4e29b377101d57ef1d900e6edd34f3af0e4b48bff8
3
+ size 2715818200
checkpoints/udlm_500k.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:078d08c0db2186a14eedeada36e757a7f3e7f0be018a77dd30053f019414562a
3
+ size 2716019798