BryanW commited on
Commit
33da3d2
·
verified ·
1 Parent(s): 555ed21

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +19 -0
  2. Koala-36M-v1/.gitattributes +68 -0
  3. Koala-36M-v1/Koala_36M_1.csv +3 -0
  4. Koala-36M-v1/Koala_36M_10.csv +3 -0
  5. Koala-36M-v1/Koala_36M_2.csv +3 -0
  6. Koala-36M-v1/Koala_36M_3.csv +3 -0
  7. Koala-36M-v1/Koala_36M_4.csv +3 -0
  8. Koala-36M-v1/Koala_36M_5.csv +3 -0
  9. Koala-36M-v1/Koala_36M_6.csv +3 -0
  10. Koala-36M-v1/Koala_36M_7.csv +3 -0
  11. Koala-36M-v1/Koala_36M_8.csv +3 -0
  12. Koala-36M-v1/Koala_36M_9.csv +3 -0
  13. URSA-1.7B/.gitattributes +37 -0
  14. URSA-1.7B/.gitignore +55 -0
  15. URSA-1.7B/LICENSE +176 -0
  16. URSA-1.7B/README.md +117 -0
  17. URSA-1.7B/model_index.json +19 -0
  18. URSA-1.7B/scheduler/__scheduler__.py +17 -0
  19. URSA-1.7B/scheduler/scheduler_config.json +7 -0
  20. URSA-1.7B/tokenizer/tokenizer_config.json +239 -0
  21. URSA-1.7B/transformer/__transformer__.py +17 -0
  22. URSA-1.7B/transformer/config.json +13 -0
  23. URSA-1.7B/transformer/diffusion_pytorch_model.safetensors +3 -0
  24. URSA-1.7B/vae/__vae__.py +17 -0
  25. URSA-1.7B/vae/config.json +22 -0
  26. URSA/.flake8 +21 -0
  27. URSA/.gitignore +55 -0
  28. URSA/=4.57.1 +70 -0
  29. URSA/LICENSE +176 -0
  30. URSA/README.md +191 -0
  31. URSA/accelerate_configs/deepspeed_zero2.yaml +12 -0
  32. URSA/assets/sample_image.jpg +0 -0
  33. URSA/configs/distill_dimo.yaml +158 -0
  34. URSA/configs/onestep_dimo.yaml +111 -0
  35. URSA/configs/ursa_0.6b_fsq320.yaml +62 -0
  36. URSA/configs/ursa_0.6b_ibq1024.yaml +62 -0
  37. URSA/configs/ursa_1.7b_fsq320.yaml +62 -0
  38. URSA/configs/ursa_1.7b_ibq1024.yaml +62 -0
  39. URSA/diffnext/__init__.py +16 -0
  40. URSA/diffnext/__pycache__/__init__.cpython-312.pyc +0 -0
  41. URSA/diffnext/__pycache__/image_processor.cpython-312.pyc +0 -0
  42. URSA/diffnext/data/__init__.py +16 -0
  43. URSA/diffnext/data/flex_loaders.py +172 -0
  44. URSA/diffnext/data/flex_pipelines.py +63 -0
  45. URSA/diffnext/data/flex_transforms.py +66 -0
  46. URSA/diffnext/engine/__init__.py +16 -0
  47. URSA/diffnext/engine/__pycache__/__init__.cpython-312.pyc +0 -0
  48. URSA/diffnext/engine/__pycache__/engine_utils.cpython-312.pyc +0 -0
  49. URSA/diffnext/engine/__pycache__/lr_scheduler.cpython-312.pyc +0 -0
  50. URSA/diffnext/engine/engine_utils.py +109 -0
.gitattributes CHANGED
@@ -132,3 +132,22 @@ URSA/outputs/eval_distill_v3_100steps_49frames/03_s2_a_hummingbird_hovers_in_fro
132
  URSA/outputs/eval_distill_v3_100steps_49frames/03_s2_a_hummingbird_hovers_in_front_of_a_red_f_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
133
  URSA/outputs/eval_distill_v3_100steps_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
134
  URSA/outputs/eval_distill_v3_100steps_49frames/00_s2_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  URSA/outputs/eval_distill_v3_100steps_49frames/03_s2_a_hummingbird_hovers_in_front_of_a_red_f_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
133
  URSA/outputs/eval_distill_v3_100steps_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
134
  URSA/outputs/eval_distill_v3_100steps_49frames/00_s2_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
135
+ URSA/outputs/eval_distill_v3_100steps_49frames/01_s3_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
136
+ URSA/outputs/eval_distill_v3_100steps_49frames/03_s3_a_hummingbird_hovers_in_front_of_a_red_f_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
137
+ URSA/outputs/eval_distill_v3_100steps_49frames/01_s2_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
138
+ URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
139
+ URSA/outputs/eval_distill_49frames/00_s2_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
140
+ URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
141
+ URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
142
+ URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
143
+ URSA/outputs/eval_distill_v3_200steps_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_baked.mp4 filter=lfs diff=lfs merge=lfs -text
144
+ Koala-36M-v1/Koala_36M_7.csv filter=lfs diff=lfs merge=lfs -text
145
+ Koala-36M-v1/Koala_36M_10.csv filter=lfs diff=lfs merge=lfs -text
146
+ Koala-36M-v1/Koala_36M_8.csv filter=lfs diff=lfs merge=lfs -text
147
+ Koala-36M-v1/Koala_36M_5.csv filter=lfs diff=lfs merge=lfs -text
148
+ Koala-36M-v1/Koala_36M_3.csv filter=lfs diff=lfs merge=lfs -text
149
+ Koala-36M-v1/Koala_36M_4.csv filter=lfs diff=lfs merge=lfs -text
150
+ Koala-36M-v1/Koala_36M_1.csv filter=lfs diff=lfs merge=lfs -text
151
+ Koala-36M-v1/Koala_36M_2.csv filter=lfs diff=lfs merge=lfs -text
152
+ Koala-36M-v1/Koala_36M_6.csv filter=lfs diff=lfs merge=lfs -text
153
+ Koala-36M-v1/Koala_36M_9.csv filter=lfs diff=lfs merge=lfs -text
Koala-36M-v1/.gitattributes ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar filter=lfs diff=lfs merge=lfs -text
30
+ *.tflite filter=lfs diff=lfs merge=lfs -text
31
+ *.tgz filter=lfs diff=lfs merge=lfs -text
32
+ *.wasm filter=lfs diff=lfs merge=lfs -text
33
+ *.xz filter=lfs diff=lfs merge=lfs -text
34
+ *.zip filter=lfs diff=lfs merge=lfs -text
35
+ *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ # Audio files - uncompressed
38
+ *.pcm filter=lfs diff=lfs merge=lfs -text
39
+ *.sam filter=lfs diff=lfs merge=lfs -text
40
+ *.raw filter=lfs diff=lfs merge=lfs -text
41
+ # Audio files - compressed
42
+ *.aac filter=lfs diff=lfs merge=lfs -text
43
+ *.flac filter=lfs diff=lfs merge=lfs -text
44
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
45
+ *.ogg filter=lfs diff=lfs merge=lfs -text
46
+ *.wav filter=lfs diff=lfs merge=lfs -text
47
+ # Image files - uncompressed
48
+ *.bmp filter=lfs diff=lfs merge=lfs -text
49
+ *.gif filter=lfs diff=lfs merge=lfs -text
50
+ *.png filter=lfs diff=lfs merge=lfs -text
51
+ *.tiff filter=lfs diff=lfs merge=lfs -text
52
+ # Image files - compressed
53
+ *.jpg filter=lfs diff=lfs merge=lfs -text
54
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
55
+ *.webp filter=lfs diff=lfs merge=lfs -text
56
+ # Video files - compressed
57
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
58
+ *.webm filter=lfs diff=lfs merge=lfs -text
59
+ Koala_36M_1.csv filter=lfs diff=lfs merge=lfs -text
60
+ Koala_36M_2.csv filter=lfs diff=lfs merge=lfs -text
61
+ Koala_36M_3.csv filter=lfs diff=lfs merge=lfs -text
62
+ Koala_36M_4.csv filter=lfs diff=lfs merge=lfs -text
63
+ Koala_36M_5.csv filter=lfs diff=lfs merge=lfs -text
64
+ Koala_36M_6.csv filter=lfs diff=lfs merge=lfs -text
65
+ Koala_36M_7.csv filter=lfs diff=lfs merge=lfs -text
66
+ Koala_36M_8.csv filter=lfs diff=lfs merge=lfs -text
67
+ Koala_36M_9.csv filter=lfs diff=lfs merge=lfs -text
68
+ Koala_36M_10.csv filter=lfs diff=lfs merge=lfs -text
Koala-36M-v1/Koala_36M_1.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5721d746552bcf48ca2c85d383eb3aee8a9d724cb8b498448e283e6c155b65f3
3
+ size 4889903599
Koala-36M-v1/Koala_36M_10.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aa2590eb8302cf43106e7faf7ef36849fedeb6c5d5ca1ee214635f820adf807
3
+ size 4888525462
Koala-36M-v1/Koala_36M_2.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0da912f9903bcc06e077fd84e116f0497782743f35b4c1bfe06223e071720f2a
3
+ size 4889857219
Koala-36M-v1/Koala_36M_3.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5b7cf12f398b9379ac4b6e65c4d5e3154be362513db781ede873d2ee485b112
3
+ size 4889283599
Koala-36M-v1/Koala_36M_4.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b75062281023cf982e885cabf79963482fd23e683cfb8e1c68d7ad6c1e363637
3
+ size 4889718227
Koala-36M-v1/Koala_36M_5.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd185667807e084a760bf3708d5da284a8603a48581a15b9810bded0f7fb4f7c
3
+ size 4889216599
Koala-36M-v1/Koala_36M_6.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69af3c329b77c8fe5fe2a3fd7b52ccce1f88f2649f4cc13e76ab27ecca5a5efa
3
+ size 4889541704
Koala-36M-v1/Koala_36M_7.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f654daa45977d2c12db1c22fbca9ef5bb729ba37240b83d0ed0bd1ca8008175
3
+ size 4889367231
Koala-36M-v1/Koala_36M_8.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d1b984b48a839619b82d1db10c27a518d89c66815be010feaee76816eb59ccd
3
+ size 4888856454
Koala-36M-v1/Koala_36M_9.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62b8a588768377d49d92b9a4ea5eb9745537399b1fd1ccc556721edb96bc4ca
3
+ size 4889171948
URSA-1.7B/.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ . filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
URSA-1.7B/.gitignore ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Compiled Object files
2
+ *.slo
3
+ *.lo
4
+ *.o
5
+ *.cuo
6
+
7
+ # Compiled Dynamic libraries
8
+ *.so
9
+ *.dll
10
+ *.dylib
11
+
12
+ # Compiled Static libraries
13
+ *.lai
14
+ *.la
15
+ *.a
16
+ *.lib
17
+
18
+ # Compiled python
19
+ *.pyc
20
+ __pycache__
21
+
22
+ # Compiled MATLAB
23
+ *.mex*
24
+
25
+ # IPython notebook checkpoints
26
+ .ipynb_checkpoints
27
+
28
+ # Editor temporaries
29
+ *.swp
30
+ *~
31
+
32
+ # Sublime Text settings
33
+ *.sublime-workspace
34
+ *.sublime-project
35
+
36
+ # Eclipse Project settings
37
+ *.*project
38
+ .settings
39
+
40
+ # QtCreator files
41
+ *.user
42
+
43
+ # VSCode files
44
+ .vscode
45
+
46
+ # IDEA files
47
+ .idea
48
+
49
+ # OSX dir files
50
+ .DS_Store
51
+
52
+ # Android files
53
+ .gradle
54
+ *.iml
55
+ local.properties
URSA-1.7B/LICENSE ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
URSA-1.7B/README.md ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: diffusers
3
+ license: apache-2.0
4
+ license_link: https://huggingface.co/BAAI/URSA-1.7B-FSQ320/blob/main/LICENSE
5
+ pipeline_tag: text-to-video
6
+ base_model:
7
+ - Qwen/Qwen3-1.7B
8
+ ---
9
+
10
+ # URSA-1.7B-FSQ320 Model Card
11
+
12
+ ## Model Details
13
+ - **Developed by:** BAAI
14
+ - **Model type:** Text-to-Video Generation Model
15
+ - **Model size:** 1.7B
16
+ - **Model precision:** torch.float16 (FP16)
17
+ - **Model resolution:** 512x320
18
+ - **Model paper:** [Uniform Discrete Diffusion with Metric Path for Video Generation](https://arxiv.org/abs/2510.24717)
19
+ - **Model family:** [BAAI-Vision-URSA](https://github.com/baaivision/URSA)
20
+ - **Model Tokenizer:** [Cosmos-Tokenize1-DV4x8x8-360p](https://huggingface.co/nvidia/Cosmos-Tokenize1-DV4x8x8-360p)
21
+ - **Model Description:** This is a model that can be used to generate and modify videos based on text prompts.
22
+
23
+ ## Examples
24
+
25
+ Using the [🤗's Diffusers library](https://github.com/huggingface/diffusers) to run URSA in a simple and efficient manner.
26
+
27
+ ```bash
28
+ pip install diffusers transformers accelerate imageio[ffmpeg]
29
+ pip install git+ssh://git@github.com/baaivision/URSA.git
30
+ ```
31
+
32
+ Running the pipeline:
33
+
34
+ ```python
35
+ import os, torch, numpy
36
+ from diffnext.pipelines import URSAPipeline
37
+ from diffnext.utils import export_to_video
38
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
39
+
40
+ model_id, height, width = "BAAI/URSA-1.7B-FSQ320", 320, 512
41
+ model_args = {"torch_dtype": torch.float16, "trust_remote_code": True}
42
+ pipe = URSAPipeline.from_pretrained(model_id, **model_args)
43
+ pipe = pipe.to(torch.device("cuda"))
44
+
45
+ text_prompt = "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur."
46
+ negative_prompt = "worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly"
47
+
48
+ # Text-to-Image
49
+ prompt = text_prompt
50
+ num_frames, num_inference_steps = 1, 25
51
+ image = pipe(**locals()).frames[0]
52
+ image.save("ursa.jpg")
53
+
54
+ # Image-to-Video
55
+ prompt = f"motion=9.0, {text_prompt}"
56
+ num_frames, num_inference_steps = 49, 50
57
+ video = pipe(**locals()).frames[0]
58
+ export_to_video(video, "ursa_1+48f.mp4", fps=12)
59
+
60
+ # Text-to-Video
61
+ image, video = None, None
62
+ prompt = f"motion=9.0, {text_prompt}"
63
+ num_frames, num_inference_steps = 49, 50
64
+ video = pipe(**locals()).frames[0]
65
+ export_to_video(video, "ursa_49f.mp4", fps=12)
66
+
67
+ # Video-to-Video
68
+ prompt = f"motion=5.0, {text_prompt}"
69
+ num_frames, num_inference_steps = 49, 50
70
+ num_cond_frames, cond_noise_scale = 13, 0.1
71
+ for i in range(12):
72
+ video, start_video = video[-num_cond_frames:], video
73
+ video = pipe(**locals()).frames[0]
74
+ video = numpy.concatenate([start_video, video[num_cond_frames:]])
75
+ export_to_video(video, "ursa_{}f.mp4".format(video.shape[0]), fps=12)
76
+ ```
77
+
78
+ # Uses
79
+
80
+ ## Direct Use
81
+ The model is intended for research purposes only. Possible research areas and tasks include
82
+
83
+ - Research on generative models.
84
+ - Applications in educational or creative tools.
85
+ - Generation of artworks and use in design and other artistic processes.
86
+ - Probing and understanding the limitations and biases of generative models.
87
+ - Safe deployment of models which have the potential to generate harmful content.
88
+
89
+ Excluded uses are described below.
90
+
91
+ #### Out-of-Scope Use
92
+ The model was not trained to be factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model.
93
+
94
+ #### Misuse and Malicious Use
95
+ Using the model to generate content that is cruel to individuals is a misuse of this model. This includes, but is not limited to:
96
+
97
+ - Mis- and disinformation.
98
+ - Representations of egregious violence and gore.
99
+ - Impersonating individuals without their consent.
100
+ - Sexual content without consent of the people who might see it.
101
+ - Sharing of copyrighted or licensed material in violation of its terms of use.
102
+ - Intentionally promoting or propagating discriminatory content or harmful stereotypes.
103
+ - Sharing content that is an alteration of copyrighted or licensed material in violation of its terms of use.
104
+ - Generating demeaning, dehumanizing, or otherwise harmful representations of people or their environments, cultures, religions, etc.
105
+
106
+ ## Limitations and Bias
107
+
108
+ ### Limitations
109
+
110
+ - The autoencoding part of the model is lossy.
111
+ - The model cannot render complex legible text.
112
+ - The model does not achieve perfect photorealism.
113
+ - The fingers, .etc in general may not be generated properly.
114
+ - The model was trained on a subset of the web datasets [LAION-5B](https://laion.ai/blog/laion-5b/) and [COYO-700M](https://github.com/kakaobrain/coyo-dataset), which contains adult, violent and sexual content.
115
+
116
+ ### Bias
117
+ While the capabilities of image generation models are impressive, they can also reinforce or exacerbate social biases.
URSA-1.7B/model_index.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "URSAPipeline",
3
+ "tokenizer": [
4
+ "transformers",
5
+ "Qwen2TokenizerFast"
6
+ ],
7
+ "scheduler": [
8
+ "__scheduler__",
9
+ "KineticOptimalScheduler"
10
+ ],
11
+ "transformer": [
12
+ "__transformer__",
13
+ "URSATransformer3DModel"
14
+ ],
15
+ "vae": [
16
+ "__vae__",
17
+ "AutoencoderVQCosmos3D"
18
+ ]
19
+ }
URSA-1.7B/scheduler/__scheduler__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ ##############################################################################
15
+ """Scheduler."""
16
+
17
+ from diffnext.schedulers.scheduling_dfm import KineticOptimalScheduler # noqa
URSA-1.7B/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "KineticOptimalScheduler",
3
+ "alpha": 1.0,
4
+ "c": 5,
5
+ "eps": 1e-5,
6
+ "shift": 4.0
7
+ }
URSA-1.7B/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = 
content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|im_end|>",
233
+ "errors": "replace",
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
URSA-1.7B/transformer/__transformer__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ ##############################################################################
15
+ """Transformer model."""
16
+
17
+ from diffnext.models.transformers.transformer_ursa import URSATransformer3DModel # noqa
URSA-1.7B/transformer/config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hidden_size": 2048,
3
+ "intermediate_size": 6144,
4
+ "max_window_layers": 28,
5
+ "num_attention_heads": 16,
6
+ "num_key_value_heads": 8,
7
+ "num_hidden_layers": 28,
8
+ "rope_theta": 1000000,
9
+ "vocab_size": 215669,
10
+ "lm_vocab_size": 151669,
11
+ "lm_head_size": 64000,
12
+ "bov_token_id": 151652
13
+ }
URSA-1.7B/transformer/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a50d661919972cd5c8640ca3c9e5824945d105fb714a0de2f7610a4e7bebb8
3
+ size 3964379808
URSA-1.7B/vae/__vae__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ ##############################################################################
15
+ """VAE model."""
16
+
17
+ from diffnext.models.autoencoders.autoencoder_vq_cosmos3d import AutoencoderVQCosmos3D # noqa
URSA-1.7B/vae/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderVQCosmos3D",
3
+ "_quantizer_name": "FSQuantizer",
4
+ "in_channels": 3,
5
+ "latent_channels": 256,
6
+ "layers_per_block": 2,
7
+ "norm_num_groups": 1,
8
+ "out_channels": 3,
9
+ "sample_size": 1024,
10
+ "sample_frames": 49,
11
+ "num_vq_embeddings": 64000,
12
+ "vq_embed_dim": 6,
13
+ "patch_size": 2,
14
+ "temporal_stride": 4,
15
+ "spatial_stride": 8,
16
+ "block_out_channels": [
17
+ 128,
18
+ 256,
19
+ 512,
20
+ 512
21
+ ]
22
+ }
URSA/.flake8 ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [flake8]
2
+ max-line-length = 100
3
+ ignore =
4
+ # whitespace before ':' (conflicted with Black)
5
+ E203,
6
+ # ambiguous variable name
7
+ E741,
8
+ # ‘from module import *’ used; unable to detect undefined names
9
+ F403,
10
+ # name may be undefined, or defined from star imports: module
11
+ F405,
12
+ # redefinition of unused name from line N
13
+ F811,
14
+ # undefined name
15
+ F821,
16
+ # line break before binary operator
17
+ W503,
18
+ # line break after binary operator
19
+ W504
20
+ # module imported but unused
21
+ per-file-ignores = __init__.py: F401
URSA/.gitignore ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Compiled Object files
2
+ *.slo
3
+ *.lo
4
+ *.o
5
+ *.cuo
6
+
7
+ # Compiled Dynamic libraries
8
+ *.so
9
+ *.dll
10
+ *.dylib
11
+
12
+ # Compiled Static libraries
13
+ *.lai
14
+ *.la
15
+ *.a
16
+ *.lib
17
+
18
+ # Compiled python
19
+ *.pyc
20
+ __pycache__
21
+
22
+ # Compiled MATLAB
23
+ *.mex*
24
+
25
+ # IPython notebook checkpoints
26
+ .ipynb_checkpoints
27
+
28
+ # Editor temporaries
29
+ *.swp
30
+ *~
31
+
32
+ # Sublime Text settings
33
+ *.sublime-workspace
34
+ *.sublime-project
35
+
36
+ # Eclipse Project settings
37
+ *.*project
38
+ .settings
39
+
40
+ # QtCreator files
41
+ *.user
42
+
43
+ # VSCode files
44
+ .vscode
45
+
46
+ # IDEA files
47
+ .idea
48
+
49
+ # OSX dir files
50
+ .DS_Store
51
+
52
+ # Android files
53
+ .gradle
54
+ *.iml
55
+ local.properties
URSA/=4.57.1 ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Requirement already satisfied: diffusers in /usr/local/lib/python3.12/dist-packages (0.36.0)
2
+ Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (5.2.0)
3
+ Requirement already satisfied: accelerate in /usr/local/lib/python3.12/dist-packages (1.12.0)
4
+ Requirement already satisfied: imageio in /usr/local/lib/python3.12/dist-packages (2.37.2)
5
+ Requirement already satisfied: imageio-ffmpeg in /usr/local/lib/python3.12/dist-packages (0.6.0)
6
+ Requirement already satisfied: omegaconf in /usr/local/lib/python3.12/dist-packages (2.3.0)
7
+ Requirement already satisfied: wandb in /usr/local/lib/python3.12/dist-packages (0.25.0)
8
+ Requirement already satisfied: importlib_metadata in /usr/local/lib/python3.12/dist-packages/setuptools/_vendor (from diffusers) (8.0.0)
9
+ Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from diffusers) (3.17.0)
10
+ Requirement already satisfied: httpx<1.0.0 in /usr/local/lib/python3.12/dist-packages (from diffusers) (0.28.1)
11
+ Requirement already satisfied: huggingface-hub<2.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from diffusers) (1.3.0)
12
+ Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from diffusers) (1.26.4)
13
+ Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from diffusers) (2024.11.6)
14
+ Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from diffusers) (2.32.3)
15
+ Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.12/dist-packages (from diffusers) (0.5.3)
16
+ Requirement already satisfied: Pillow in /usr/local/lib/python3.12/dist-packages (from diffusers) (11.1.0)
17
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (23.2)
18
+ Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.2)
19
+ Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.2)
20
+ Requirement already satisfied: typer-slim in /usr/local/lib/python3.12/dist-packages (from transformers) (0.21.2)
21
+ Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)
22
+ Requirement already satisfied: psutil in /usr/local/lib/python3.12/dist-packages (from accelerate) (7.0.0)
23
+ Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from accelerate) (2.9.0+cu128)
24
+ Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.12/dist-packages (from omegaconf) (4.9.3)
25
+ Requirement already satisfied: click>=8.0.1 in /usr/local/lib/python3.12/dist-packages (from wandb) (8.1.8)
26
+ Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (3.1.46)
27
+ Requirement already satisfied: platformdirs in /usr/local/lib/python3.12/dist-packages (from wandb) (4.3.6)
28
+ Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<7,>=3.19.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (4.24.4)
29
+ Requirement already satisfied: pydantic<3 in /usr/local/lib/python3.12/dist-packages (from wandb) (2.10.6)
30
+ Requirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (2.54.0)
31
+ Requirement already satisfied: typing-extensions<5,>=4.8 in /usr/local/lib/python3.12/dist-packages (from wandb) (4.12.2)
32
+ Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.12/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.12)
33
+ Requirement already satisfied: anyio in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->diffusers) (4.8.0)
34
+ Requirement already satisfied: certifi in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->diffusers) (2025.1.31)
35
+ Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->diffusers) (1.0.7)
36
+ Requirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->diffusers) (3.10)
37
+ Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.12/dist-packages (from httpcore==1.*->httpx<1.0.0->diffusers) (0.14.0)
38
+ Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=0.34.0->diffusers) (2025.2.0)
39
+ Requirement already satisfied: hf-xet<2.0.0,>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=0.34.0->diffusers) (1.3.2)
40
+ Requirement already satisfied: shellingham in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=0.34.0->diffusers) (1.5.4)
41
+ Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.12/dist-packages (from pydantic<3->wandb) (0.7.0)
42
+ Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.12/dist-packages (from pydantic<3->wandb) (2.27.2)
43
+ Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->diffusers) (3.4.1)
44
+ Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->diffusers) (2.0.7)
45
+ Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (75.8.2)
46
+ Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (1.14.0)
47
+ Requirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.4.2)
48
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.1.6)
49
+ Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.93)
50
+ Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.90)
51
+ Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.90)
52
+ Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (9.10.2.21)
53
+ Requirement already satisfied: nvidia-cublas-cu12==12.8.4.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.4.1)
54
+ Requirement already satisfied: nvidia-cufft-cu12==11.3.3.83 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (11.3.3.83)
55
+ Requirement already satisfied: nvidia-curand-cu12==10.3.9.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (10.3.9.90)
56
+ Requirement already satisfied: nvidia-cusolver-cu12==11.7.3.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (11.7.3.90)
57
+ Requirement already satisfied: nvidia-cusparse-cu12==12.5.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.5.8.93)
58
+ Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (0.7.1)
59
+ Requirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (2.27.5)
60
+ Requirement already satisfied: nvidia-nvshmem-cu12==3.3.20 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.3.20)
61
+ Requirement already satisfied: nvidia-nvtx-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.90)
62
+ Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.93)
63
+ Requirement already satisfied: nvidia-cufile-cu12==1.13.1.3 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (1.13.1.3)
64
+ Requirement already satisfied: triton==3.5.0 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.5.0)
65
+ Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.12/dist-packages/setuptools/_vendor (from importlib_metadata->diffusers) (3.19.2)
66
+ Requirement already satisfied: annotated-doc>=0.0.2 in /usr/local/lib/python3.12/dist-packages (from typer-slim->transformers) (0.0.4)
67
+ Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.12/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.2)
68
+ Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch>=2.0.0->accelerate) (1.3.0)
69
+ Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.12/dist-packages (from anyio->httpx<1.0.0->diffusers) (1.3.1)
70
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch>=2.0.0->accelerate) (3.0.2)
URSA/LICENSE ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
URSA/README.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+
3
+ <img src="assets/logo.png" width="30%" alt="logo"/>
4
+
5
+ <h1>🐻 URSA: Uniform Discrete Diffusion with Metric Path<br>for Video Generation</h1>
6
+
7
+ <p align="center">
8
+ <a href="https://arxiv.org/abs/2510.24717"><img src="https://img.shields.io/badge/ArXiv-2510.24717-%23840707.svg" alt="ArXiv"></a>
9
+ <a href="https://huggingface.co/collections/BAAI/ursa"><img src="https://img.shields.io/badge/🤗 Weights-BAAI/URSA-rgb(166,109,59).svg" alt=""></a>
10
+ <a href="https://huggingface.co/spaces/BAAI/nova-d48w1024-osp480"><img src="https://img.shields.io/badge/🤗 Demo-TI2V-%23840707.svg" alt="TI2VDemo"></a>
11
+ <a href="http://bitterdhg.github.io/URSA_page"><img src="https://img.shields.io/badge/Project-URSA-%237CB4F7.svg" alt="Project"></a>
12
+ </p>
13
+
14
+ <p align="center">
15
+
16
+ [Haoge Deng](https://scholar.google.com/citations?user=S2sbvjgAAAAJ&hl)<sup>1,4*</sup>, [Ting Pan](https://scholar.google.com/citations?&user=qQv6YbsAAAAJ)<sup>2,4*</sup>, [Fan Zhang](https://scholar.google.com/citations?user=VsJ39HMAAAAJ)<sup>4*</sup>, [Yang Liu](https://scholar.google.com/citations?user=9JcQ2hwAAAAJ&hl)<sup>3,4*</sup>, [Zhuoyan Luo](https://scholar.google.com/citations?user=mKQhEsIAAAAJ&hl)<sup>4</sup>, [Yufeng Cui](https://scholar.google.com/citations?user=5Ydha2EAAAAJ&hl)<sup>4</sup>, [Wenxuan Wang](https://scholar.google.com/citations?user=75OyC-oAAAAJ&hl)<sup>4</sup><br>
17
+ [Chunhua Shen](https://scholar.google.com/citations?user=Ljk2BvIAAAAJ&hl)<sup>3</sup>, [Shiguang Shan](https://scholar.google.com/citations?user=Vkzd7MIAAAAJ&hl)<sup>2</sup>, [Zhaoxiang Zhang](https://scholar.google.com/citations?user=qxWfV6cAAAAJ&hl)<sup>1†</sup>, [Xinlong Wang](https://scholar.google.com/citations?user=DPz0DjYAAAAJ&hl)<sup>4†</sup><br>
18
+
19
+ [CASIA](http://english.ia.cas.cn)<sup>1</sup>, [CASICT](http://english.ict.cas.cn)<sup>2</sup>, [ZJU](https://www.zju.edu.cn/english)<sup>3</sup>, [BAAI](https://www.baai.ac.cn/en)<sup>4</sup><br>
20
+ <sup>*</sup> Equal Contribution, <sup>†</sup> Corresponding Author
21
+ <br><br><image src="assets/model_preview.gif"/>
22
+ <br><br><image src="assets/model_overview.png"/>
23
+ </div>
24
+
25
+ We present **URSA** (**U**niform disc**R**ete diffu**S**ion with metric p**A**th), a simple yet powerful framework that bridges the gap with continuous approaches. **URSA** formulates the video generation task as an iterative global refinement of discrete spatiotemporal tokens and scales efficiently to long video generation, requiring fewer inference steps. **URSA** enables multi-task video generation with asynchronous timestep scheduling strategy in one unified model.
26
+
27
+ ## 🚀 News
28
+ - ```[Feb 2026]``` Accepted by ICLR 2026 [[OpenReview]](https://openreview.net/forum?id=GFU5yCbILk).
29
+ - ```[Jan 2026]``` Released [Training Guide](./docs/training.md).
30
+ - ```[Oct 2025]``` 🎉 URSA is part of [Emu3.5](https://github.com/baaivision/Emu3.5) as DiDA (Discrete Diffusion Adaptation)!
31
+ - ```[Oct 2025]``` Released <a href="https://huggingface.co/spaces/BAAI/nova-d48w1024-osp480"><b>TI2V</b></a> 🤗 Demo.
32
+ - ```[Oct 2025]``` Released [Paper](https://arxiv.org/abs/2510.24717) & [Project Page](http://bitterdhg.github.io/URSA_page) & [Evaluation Guide](./docs/evaluation.md).
33
+
34
+ ## ✨ Highlights
35
+
36
+ - 🥇 **Novel Approach**: Uniform Discrete Diffusion with Metric Path.
37
+ - 🥈 **SOTA Performance**: High efficiency with state-of-the-art T2I/T2V/I2V results.
38
+ - 🥉 **Unified Modeling**: Multi-task capabilities in a single unified model.
39
+
40
+ ## 🗄️ Models
41
+
42
+ ### 🖼️ Text to Image
43
+
44
+ | Model | Resolution | Data | Weight | GenEval | DPGBench |
45
+ |:-----:|:----------:|:----:|:------:|:-------:|:--------:|
46
+ | URSA-0.6B-IBQ1024 | 1024x1024 | 30M | [🤗 HF](https://huggingface.co/BAAI/URSA-0.6B-IBQ1024) \| [🤖 ModelScope](https://www.modelscope.cn/models/BAAI/URSA-0.6B-IBQ1024) | 0.79 | 85.6 |
47
+ | URSA-1.7B-IBQ1024 | 1024x1024 | 30M | [🤗 HF](https://huggingface.co/BAAI/URSA-1.7B-IBQ1024) \| [🤖 ModelScope](https://www.modelscope.cn/models/BAAI/URSA-1.7B-IBQ1024) | 0.80 | 86.0 |
48
+
49
+ ### 🎬 Text to Video
50
+
51
+ | Model | Resolution | Data | Weight | VBench-T2V | VBench-I2V |
52
+ |:-----:|:----------:|:----:|:------:|:----------:|:----------:|
53
+ | URSA-0.6B-FSQ320 | 49x512x320 | 24M | [🤗 HF](https://huggingface.co/BAAI/URSA-0.6B-FSQ320) \| [🤖 ModelScope](https://www.modelscope.cn/models/BAAI/URSA-0.6B-FSQ320) | 81.4 | 86.0 |
54
+ | URSA-1.7B-FSQ320 | 49x512x320 | 24M | [🤗 HF](https://huggingface.co/BAAI/URSA-1.7B-FSQ320) \| [🤖 ModelScope](https://www.modelscope.cn/models/BAAI/URSA-1.7B-FSQ320) | 82.4 | 86.2 |
55
+
56
+ ## 📖 Table of Contents
57
+ - [🔧 Installation](#installation)
58
+ - [🔥 Quick Start](#quick-start)
59
+ - [🖼️ Image Generation](#quickstart-image-generation)
60
+ - [🎬 Video Generation](#quickstart-video-generation)
61
+ - [💻 Gradio Demo](#gradio-demo)
62
+ - [💯 Evaluation](./docs/evaluation.md)
63
+ - [🤖 Training](./docs/training.md)
64
+
65
+ ## 🔧 Installation
66
+ <a id="installation"></a>
67
+
68
+ Clone this repository to local disk and install:
69
+ ```bash
70
+ pip install diffusers 'transformers>=4.57.1' accelerate imageio imageio-ffmpeg omegaconf wandb
71
+ git clone https://github.com/baaivision/URSA.git
72
+ cd URSA && pip install .
73
+ ```
74
+
75
+ ## 🔥 Quick Start
76
+ <a id="quick-start"></a>
77
+
78
+ ### 🖼️ Image Generation
79
+ <a id="quickstart-image-generation"></a>
80
+
81
+ ```python
82
+ import torch
83
+ from diffnext.pipelines import URSAPipeline
84
+
85
+ model_id, height, width = "BAAI/URSA-1.7B-IBQ1024", 1024, 1024
86
+ model_args = {"torch_dtype": torch.float16, "trust_remote_code": True}
87
+ pipe = URSAPipeline.from_pretrained(model_id, **model_args)
88
+ pipe = pipe.to(torch.device("cuda"))
89
+
90
+ prompt = "The bear, calm and still, gazes upward as if lost in contemplation of the cosmos."
91
+ negative_prompt = "worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly"
92
+
93
+ image = pipe(**locals()).frames[0]
94
+ image.save("ursa.jpg")
95
+ ```
96
+
97
+ ### 🎬 Video Generation
98
+ <a id="quickstart-video-generation"></a>
99
+
100
+ ```python
101
+ import os, torch, numpy
102
+ from diffnext.pipelines import URSAPipeline
103
+ from diffnext.utils import export_to_video
104
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
105
+
106
+ model_id, height, width = "BAAI/URSA-1.7B-FSQ320", 320, 512
107
+ model_args = {"torch_dtype": torch.float16, "trust_remote_code": True}
108
+ pipe = URSAPipeline.from_pretrained(model_id, **model_args)
109
+ pipe = pipe.to(torch.device("cuda"))
110
+
111
+ text_prompt = "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur."
112
+ negative_prompt = "worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly"
113
+
114
+ # Text-to-Image
115
+ prompt = text_prompt
116
+ num_frames, num_inference_steps = 1, 25
117
+ image = pipe(**locals()).frames[0]
118
+ image.save("ursa.jpg")
119
+
120
+ # Image-to-Video
121
+ prompt = f"motion=9.0, {text_prompt}"
122
+ num_frames, num_inference_steps = 49, 50
123
+ video = pipe(**locals()).frames[0]
124
+ export_to_video(video, "ursa_1+48f.mp4", fps=12)
125
+
126
+ # Text-to-Video
127
+ image, video = None, None
128
+ prompt = f"motion=9.0, {text_prompt}"
129
+ num_frames, num_inference_steps = 49, 50
130
+ video = pipe(**locals()).frames[0]
131
+ export_to_video(video, "ursa_49f.mp4", fps=12)
132
+
133
+ # Video-to-Video
134
+ prompt = f"motion=5.0, {text_prompt}"
135
+ num_frames, num_inference_steps = 49, 50
136
+ num_cond_frames, cond_noise_scale = 13, 0.1
137
+ for i in range(12):
138
+ video, start_video = video[-num_cond_frames:], video
139
+ video = pipe(**locals()).frames[0]
140
+ video = numpy.concatenate([start_video, video[num_cond_frames:]])
141
+ export_to_video(video, "ursa_{}f.mp4".format(video.shape[0]), fps=12)
142
+ ```
143
+
144
+ ## 💻 Gradio Demo
145
+ <a id="gradio-demo"></a>
146
+
147
+ ```bash
148
+ # Text-to-Image (T2I)
149
+ python scripts/app_ursa_t2i.py --model "BAAI/URSA-1.7B-IBQ1024" --device 0
150
+
151
+ # Text-to-Image-to-Video (TI2V)
152
+ python scripts/app_ursa_ti2v.py --model "BAAI/URSA-1.7B-FSQ320" --device 0
153
+ ```
154
+
155
+ ## 📋 Todo List
156
+ - [X] [Model Zoo](#model-zoo)
157
+ - [X] [Quick Start](#quick-start)
158
+ - [X] [Gradio Demo](#gradio-demo)
159
+ - [X] [Evaluation Guide](./docs/evaluation.md)
160
+ - [X] [Training Guide](./docs/training.md)
161
+ - [ ] 4B Model
162
+
163
+ ## 📖 Citation
164
+ If you find this repository useful, please consider giving a star ⭐ and citation 🦖:
165
+ ```
166
+ @article{deng2025ursa,
167
+ title={Uniform Discrete Diffusion with Metric Path for Video Generation},
168
+ author={Deng, Haoge and Pan, Ting and Zhang, Fan and Liu, Yang and Luo, Zhuoyan and Cui, Yufeng and Shen, Chunhua and Shan, Shiguang and Zhang, Zhaoxiang and Wang, Xinlong},
169
+ journal={arXiv preprint arXiv:2510.24717},
170
+ year={2025}
171
+ }
172
+ ```
173
+ ```
174
+ @article{deng2024nova,
175
+ title={Autoregressive Video Generation without Vector Quantization},
176
+ author={Deng, Haoge and Pan, Ting and Diao, Haiwen and Luo, Zhengxiong and Cui, Yufeng and Lu, Huchuan and Shan, Shiguang and Qi, Yonggang and Wang, Xinlong},
177
+ journal={arXiv preprint arXiv:2412.14169},
178
+ year={2024}
179
+ }
180
+ ```
181
+
182
+ ## 🤗 Acknowledgement
183
+
184
+ We thank the repositories:
185
+ - [NOVA](https://github.com/baaivision/NOVA). ✨NOVA is the predecessor of 🐻URSA.
186
+ - [FlowMatching](https://github.com/facebookresearch/flow_matching). This codebase systemically provides CFM and DFM implementations.
187
+ - [FUDOKI](https://github.com/fudoki-hku/FUDOKI). This codebase provides a naive multimodal DFM implementation.
188
+ - [CodeWithGPU](https://github.com/seetacloud/codewithgpu). CodeWithGPU library is the core of our data loading pipeline.
189
+
190
+ ## License
191
+ Code and models are licensed under [Apache License 2.0](LICENSE).
URSA/accelerate_configs/deepspeed_zero2.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ distributed_type: DEEPSPEED
2
+ deepspeed_config:
3
+ deepspeed_multinode_launcher: standard
4
+ gradient_clipping: 0.0
5
+ zero_stage: 3 # NOTE(review): file is named deepspeed_zero2.yaml but ZeRO stage 3 is configured — confirm intended
6
+ offload_optimizer_device: cpu # Moves optimizer states to CPU RAM
7
+ offload_param_device: cpu # Moves model parameters to CPU RAM
8
+ zero3_init_flag: true # Initializes the model directly across GPUs to save CPU RAM
9
+ zero3_save_16bit_model: true # Consolidates weights into a single file when saving checkpoints
10
+ num_machines: 1
11
+ num_processes: 8
12
+ machine_rank: 0
URSA/assets/sample_image.jpg ADDED
URSA/configs/distill_dimo.yaml ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # URSA one-step distillation — DiMO-style distributed training config
3
+ # ============================================================================
4
+ # Verified native inference regime (from A/B testing — ground truth):
5
+ # height=320, width=512, num_frames=49, guidance_scale=7, teacher_steps=50.
6
+ # no_cfg (guidance_scale=1) does NOT produce valid output.
7
+ # All defaults below align to this verified regime.
8
+ #
9
+ # Launch (8-GPU, single node):
10
+ #
11
+ # accelerate launch --config_file accelerate_configs/deepspeed_zero2.yaml \
12
+ # --machine_rank 0 --num_machines 1 --num_processes 8 \
13
+ # scripts/train_distill_dimo.py \
14
+ # config="./configs/distill_dimo.yaml" \
15
+ # experiment.output_dir="./experiments/distill_dimo" \
16
+ # distill.teacher_ckpt="/path/to/URSA-1.7B-IBQ1024" \
17
+ # distill.prompt_source="/data/Koala_36M_*.csv"
18
+ #
19
+ # Smoke test (1 GPU, 50 steps — save student checkpoint):
20
+ #
21
+ # accelerate launch --num_processes 1 \
22
+ # scripts/train_distill_dimo.py \
23
+ # config="./configs/distill_dimo.yaml" \
24
+ # experiment.output_dir="./experiments/smoke" \
25
+ # distill.teacher_ckpt="/path/to/URSA-1.7B-IBQ1024" \
26
+ # distill.prompt_source="prompts.txt" \
27
+ # training.max_train_steps=50 \
28
+ # experiment.save_every=50
29
+ #
30
+ # Load student for 1-step inference (must use CFG=7, native geometry):
31
+ #
32
+ # pipe = URSAPipeline.from_pretrained("/path/to/URSA-1.7B-IBQ1024")
33
+ # state = torch.load("experiments/distill_dimo/checkpoints/final/student.pt")
34
+ # pipe.transformer.load_state_dict(state, strict=True)
35
+ # frames = pipe(prompt="...", num_inference_steps=1,
36
+ # height=320, width=512, num_frames=49,
37
+ # guidance_scale=7).frames
38
+ # ============================================================================
39
+
40
+ # ── Experiment bookkeeping ───────────────────────────────────────────────────
41
+ experiment:
42
+ name: distill_dimo
43
+ output_dir: ./experiments/distill_dimo
44
+ log_every: 10
45
+ save_every: 100
46
+ resume_iter: 0 # set to step number to resume
47
+
48
+ # ── Training (framework-level) ───────────────────────────────────────────────
49
+ training:
50
+ seed: 42
51
+ mixed_precision: bf16 # bf16 | fp16 | fp32
52
+ max_train_steps: 10000
53
+ gradient_accumulation_steps: 1 # Two-backward; keep =1 for distillation
54
+
55
+ # ── Distillation hyperparameters ─────────────────────────────────────────────
56
+ distill:
57
+ # ---- Paths ----------------------------------------------------------------
58
+ teacher_ckpt: /gfs/space/private/fengzl/World_Model/URSA-1.7B
59
+ prompt_source: /gfs/space/private/fengzl/World_Model/Koala-36M-v1 # glob, dir, .txt, or comma-list
60
+
61
+ # ---- Video geometry (verified native: 320×512×49) -------------------------
62
+ num_frames: 49
63
+ height: 320
64
+ width: 512
65
+ max_prompt_length: 320
66
+
67
+ # ---- Data -----------------------------------------------------------------
68
+ batch_size_per_gpu: 1 # effective global batch = batch_size_per_gpu × 8 GPUs
69
+
70
+ # # ---- Loss weights ---------------------------------------------------------
71
+ # lambda_kd: 0.5 # KL(z_T || z_S) weight
72
+ # lambda_pg: 1.0 # REINFORCE policy gradient weight
73
+ # lambda_ent: 0.01 # entropy bonus (λ_ent_eff × H) — set 0 for DiMO orig
74
+ # tau: 1.0 # student sampling temperature
75
+ # tau_kd: 1.0 # KD / Jeffrey softmax temperature
76
+
77
+ # # ---- Teacher CFG (aligned to verified working regime: CFG=7) ---------------
78
+ # # A/B testing confirmed: guidance_scale=1 (no_cfg) does NOT produce valid
79
+ # # output for this URSA checkpoint. The teacher KD target must use CFG=7.
80
+ # enable_teacher_cfg: true
81
+ # teacher_cfg_scale: 7.0 # s in z_guided = z_uncond + s*(z_cond-z_uncond)
82
+ # # Verified: CFG=7 is the official working value.
83
+ # teacher_cfg_prob: 1.0 # max fraction of samples using guided target
84
+ # teacher_cfg_warmup_steps: 2000 # linear warmup 0→teacher_cfg_prob
85
+ # teacher_cfg_trunc: 0.9 # when t≥trunc, scale falls to 1 (no guide)
86
+ # lambda_kd_uncond: 0.3 # weight for uncond-branch KD loss
87
+ # reward_use_guided: false # [RISKY] use guided logits for reward signal
88
+
89
+ # # ---- DiMO extensions -------------------------------------------------------
90
+ # fake_rounds: 1 # aux updates per student update (DiMO=2; try 2)
91
+ # use_surrogate_grad: false
92
+ # lambda_surr: 1.0
93
+
94
+ # ---- Loss weights ---------------------------------------------------------
95
+ lambda_kd: 1.0 # KL(z_T || z_S) weight (base knowledge-distillation weight; keep unchanged)
96
+ lambda_pg: 1.0 # [repurposed] now acts as lambda_bridge, controlling the strength of the MSE pseudo-gradient injection
97
+ lambda_ent: 0.0 # [deprecated] the RL entropy bonus has been removed entirely; keep at 0.0
98
+ tau: 1.0 # student sampling temperature
99
+ tau_kd: 1.0 # KD softmax temperature
100
+
101
+ # ---- Teacher CFG (aligned to verified working regime: CFG=7) ---------------
102
+ enable_teacher_cfg: true
103
+ teacher_cfg_scale: 7.0
104
+ teacher_cfg_prob: 1.0
105
+ teacher_cfg_warmup_steps: 1000
106
+ teacher_cfg_trunc: 0.9
107
+ lambda_kd_uncond: 0.3
108
+ # reward_use_guided: false <-- [delete this line] the reward computation has been removed
109
+
110
+ # ---- DiMO extensions -------------------------------------------------------
111
+ fake_rounds: 2 #1 # Aux iterations fitting fake tokens; if the aux bridge_loss plateaus, try 2
112
+ use_surrogate_grad: false
113
+ lambda_surr: 1.0
114
+
115
+ # ---- Stability -------------------------------------------------------------
116
+ t_curriculum_steps: 10000 # curriculum steps before uniform-t sampling
117
+ p_init_mix_ratio: 0.2 # fraction of batch from corrupted x_hat_prev
118
+ p_mix_corrupt_frac: 0.2 # token corruption rate in p_init mixing
119
+ collapse_warn_frac: 0.2 # warn if tok_entropy < frac × initial entropy
120
+
121
+ # ---- Aux initialisation ---------------------------------------------------
122
+ aux_noise_std: 1.0e-5 # tiny noise added to aux weights at init to break
123
+ # symmetry; set 0.0 to keep aux == student exactly
124
+
125
+ # ---- Gradient clipping ----------------------------------------------------
126
+ grad_clip: 1.0
127
+
128
+ # ── Student optimizer ────────────────────────────────────────────────────────
129
+ optimizer_student:
130
+ target: torch.optim.AdamW
131
+ params:
132
+ lr: 1.0e-5
133
+ betas: [0.9, 0.95]
134
+ weight_decay: 0.01
135
+
136
+ # ── Aux optimizer ────────────────────────────────────────────────────────────
137
+ optimizer_aux:
138
+ target: torch.optim.AdamW
139
+ params:
140
+ lr: 1.0e-5
141
+ betas: [0.9, 0.95]
142
+ weight_decay: 0.01
143
+
144
+ # ── LR scheduler (cosine, shared warmup/decay params for both opts) ──────────
145
+ lr_scheduler:
146
+ target: diffnext.engine.lr_scheduler.CosineLR
147
+ params:
148
+ lr_max: ${optimizer_student.params.lr}
149
+ lr_min: 1.0e-6
150
+ max_steps: ${training.max_train_steps}
151
+ warmup_steps: 500
152
+
153
+ # ── Prompt DataLoader ─────────────────────────────────────────────────────────
154
+ prompt_dataloader:
155
+ shuffle_files: true
156
+ shuffle_buffer: 50000 # in-memory shuffle buffer per shard; reduce if OOM
157
+ num_workers: 4 # CPU workers (no CUDA in workers)
158
+ caption_field: caption # CSV column name (Koala default)
URSA/configs/onestep_dimo.yaml ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # URSA one-step distillation — DiMO-style training configuration
3
+ # ============================================================================
4
+ # Reference: train_onestep_ursa_dimo.py
5
+ #
6
+ # DiMO hyperparameter comparison (Meissonic vs. our URSA defaults)
7
+ # ---------------------------------------------------------------
8
+ # Param DiMO (Meissonic) URSA (this config) Risk / Note
9
+ # ─────────────────────────────────────────────────────────────────────────
10
+ # guidance_scale (CFG) 3.0 (true_cfg) 3.0 (teacher_cfg) ✅ aligned
11
+ # fake_rounds 2 1 ⚠ try 2 for aux stability
12
+ # fixed_ratio 0.5 (mask ratio) — N/A (different domain)
13
+ # distil_loss_type surrogate MSE optional surrogate ✅ toggle via use_surrogate_grad
14
+ # noise_emb_perturb True — ℹ️ not needed for VQ-based model
15
+ # cfg_prob 1.0 teacher_cfg_prob=1.0 ✅ aligned
16
+ # lambda_ent 0.0 (no ent reg) 0.01 ℹ️ our addition for stability
17
+ # ============================================================================
18
+
19
+ # ── Paths ────────────────────────────────────────────────────────────────────
20
+ teacher_ckpt: "/path/to/URSA"
21
+ prompt_file: "prompts.txt"
22
+ out_dir: "./outputs/dimo"
23
+
24
+ # ── Video geometry ───────────────────────────────────────────────────────────
25
+ num_frames: 17
26
+ height: 256
27
+ width: 256
28
+ max_prompt_length: 320
29
+
30
+ # ── Training ─────────────────────────────────────────────────────────────────
31
+ batch_size: 2 # reduce to 1 if enable_teacher_cfg uses too much VRAM
32
+ num_steps: 10000
33
+ lr_student: 1.0e-5
34
+ lr_aux: 1.0e-5
35
+ weight_decay: 0.01
36
+ grad_clip: 1.0
37
+ mixed_precision: "bf16"
38
+ seed: 42
39
+ log_every: 50
40
+ save_every: 1000
41
+
42
+ # ── Loss weights ─────────────────────────────────────────────────────────────
43
+ lambda_pg: 1.0
44
+ lambda_kd: 0.5
45
+ lambda_ent: 0.01 # entropy regularisation (0 → DiMO original; 0.01 → our default)
46
+ tau: 1.0 # student sampling temperature
47
+ tau_kd: 1.0 # KD softmax temperature
48
+
49
+ # ── Teacher CFG (DiMO true_cfg style) ────────────────────────────────────────
50
+ # Set enable_teacher_cfg: false to revert to the prior single-branch behavior.
51
+ # All other params in this block are ignored when enable_teacher_cfg=false.
52
+ enable_teacher_cfg: true
53
+
54
+ teacher_cfg_scale: 3.0 # s in z_guided = z_uncond + s*(z_cond - z_uncond)
55
+ # Matches DiMO true_cfg=3.0
56
+
57
+ teacher_cfg_prob: 1.0 # Probability of using guided target per batch (after warmup).
58
+ # 1.0 = always guided (DiMO default).
59
+
60
+ teacher_cfg_warmup_steps: 2000
61
+ # Ramp teacher_cfg_prob from 0 → teacher_cfg_prob over this many
62
+ # steps. Prevents instability at the start of training.
63
+
64
+ teacher_cfg_trunc: 0.9 # When t >= trunc, CFG scale falls to 1 (no guidance at high noise).
65
+ # Mirrors DiMO's guidance_trunc parameter.
66
+
67
+ lambda_kd_uncond: 0.3 # Weight for uncond-branch KD loss.
68
+ # Keeps the student uncond-capable for eval-time CFG.
69
+
70
+ reward_use_guided: false # [RISKY] Use guided teacher logits for REINFORCE reward.
71
+ # Default false: use non-guided cond (more stable).
72
+
73
+ # ── Eval / inference CFG ─────────────────────────────────────────────────────
74
+ eval_cfg_scale: 3.0 # guidance_scale used during evaluation
75
+ use_cfg_eval: false # Run eval with inference-time CFG (2× forward)
76
+
77
+ # ── DiMO extensions ──────────────────────────────────────────────────────────
78
+ use_surrogate_grad: false # DiMO surrogate MSE trick (zero-variance alternative to REINFORCE)
79
+ lambda_surr: 1.0
80
+ fake_rounds: 1 # Aux updates per generator update (DiMO uses 2; try 2 for aux stability)
81
+
82
+ # ── Stability ─────────────────────────────────────────────────────────────────
83
+ t_curriculum_steps: 10000 # Steps to use t-curriculum (biases t toward larger values)
84
+ p_mix_corrupt_frac: 0.2 # Fraction of tokens to corrupt in p_init mixing
85
+ p_init_mix_ratio: 0.2 # Fraction of batch drawn from corrupted x_hat_prev
86
+ collapse_warn_frac: 0.2 # Warn if tok_hist_entropy drops below this fraction of initial
87
+
88
+ # ── Debug ────────────────────────────────────────────────────────────────────
89
+ dry_run: false # Run 1 step, print diagnostics, exit
90
+ debug_dump: 0 # Dump token histogram + x_hat every N steps (0=off)
91
+
92
+ # ── Recommended quick-start commands ─────────────────────────────────────────
93
+ # # Smoke test (CFG enabled):
94
+ # python scripts/train_onestep_ursa_dimo.py \
95
+ # --teacher_ckpt /path/to/URSA --prompt_file prompts.txt \
96
+ # --enable_teacher_cfg --teacher_cfg_scale 3.0 \
97
+ # --num_frames 17 --height 256 --width 256 --dry_run
98
+ #
99
+ # # Full training (DiMO-aligned):
100
+ # python scripts/train_onestep_ursa_dimo.py \
101
+ # --teacher_ckpt /path/to/URSA --prompt_file prompts.txt \
102
+ # --enable_teacher_cfg --teacher_cfg_scale 3.0 \
103
+ # --batch_size 2 --num_steps 10000 --fake_rounds 2 \
104
+ # --out_dir ./outputs/dimo_cfg
105
+ #
106
+ # # Eval (compare 3 student modes vs teacher):
107
+ # python scripts/eval_onestep_ursa.py \
108
+ # --teacher_ckpt /path/to/URSA \
109
+ # --student_ckpt ./outputs/dimo_cfg/final/student.pt \
110
+ # --modes no_cfg cfg baked --eval_cfg_scale 3.0 \
111
+ # --out_dir ./outputs/eval
URSA/configs/ursa_0.6b_fsq320.yaml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb:
2
+ run_id: null
3
+
4
+ experiment:
5
+ project: ursa_0.6b_fsq320
6
+ log_every: 20
7
+ save_every: 5000
8
+ resume_from_checkpoint: latest
9
+
10
+ model:
11
+ name: "transformer"
12
+ gradient_checkpointing: 2 # 1: +mlp_ckpt 2: +qkv_ckpt 3: +layer_ckpt
13
+ async_timestep: true
14
+ tokenizer:
15
+ params:
16
+ max_length: 320
17
+ truncation: true
18
+ padding_side: left
19
+ padding: max_length
20
+
21
+ pipeline:
22
+ target: diffnext.pipelines.ursa.pipeline_train.URSATrainPipeline
23
+ paths:
24
+ pretrained_path: /path/to/URSA-0.6B-FSQ320
25
+ module_dict:
26
+ vae: ${pipeline.paths.pretrained_path}/vae
27
+ scheduler: ${pipeline.paths.pretrained_path}/scheduler
28
+ tokenizer: ${pipeline.paths.pretrained_path}/tokenizer
29
+ model_index: ${pipeline.paths.pretrained_path}/model_index.json
30
+
31
+ optimizer:
32
+ target: torch.optim.AdamW
33
+ param_groups: false
34
+ params:
35
+ lr: 0.00003
36
+ betas: [0.9, 0.95]
37
+ weight_decay: 0.05
38
+ fused: true
39
+
40
+ lr_scheduler:
41
+ target: diffnext.engine.lr_scheduler.CosineLR
42
+ params:
43
+ lr_max: ${optimizer.params.lr}
44
+ lr_min: 0.00001
45
+ max_steps: ${training.max_train_steps}
46
+ warmup_steps: 500
47
+
48
+ train_dataloader:
49
+ target: diffnext.data.flex_loaders.FeatureDataLoader
50
+ params:
51
+ dataset: /path/to/fsq320_dataset
52
+ batch_size: ${training.batch_size}
53
+ seed: ${training.seed}
54
+ num_workers: 4
55
+ shuffle: true
56
+
57
+ training:
58
+ gradient_accumulation_steps: 1
59
+ batch_size: 1 # * 256 = 256
60
+ max_train_steps: 20000
61
+ seed: 1337
62
+ mixed_precision: bf16
URSA/configs/ursa_0.6b_ibq1024.yaml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb:
2
+ run_id: null
3
+
4
+ experiment:
5
+ project: ursa_0.6b_ibq1024
6
+ log_every: 20
7
+ save_every: 5000
8
+ resume_from_checkpoint: latest
9
+
10
+ model:
11
+ name: "transformer"
12
+ gradient_checkpointing: 2 # 1: +mlp_ckpt 2: +qkv_ckpt 3: +layer_ckpt
13
+ async_timestep: false
14
+ tokenizer:
15
+ params:
16
+ max_length: 320
17
+ truncation: true
18
+ padding_side: left
19
+ padding: max_length
20
+
21
+ pipeline:
22
+ target: diffnext.pipelines.ursa.pipeline_train.URSATrainPipeline
23
+ paths:
24
+ pretrained_path: /path/to/URSA-0.6B-IBQ1024
25
+ module_dict:
26
+ vae: ${pipeline.paths.pretrained_path}/vae
27
+ scheduler: ${pipeline.paths.pretrained_path}/scheduler
28
+ tokenizer: ${pipeline.paths.pretrained_path}/tokenizer
29
+ model_index: ${pipeline.paths.pretrained_path}/model_index.json
30
+
31
+ optimizer:
32
+ target: torch.optim.AdamW
33
+ param_groups: false
34
+ params:
35
+ lr: 0.00003
36
+ betas: [0.9, 0.95]
37
+ weight_decay: 0.05
38
+ fused: true
39
+
40
+ lr_scheduler:
41
+ target: diffnext.engine.lr_scheduler.CosineLR
42
+ params:
43
+ lr_max: ${optimizer.params.lr}
44
+ lr_min: 0.00001
45
+ max_steps: ${training.max_train_steps}
46
+ warmup_steps: 500
47
+
48
+ train_dataloader:
49
+ target: diffnext.data.flex_loaders.FeatureDataLoader
50
+ params:
51
+ dataset: /path/to/ibq1024_dataset
52
+ batch_size: ${training.batch_size}
53
+ seed: ${training.seed}
54
+ num_workers: 4
55
+ shuffle: true
56
+
57
+ training:
58
+ gradient_accumulation_steps: 1
59
+ batch_size: 1 # * 512 = 512
60
+ max_train_steps: 120000
61
+ seed: 1337
62
+ mixed_precision: bf16
URSA/configs/ursa_1.7b_fsq320.yaml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb:
2
+ run_id: null
3
+
4
+ experiment:
5
+ project: ursa_1.7b_fsq320
6
+ log_every: 20
7
+ save_every: 5000
8
+ resume_from_checkpoint: latest
9
+
10
+ model:
11
+ name: "transformer"
12
+ gradient_checkpointing: 2 # 1: +mlp_ckpt 2: +qkv_ckpt 3: +layer_ckpt
13
+ async_timestep: true
14
+ tokenizer:
15
+ params:
16
+ max_length: 320
17
+ truncation: true
18
+ padding_side: left
19
+ padding: max_length
20
+
21
+ pipeline:
22
+ target: diffnext.pipelines.ursa.pipeline_train.URSATrainPipeline
23
+ paths:
24
+ pretrained_path: /path/to/URSA-1.7B-FSQ320
25
+ module_dict:
26
+ vae: ${pipeline.paths.pretrained_path}/vae
27
+ scheduler: ${pipeline.paths.pretrained_path}/scheduler
28
+ tokenizer: ${pipeline.paths.pretrained_path}/tokenizer
29
+ model_index: ${pipeline.paths.pretrained_path}/model_index.json
30
+
31
+ optimizer:
32
+ target: torch.optim.AdamW
33
+ param_groups: false
34
+ params:
35
+ lr: 0.00003
36
+ betas: [0.9, 0.95]
37
+ weight_decay: 0.05
38
+ fused: true
39
+
40
+ lr_scheduler:
41
+ target: diffnext.engine.lr_scheduler.CosineLR
42
+ params:
43
+ lr_max: ${optimizer.params.lr}
44
+ lr_min: 0.00001
45
+ max_steps: ${training.max_train_steps}
46
+ warmup_steps: 500
47
+
48
+ train_dataloader:
49
+ target: diffnext.data.flex_loaders.FeatureDataLoader
50
+ params:
51
+ dataset: /path/to/fsq320_dataset
52
+ batch_size: ${training.batch_size}
53
+ seed: ${training.seed}
54
+ num_workers: 4
55
+ shuffle: true
56
+
57
+ training:
58
+ gradient_accumulation_steps: 1
59
+ batch_size: 1 # * 256 = 256
60
+ max_train_steps: 20000
61
+ seed: 1337
62
+ mixed_precision: bf16
URSA/configs/ursa_1.7b_ibq1024.yaml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb:
2
+ run_id: null
3
+
4
+ experiment:
5
+ project: ursa_1.7b_ibq1024
6
+ log_every: 20
7
+ save_every: 5000
8
+ resume_from_checkpoint: latest
9
+
10
+ model:
11
+ name: "transformer"
12
+ gradient_checkpointing: 2 # 1: +mlp_ckpt 2: +qkv_ckpt 3: +layer_ckpt
13
+ async_timestep: false
14
+ tokenizer:
15
+ params:
16
+ max_length: 320
17
+ truncation: true
18
+ padding_side: left
19
+ padding: max_length
20
+
21
+ pipeline:
22
+ target: diffnext.pipelines.ursa.pipeline_train.URSATrainPipeline
23
+ paths:
24
+ pretrained_path: /path/to/URSA-1.7B-IBQ1024
25
+ module_dict:
26
+ vae: ${pipeline.paths.pretrained_path}/vae
27
+ scheduler: ${pipeline.paths.pretrained_path}/scheduler
28
+ tokenizer: ${pipeline.paths.pretrained_path}/tokenizer
29
+ model_index: ${pipeline.paths.pretrained_path}/model_index.json
30
+
31
+ optimizer:
32
+ target: torch.optim.AdamW
33
+ param_groups: false
34
+ params:
35
+ lr: 0.00003
36
+ betas: [0.9, 0.95]
37
+ weight_decay: 0.05
38
+ fused: true
39
+
40
+ lr_scheduler:
41
+ target: diffnext.engine.lr_scheduler.CosineLR
42
+ params:
43
+ lr_max: ${optimizer.params.lr}
44
+ lr_min: 0.00001
45
+ max_steps: ${training.max_train_steps}
46
+ warmup_steps: 500
47
+
48
+ train_dataloader:
49
+ target: diffnext.data.flex_loaders.FeatureDataLoader
50
+ params:
51
+ dataset: /path/to/ibq1024_dataset
52
+ batch_size: ${training.batch_size}
53
+ seed: ${training.seed}
54
+ num_workers: 4
55
+ shuffle: true
56
+
57
+ training:
58
+ gradient_accumulation_steps: 1
59
+ batch_size: 1 # * 512 = 512
60
+ max_train_steps: 120000
61
+ seed: 1337
62
+ mixed_precision: bf16
URSA/diffnext/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------
2
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ------------------------------------------------------------------------
16
+ """DiffNext: A diffusers based library for autoregressive diffusion models."""
URSA/diffnext/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (250 Bytes). View file
 
URSA/diffnext/__pycache__/image_processor.cpython-312.pyc ADDED
Binary file (5.04 kB). View file
 
URSA/diffnext/data/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------
2
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ------------------------------------------------------------------------
16
+ """Data components."""
URSA/diffnext/data/flex_loaders.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------
2
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ------------------------------------------------------------------------
16
+ """Flex data loaders."""
17
+
18
+ import collections
19
+ import multiprocessing as mp
20
+ import time
21
+ import threading
22
+ import queue
23
+
24
+ import codewithgpu
25
+ import numpy as np
26
+
27
+ from diffnext.data.flex_pipelines import FeatureWorker
28
+
29
+
30
class BalancedQueues(object):
    """Round-robin dispatcher over a group of queues.

    The first slot reuses the queue supplied by the caller; ``num - 1``
    additional multiprocessing queues (same capacity) are created so that
    producers and consumers can be spread across worker processes.
    """

    def __init__(self, base_queue, num=1):
        extras = [mp.Queue(base_queue._maxsize) for _ in range(num - 1)]
        self.queues = [base_queue] + extras
        self.index = 0

    def put(self, obj, block=True, timeout=None):
        """Put an object into the next queue in rotation."""
        target = self.queues[self.index]
        target.put(obj, block=block, timeout=timeout)
        self.index = (self.index + 1) % len(self.queues)

    def get(self, block=True, timeout=None):
        """Get an object from the next queue in rotation."""
        source = self.queues[self.index]
        item = source.get(block=block, timeout=timeout)
        self.index = (self.index + 1) % len(self.queues)
        return item

    def get_n(self, num=1):
        """Collect ``num`` non-None objects across the queues.

        ``None`` entries are skipped, so producers may use them as
        keep-alive / sentinel values without affecting consumers.
        """
        collected = []
        while len(collected) < num:
            item = self.get()
            if item is not None:
                collected.append(item)
        return collected
56
+
57
+
58
class DataLoaderBase(threading.Thread):
    """Base class of data loader.

    Spawns one dataset reader process plus ``num_workers`` transform worker
    processes, then runs as a daemon thread that prefetches batches into an
    in-process ``batch_queue`` (see subclasses' ``run``).
    """

    def __init__(self, worker, **kwargs):
        """Create a ``DataLoaderBase``.

        Args:
            worker: Worker class instantiated ``num_workers`` times; each
                instance is a daemon process fed from a reader queue slot.
            kwargs: Loader options. ``seed`` and ``shuffle`` are consumed
                here; the remaining keys (including ``shard_id``,
                ``num_shards``, ``batch_size``, ``num_workers``,
                ``queue_depth``) stay in ``kwargs`` and are also forwarded
                to ``codewithgpu.DatasetReader``.
        """
        super().__init__(daemon=True)
        self.seed = kwargs.pop("seed", 1337)
        self.shuffle = kwargs.pop("shuffle", True)
        self.shard_id = kwargs.get("shard_id", 0)
        self.num_shards = kwargs.get("num_shards", 1)
        self.batch_size = kwargs.get("batch_size", 1)
        self.num_workers = kwargs.get("num_workers", 1)
        self.queue_depth = kwargs.get("queue_depth", 2)
        # Build queues.
        # Raw examples flow reader -> reader_queue -> workers -> worker_queue;
        # both are wrapped in BalancedQueues with one slot per worker.
        self.reader_queue = mp.Queue(self.queue_depth * self.batch_size)
        self.worker_queue = mp.Queue(self.queue_depth * self.batch_size)
        self.batch_queue = queue.Queue(self.queue_depth)
        self.reader_queue = BalancedQueues(self.reader_queue, self.num_workers)
        self.worker_queue = BalancedQueues(self.worker_queue, self.num_workers)
        # Build readers.
        # Seed is offset by shard id so each shard shuffles independently.
        self.readers = [
            codewithgpu.DatasetReader(
                output_queue=self.reader_queue,
                partition_id=self.shard_id,
                num_partitions=self.num_shards,
                seed=self.seed + self.shard_id,
                shuffle=self.shuffle,
                **kwargs,
            )
        ]
        self.readers[0].start()
        time.sleep(0.1)  # Give the reader process a head start.
        # Build workers.
        # Each worker gets a distinct seed and its own queue slot, assigned
        # before start() since the attributes are inherited by the child.
        self.workers = []
        for i in range(self.num_workers):
            p = worker()
            p.seed = self.seed + i + self.shard_id * self.num_workers
            p.reader_queue = self.reader_queue.queues[i]
            p.worker_queue = self.worker_queue.queues[i]
            p.start()
            self.workers.append(p)
            time.sleep(0.1)  # Stagger worker startup.

        # Register cleanup callbacks.
        def cleanup():
            def terminate(processes):
                for p in processes:
                    p.terminate()
                    p.join()

            # Workers first, then the reader feeding them.
            terminate(self.workers)
            terminate(self.readers)

        import atexit

        atexit.register(cleanup)
        # Start batch prefetching.
        self.start()

    def next(self):
        """Return the next batch of data."""
        return self.__next__()

    def run(self):
        """Main loop."""
        # Intentionally a no-op here; subclasses implement the prefetch loop.

    def __call__(self):
        return self.next()

    def __iter__(self):
        """Return the iterator self."""
        return self

    def __next__(self):
        """Return the next batch of data."""
        # Blocks until the prefetch thread has produced a batch.
        return [self.batch_queue.get()]
133
+
134
+
135
class DataLoader(DataLoaderBase):
    """Loader to return the batch of data."""

    def __init__(self, dataset, worker, **kwargs):
        """Create a ``DataLoader``.

        Args:
            dataset: Path to the dataset records.
            worker: Worker class used to transform raw examples.
            kwargs: Options forwarded to ``DataLoaderBase``. Also accepts
                ``contiguous`` (stack latents into one array, default True)
                and ``prefetch_count`` (batches buffered ahead, default 50).
        """
        kwargs.update({"path": dataset})  # Alias for codewithgpu.
        self.contiguous = kwargs.pop("contiguous", True)
        self.prefetch_count = kwargs.pop("prefetch_count", 50)
        super().__init__(worker, **kwargs)

    def run(self):
        """Main loop."""
        # Warm-up: buffer prefetch_count batches worth of examples so the
        # consumer is decoupled from slow workers at startup.
        prev_inputs = self.worker_queue.get_n(self.prefetch_count * self.batch_size)
        next_inputs = []
        while True:
            # Use cached buffer for next N inputs.
            if len(next_inputs) == 0:
                next_inputs = prev_inputs
                prev_inputs = []
            # Collect the next batch.
            outputs = collections.defaultdict(list)
            for _ in range(self.batch_size):
                inputs = next_inputs.pop(0)
                for k, v in inputs.items():
                    outputs[k].extend(v)
                # Refill one example per example consumed, keeping the
                # buffered window at a steady size.
                # NOTE(review): placement inside this loop reconstructed
                # from the diff rendering — confirm against upstream.
                prev_inputs += self.worker_queue.get_n(1)
            # Stack batch data.
            if self.contiguous:
                if "latents" in outputs:
                    outputs["latents"] = np.stack(outputs["latents"])
            # Send batch data to consumer.
            self.batch_queue.put(outputs)
166
+
167
+
168
class FeatureDataLoader(DataLoader):
    """Loader to return the batch of data features."""

    def __init__(self, dataset, **kwargs):
        """Create a ``FeatureDataLoader``.

        Args:
            dataset: Path to the precomputed-feature dataset records.
            kwargs: Extra loader options forwarded to ``DataLoader``.
        """
        # Fixes the worker type to FeatureWorker (latents + annotations).
        super().__init__(dataset, FeatureWorker, **kwargs)
URSA/diffnext/data/flex_pipelines.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------
2
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ------------------------------------------------------------------------
16
+ """Flex data pipelines."""
17
+
18
+ import multiprocessing
19
+
20
+ import cv2
21
+ import numpy.random as npr
22
+
23
+ from diffnext.data import flex_transforms
24
+
25
+
26
class Worker(multiprocessing.Process):
    """Base data worker.

    Daemon process that repeatedly pulls one raw example from
    ``reader_queue``, transforms it with ``self.get_outputs`` (supplied by a
    mixin subclass such as ``FeaturePipe``) and pushes the result onto
    ``worker_queue``.
    """

    def __init__(self):
        super().__init__(daemon=True)
        # These attributes are assigned by the owning loader before start().
        self.seed = 1337  # Per-process numpy random seed.
        self.reader_queue = None  # Input queue of raw examples.
        self.worker_queue = None  # Output queue of transformed examples.

    def run(self):
        """Run implementation."""
        # Disable opencv threading and fix numpy random seed.
        # Single-threaded cv2 avoids thread oversubscription when many
        # worker processes run concurrently.
        cv2.setNumThreads(1), npr.seed(self.seed)
        while True:  # Main loop.
            self.worker_queue.put(self.get_outputs(self.reader_queue.get()))
41
+
42
+
43
class FeaturePipe(object):
    """Pipeline to transform data features."""

    def __init__(self):
        super().__init__()
        self.parse_latents = flex_transforms.ParseLatents()
        self.parse_annotations = flex_transforms.ParseAnnotations()

    def get_outputs(self, inputs):
        """Return the transformed outputs for a single raw example.

        Args:
            inputs: Raw example dict from the dataset reader.

        Returns:
            Dict with a ``latents`` list and, when available, ``prompt``
            and ``motion`` lists.
        """
        latents = self.parse_latents(inputs)
        label, caption = self.parse_annotations(inputs)
        outputs = {"latents": [latents]}
        # Plain ``if`` statements instead of the original
        # ``x.setdefault(...) if cond else None`` expression-statements.
        # ``setdefault`` keeps the label prompt when both are present.
        if label is not None:
            outputs.setdefault("prompt", [label])
        if caption is not None:
            outputs.setdefault("prompt", [caption])
        if "flow" in inputs:
            outputs.setdefault("motion", [inputs["flow"]])
        return outputs
60
+
61
+
62
class FeatureWorker(FeaturePipe, Worker):
    """Worker to transform data features.

    Combines ``FeaturePipe`` (which supplies ``get_outputs``) with
    ``Worker`` (the daemon-process main loop); adds no state of its own.
    """
URSA/diffnext/data/flex_transforms.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------
2
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ------------------------------------------------------------------------
16
+ """Flex data transforms."""
17
+
18
+ import re
19
+ import numpy as np
20
+ import numpy.random as npr
21
+
22
+
23
class Transform(object):
    """Base transform type."""

    def filter_outputs(self, *outputs):
        """Drop ``None`` entries; unwrap when one (or zero extra) remains.

        Returns the list of non-None values, or the single value itself
        when at most one survives the filter.
        """
        kept = [item for item in outputs if item is not None]
        if len(kept) > 1:
            return kept
        return kept[0]
29
+
30
+
31
class ParseLatents(Transform):
    """Parse VQ or VAE latents."""

    def __init__(self):
        super().__init__()

    def __call__(self, inputs):
        """Decode the packed latent buffer stored in ``inputs``.

        VAE latents are stored under ``moments`` as float16 bytes; VQ
        latents under ``codes`` as int32 bytes. The first key present wins.
        """
        for key, dtype in (("moments", "float16"), ("codes", "int32")):
            if key in inputs:
                flat = np.frombuffer(inputs[key], dtype)
                return flat.reshape(inputs["shape"])
        raise ValueError("Missing latents in inputs.")
42
+
43
+
44
class ParseAnnotations(Transform):
    """Parse ground-truth annotations."""

    def __init__(self, short_prob=0.5):
        super().__init__()
        # Probability of substituting the short caption for the full one.
        self.short_prob = short_prob

    def __call__(self, inputs):
        """Return ``(label, caption)`` parsed from a raw example dict.

        ``caption``/``text`` may be raw strings or cached embedding dicts
        with ``data`` (float16 bytes) and ``shape`` entries.
        """
        text = inputs.get("text", None)
        label = inputs.get("label", None)
        caption = inputs.get("caption", None)
        # NOTE(review): indentation of this block was reconstructed from a
        # whitespace-stripped diff — confirm against upstream. As written,
        # the cached-embedding path returns early, so the string-based
        # short-caption logic below only ever sees text captions.
        if caption and isinstance(caption, dict):  # Cached.
            caption = np.frombuffer(caption["data"], "float16").reshape(caption["shape"])
            # Swap in the cached short-text embedding half the time.
            # NOTE(review): uses a hard-coded 0.5 rather than short_prob.
            if text and isinstance(text, dict) and len(text["data"]) > 0 and npr.rand() < 0.5:
                caption = np.frombuffer(text["data"], "float16").reshape(text["shape"])
            return label, caption

        # Improved short caption.
        if label is None:
            # First sentence (up to ., ! or ?) serves as the short caption
            # when no explicit ``text`` field is provided.
            text_match = re.match(r"^(.*?[.!?])\s+", caption)
            text = text if text else (text_match.group(1) if text_match else caption)
            caption = text if text and npr.rand() < self.short_prob else caption
        return label, caption
URSA/diffnext/engine/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------
2
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ------------------------------------------------------------------------
16
+ """Engine components."""
URSA/diffnext/engine/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (202 Bytes). View file
 
URSA/diffnext/engine/__pycache__/engine_utils.cpython-312.pyc ADDED
Binary file (5.8 kB). View file
 
URSA/diffnext/engine/__pycache__/lr_scheduler.cpython-312.pyc ADDED
Binary file (4.39 kB). View file
 
URSA/diffnext/engine/engine_utils.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------
2
+ # Copyright (c) 2024-present, BAAI. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ------------------------------------------------------------------------
16
+ """Engine utilities."""
17
+
18
+ import collections
19
+ import pickle
20
+
21
+ import numpy as np
22
+ import torch
23
+ from torch import nn
24
+
25
+
26
def count_params(module, trainable=True, unit="M"):
    """Return the number of parameters of ``module``.

    Args:
        module: The torch module to inspect.
        trainable: Count only parameters with ``requires_grad`` when True;
            count every parameter when False.
        unit: ``"M"`` for millions or ``"B"`` for billions.

    Returns:
        The parameter count scaled by the chosen unit.
    """
    divisor = {"M": 1e6, "B": 1e9}[unit]
    total = 0
    for param in module.parameters():
        if param.requires_grad or not trainable:
            total += param.size().numel()
    return total / divisor
30
+
31
+
32
def freeze_module(module, trainable=False):
    """Freeze (or unfreeze) the parameters of a given module.

    Sets the module's train/eval mode to match ``trainable`` and toggles
    ``requires_grad`` on every parameter accordingly.

    Returns:
        The same module, for chaining.
    """
    # train(False) is equivalent to eval(); train(True) to train().
    module.train(mode=trainable)
    for p in module.parameters():
        p.requires_grad = trainable
    return module
38
+
39
+
40
def get_device(index):
    """Return the best available device object at ``index``.

    Preference order: CUDA, then MPS, then CPU. Backends missing from the
    current torch build are skipped silently.
    """
    if torch.cuda.is_available():
        return torch.device("cuda", index)
    for name in ("mps",):
        try:
            available = getattr(torch.backends, name).is_available()
        except AttributeError:
            # Backend not compiled into this torch build.
            continue
        if available:
            return torch.device(name, index)
    return torch.device("cpu")
51
+
52
+
53
def get_param_groups(model):
    """Separate trainable parameters into optimizer groups.

    Parameters sharing the same attribute set (``lr_scale`` and/or a zero
    ``weight_decay`` for norm layers or params flagged ``no_weight_decay``)
    are grouped together; each group dict carries those attrs plus a
    ``params`` list.

    Returns:
        A list of parameter-group dicts suitable for a torch optimizer.
    """
    norm_types = (nn.BatchNorm2d, nn.GroupNorm, nn.SyncBatchNorm, nn.LayerNorm)
    seen, groups, lr_scale_getter = set(), collections.OrderedDict(), None
    for mod_name, mod in model.named_modules():
        # recurse=False: each parameter is visited once, on its owner.
        for p_name, p in mod.named_parameters(recurse=False):
            if not p.requires_grad or p in seen:
                continue  # Skip frozen and shared (already grouped) params.
            seen.add(p)
            attrs = collections.OrderedDict()
            if lr_scale_getter:
                attrs["lr_scale"] = lr_scale_getter(f"{mod_name}.{p_name}")
            if hasattr(p, "lr_scale"):
                attrs["lr_scale"] = p.lr_scale
            if getattr(p, "no_weight_decay", False) or isinstance(mod, norm_types):
                attrs["weight_decay"] = 0
            # Group key is the canonical string form of the attr set.
            key = "/".join("%s:%s" % pair for pair in attrs.items())
            if key not in groups:
                groups[key] = {**attrs, "params": []}
            groups[key]["params"].append(p)
    return list(groups.values())
73
+
74
+
75
def load_weights(module, weights_file, prefix_removed="", strict=True):
    """Load a weights file into ``module``.

    Args:
        module: Target torch module.
        weights_file: Path to a ``.pkl`` state dict (numpy values) or a
            torch checkpoint. A falsy path is a no-op.
        prefix_removed: When set, keep only keys starting with this prefix
            and strip it from them.
        strict: Passed through to ``Module.load_state_dict``.
    """
    if not weights_file:
        return
    if weights_file.endswith(".pkl"):
        with open(weights_file, "rb") as f:
            state_dict = pickle.load(f)
        # Pickled values are raw arrays; convert them to tensors in place.
        for key, value in state_dict.items():
            state_dict[key] = torch.as_tensor(value)
    else:
        state_dict = torch.load(weights_file, map_location="cpu", weights_only=False)
    if prefix_removed:
        # Rebuild with the prefix stripped; non-matching keys are dropped.
        renamed = type(state_dict)()
        for key in list(state_dict.keys()):
            if key.startswith(prefix_removed):
                renamed[key.replace(prefix_removed, "")] = state_dict.pop(key)
        state_dict = renamed
    module.load_state_dict(state_dict, strict=strict)
93
+
94
+
95
def manual_seed(seed, device_and_seed=None):
    """Set the cpu and (optionally) device random seeds.

    Args:
        seed: Seed for the CPU torch RNG.
        device_and_seed: Optional ``(device_index, device_seed)`` pair; when
            given, also seeds numpy and the RNG of the resolved device.
    """
    torch.manual_seed(seed)
    if device_and_seed is None:
        return
    device_index, device_seed = device_and_seed
    device_type = get_device(device_index).type
    # Numpy is seeded per-device so data pipelines diverge across shards.
    np.random.seed(device_seed)
    if device_type in ("cuda", "mps"):
        getattr(torch, device_type).manual_seed(device_seed)
104
+
105
+
106
def synchronize_device(device):
    """Block until all pending computation on ``device`` completes.

    No-op for device types without an async queue (e.g. CPU).
    """
    if device.type not in ("cuda", "mps"):
        return
    getattr(torch, device.type).synchronize(device)