diff --git a/.gitattributes b/.gitattributes
index 58f975cd045065740dc3dfd2183edbae71ebc108..5bab6f29aa0b8cc737dfcfdd37c232ee8a4e9bee 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -132,3 +132,22 @@ URSA/outputs/eval_distill_v3_100steps_49frames/03_s2_a_hummingbird_hovers_in_fro
URSA/outputs/eval_distill_v3_100steps_49frames/03_s2_a_hummingbird_hovers_in_front_of_a_red_f_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
URSA/outputs/eval_distill_v3_100steps_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
URSA/outputs/eval_distill_v3_100steps_49frames/00_s2_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_v3_100steps_49frames/01_s3_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_v3_100steps_49frames/03_s3_a_hummingbird_hovers_in_front_of_a_red_f_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_v3_100steps_49frames/01_s2_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_49frames/00_s2_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 filter=lfs diff=lfs merge=lfs -text
+URSA/outputs/eval_distill_v3_200steps_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_baked.mp4 filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_7.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_10.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_8.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_5.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_3.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_4.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_1.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_2.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_6.csv filter=lfs diff=lfs merge=lfs -text
+Koala-36M-v1/Koala_36M_9.csv filter=lfs diff=lfs merge=lfs -text
diff --git a/Koala-36M-v1/.gitattributes b/Koala-36M-v1/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..b70c7f433004a26e68d52a692372aaaac50ce236
--- /dev/null
+++ b/Koala-36M-v1/.gitattributes
@@ -0,0 +1,68 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
+# Video files - compressed
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.webm filter=lfs diff=lfs merge=lfs -text
+Koala_36M_1.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_2.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_3.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_4.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_5.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_6.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_7.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_8.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_9.csv filter=lfs diff=lfs merge=lfs -text
+Koala_36M_10.csv filter=lfs diff=lfs merge=lfs -text
diff --git a/Koala-36M-v1/Koala_36M_1.csv b/Koala-36M-v1/Koala_36M_1.csv
new file mode 100644
index 0000000000000000000000000000000000000000..61657f193990f41c7b511e026dc5b02ee300173f
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_1.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5721d746552bcf48ca2c85d383eb3aee8a9d724cb8b498448e283e6c155b65f3
+size 4889903599
diff --git a/Koala-36M-v1/Koala_36M_10.csv b/Koala-36M-v1/Koala_36M_10.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4711d4c487a88348b775fa5cfe13b269eeb5202f
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_10.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3aa2590eb8302cf43106e7faf7ef36849fedeb6c5d5ca1ee214635f820adf807
+size 4888525462
diff --git a/Koala-36M-v1/Koala_36M_2.csv b/Koala-36M-v1/Koala_36M_2.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ccf09ab0dcaac8d53cc6ca2d9480778c359e8ca2
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_2.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0da912f9903bcc06e077fd84e116f0497782743f35b4c1bfe06223e071720f2a
+size 4889857219
diff --git a/Koala-36M-v1/Koala_36M_3.csv b/Koala-36M-v1/Koala_36M_3.csv
new file mode 100644
index 0000000000000000000000000000000000000000..6d6d5ab474ac44c73b46ccc706587f82afdd42e7
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_3.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5b7cf12f398b9379ac4b6e65c4d5e3154be362513db781ede873d2ee485b112
+size 4889283599
diff --git a/Koala-36M-v1/Koala_36M_4.csv b/Koala-36M-v1/Koala_36M_4.csv
new file mode 100644
index 0000000000000000000000000000000000000000..3cc93b895903384f108ed8524fe1a7ffba37da30
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_4.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b75062281023cf982e885cabf79963482fd23e683cfb8e1c68d7ad6c1e363637
+size 4889718227
diff --git a/Koala-36M-v1/Koala_36M_5.csv b/Koala-36M-v1/Koala_36M_5.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8e8640a75636024ad581b5a7a58e74a161c999fe
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_5.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd185667807e084a760bf3708d5da284a8603a48581a15b9810bded0f7fb4f7c
+size 4889216599
diff --git a/Koala-36M-v1/Koala_36M_6.csv b/Koala-36M-v1/Koala_36M_6.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4644793c453893b6a27dde98c57be0d987f743db
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_6.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69af3c329b77c8fe5fe2a3fd7b52ccce1f88f2649f4cc13e76ab27ecca5a5efa
+size 4889541704
diff --git a/Koala-36M-v1/Koala_36M_7.csv b/Koala-36M-v1/Koala_36M_7.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8cff5e3d850ad05c58630a21196f4cfbcc5fce70
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_7.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f654daa45977d2c12db1c22fbca9ef5bb729ba37240b83d0ed0bd1ca8008175
+size 4889367231
diff --git a/Koala-36M-v1/Koala_36M_8.csv b/Koala-36M-v1/Koala_36M_8.csv
new file mode 100644
index 0000000000000000000000000000000000000000..76d1311f5f771ce5ac6febff66c913e7b9b32ecc
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_8.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d1b984b48a839619b82d1db10c27a518d89c66815be010feaee76816eb59ccd
+size 4888856454
diff --git a/Koala-36M-v1/Koala_36M_9.csv b/Koala-36M-v1/Koala_36M_9.csv
new file mode 100644
index 0000000000000000000000000000000000000000..10f9756bfc0e552d8ccc2474079449d7ad7a4357
--- /dev/null
+++ b/Koala-36M-v1/Koala_36M_9.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f62b8a588768377d49d92b9a4ea5eb9745537399b1fd1ccc556721edb96bc4ca
+size 4889171948
diff --git a/URSA-1.7B/.gitattributes b/URSA-1.7B/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..f2bdbb705ed29c60c386dc5ce1141edebb7bbae3
--- /dev/null
+++ b/URSA-1.7B/.gitattributes
@@ -0,0 +1,37 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# . filter=lfs diff=lfs merge=lfs -text  (disabled: '.' matches no file; likely accidental `git lfs track .`)
+tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/URSA-1.7B/.gitignore b/URSA-1.7B/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..d5917ecb4ffd40155e59ac77e1b866b9af98a974
--- /dev/null
+++ b/URSA-1.7B/.gitignore
@@ -0,0 +1,55 @@
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.cuo
+
+# Compiled Dynamic libraries
+*.so
+*.dll
+*.dylib
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Compiled python
+*.pyc
+__pycache__
+
+# Compiled MATLAB
+*.mex*
+
+# IPython notebook checkpoints
+.ipynb_checkpoints
+
+# Editor temporaries
+*.swp
+*~
+
+# Sublime Text settings
+*.sublime-workspace
+*.sublime-project
+
+# Eclipse Project settings
+*.*project
+.settings
+
+# QtCreator files
+*.user
+
+# VSCode files
+.vscode
+
+# IDEA files
+.idea
+
+# OSX dir files
+.DS_Store
+
+# Android files
+.gradle
+*.iml
+local.properties
diff --git a/URSA-1.7B/LICENSE b/URSA-1.7B/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..d9a10c0d8e868ebf8da0b3dc95bb0be634c34bfe
--- /dev/null
+++ b/URSA-1.7B/LICENSE
@@ -0,0 +1,176 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
diff --git a/URSA-1.7B/README.md b/URSA-1.7B/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..90208d396629c07be7ba7357d7eece81587430f7
--- /dev/null
+++ b/URSA-1.7B/README.md
@@ -0,0 +1,117 @@
+---
+library_name: diffusers
+license: apache-2.0
+license_link: https://huggingface.co/BAAI/URSA-1.7B-FSQ320/blob/main/LICENSE
+pipeline_tag: text-to-video
+base_model:
+- Qwen/Qwen3-1.7B
+---
+
+# URSA-1.7B-FSQ320 Model Card
+
+## Model Details
+- **Developed by:** BAAI
+- **Model type:** Text-to-Video Generation Model
+- **Model size:** 1.7B
+- **Model precision:** torch.float16 (FP16)
+- **Model resolution:** 512x320
+- **Model paper:** [Uniform Discrete Diffusion with Metric Path for Video Generation](https://arxiv.org/abs/2510.24717)
+- **Model family:** [BAAI-Vision-URSA](https://github.com/baaivision/URSA)
+- **Model Tokenizer:** [Cosmos-Tokenize1-DV4x8x8-360p](https://huggingface.co/nvidia/Cosmos-Tokenize1-DV4x8x8-360p)
+- **Model Description:** This is a model that can be used to generate and modify videos based on text prompts.
+
+## Examples
+
+Using the [🤗's Diffusers library](https://github.com/huggingface/diffusers) to run URSA in a simple and efficient manner.
+
+```bash
+pip install diffusers transformers accelerate imageio[ffmpeg]
+pip install git+https://github.com/baaivision/URSA.git
+```
+
+Running the pipeline:
+
+```python
+import os, torch, numpy
+from diffnext.pipelines import URSAPipeline
+from diffnext.utils import export_to_video
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+model_id, height, width = "BAAI/URSA-1.7B-FSQ320", 320, 512
+model_args = {"torch_dtype": torch.float16, "trust_remote_code": True}
+pipe = URSAPipeline.from_pretrained(model_id, **model_args)
+pipe = pipe.to(torch.device("cuda"))
+
+text_prompt = "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur."
+negative_prompt = "worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly"
+
+# Text-to-Image
+prompt = text_prompt
+num_frames, num_inference_steps = 1, 25
+image = pipe(**locals()).frames[0]
+image.save("ursa.jpg")
+
+# Image-to-Video
+prompt = f"motion=9.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+video = pipe(**locals()).frames[0]
+export_to_video(video, "ursa_1+48f.mp4", fps=12)
+
+# Text-to-Video
+image, video = None, None
+prompt = f"motion=9.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+video = pipe(**locals()).frames[0]
+export_to_video(video, "ursa_49f.mp4", fps=12)
+
+# Video-to-Video
+prompt = f"motion=5.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+num_cond_frames, cond_noise_scale = 13, 0.1
+for i in range(12):
+ video, start_video = video[-num_cond_frames:], video
+ video = pipe(**locals()).frames[0]
+ video = numpy.concatenate([start_video, video[num_cond_frames:]])
+ export_to_video(video, "ursa_{}f.mp4".format(video.shape[0]), fps=12)
+```
+
+# Uses
+
+## Direct Use
+The model is intended for research purposes only. Possible research areas and tasks include
+
+- Research on generative models.
+- Applications in educational or creative tools.
+- Generation of artworks and use in design and other artistic processes.
+- Probing and understanding the limitations and biases of generative models.
+- Safe deployment of models which have the potential to generate harmful content.
+
+Excluded uses are described below.
+
+#### Out-of-Scope Use
+The model was not trained to be factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model.
+
+#### Misuse and Malicious Use
+Using the model to generate content that is cruel to individuals is a misuse of this model. This includes, but is not limited to:
+
+- Mis- and disinformation.
+- Representations of egregious violence and gore.
+- Impersonating individuals without their consent.
+- Sexual content without consent of the people who might see it.
+- Sharing of copyrighted or licensed material in violation of its terms of use.
+- Intentionally promoting or propagating discriminatory content or harmful stereotypes.
+- Sharing content that is an alteration of copyrighted or licensed material in violation of its terms of use.
+- Generating demeaning, dehumanizing, or otherwise harmful representations of people or their environments, cultures, religions, etc.
+
+## Limitations and Bias
+
+### Limitations
+
+- The autoencoding part of the model is lossy.
+- The model cannot render complex legible text.
+- The model does not achieve perfect photorealism.
+- The fingers, etc. in general may not be generated properly.
+- The model was trained on a subset of the web datasets [LAION-5B](https://laion.ai/blog/laion-5b/) and [COYO-700M](https://github.com/kakaobrain/coyo-dataset), which contains adult, violent and sexual content.
+
+### Bias
+While the capabilities of video generation models are impressive, they can also reinforce or exacerbate social biases.
diff --git a/URSA-1.7B/model_index.json b/URSA-1.7B/model_index.json
new file mode 100644
index 0000000000000000000000000000000000000000..fdccf92ec6b65a656500c6f266f1c72d3a50e6c3
--- /dev/null
+++ b/URSA-1.7B/model_index.json
@@ -0,0 +1,19 @@
+{
+ "_class_name": "URSAPipeline",
+ "tokenizer": [
+ "transformers",
+ "Qwen2TokenizerFast"
+ ],
+ "scheduler": [
+ "__scheduler__",
+ "KineticOptimalScheduler"
+ ],
+ "transformer": [
+ "__transformer__",
+ "URSATransformer3DModel"
+ ],
+ "vae": [
+ "__vae__",
+ "AutoencoderVQCosmos3D"
+ ]
+}
diff --git a/URSA-1.7B/scheduler/__scheduler__.py b/URSA-1.7B/scheduler/__scheduler__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c80a5b63b8cfe1030ec7eaaf3b64f96882ba6b50
--- /dev/null
+++ b/URSA-1.7B/scheduler/__scheduler__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+"""Scheduler."""
+
+from diffnext.schedulers.scheduling_dfm import KineticOptimalScheduler # noqa
diff --git a/URSA-1.7B/scheduler/scheduler_config.json b/URSA-1.7B/scheduler/scheduler_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f25dfeabc61bd4fd4618a386d90a3287acff2ed
--- /dev/null
+++ b/URSA-1.7B/scheduler/scheduler_config.json
@@ -0,0 +1,7 @@
+{
+ "_class_name": "KineticOptimalScheduler",
+ "alpha": 1.0,
+ "c": 5,
+ "eps": 1e-5,
+ "shift": 4.0
+}
diff --git a/URSA-1.7B/tokenizer/tokenizer_config.json b/URSA-1.7B/tokenizer/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..417d038a63fa3de29cfde265caedae14d1a58d92
--- /dev/null
+++ b/URSA-1.7B/tokenizer/tokenizer_config.json
@@ -0,0 +1,239 @@
+{
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151665": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151666": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151667": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151668": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- set content = content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if 
loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '\\n\\n\\n\\n' }}\n {%- endif %}\n{%- endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+}
diff --git a/URSA-1.7B/transformer/__transformer__.py b/URSA-1.7B/transformer/__transformer__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fac56e3856b5bf914da4fe8a367a86c8b77b4fb4
--- /dev/null
+++ b/URSA-1.7B/transformer/__transformer__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+"""Transformer model."""
+
+from diffnext.models.transformers.transformer_ursa import URSATransformer3DModel # noqa
diff --git a/URSA-1.7B/transformer/config.json b/URSA-1.7B/transformer/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6f97d0f4dbb10e585a5483bfaa105f57d9f8acdf
--- /dev/null
+++ b/URSA-1.7B/transformer/config.json
@@ -0,0 +1,13 @@
+{
+ "hidden_size": 2048,
+ "intermediate_size": 6144,
+ "max_window_layers": 28,
+ "num_attention_heads": 16,
+ "num_key_value_heads": 8,
+ "num_hidden_layers": 28,
+ "rope_theta": 1000000,
+ "vocab_size": 215669,
+ "lm_vocab_size": 151669,
+ "lm_head_size": 64000,
+ "bov_token_id": 151652
+}
diff --git a/URSA-1.7B/transformer/diffusion_pytorch_model.safetensors b/URSA-1.7B/transformer/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..044b9f4f019e87b9f017b2b0dc99217b1c9e3a97
--- /dev/null
+++ b/URSA-1.7B/transformer/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4a50d661919972cd5c8640ca3c9e5824945d105fb714a0de2f7610a4e7bebb8
+size 3964379808
diff --git a/URSA-1.7B/vae/__vae__.py b/URSA-1.7B/vae/__vae__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab040888aae6960f5e04480664f3900b799614fd
--- /dev/null
+++ b/URSA-1.7B/vae/__vae__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+"""VAE model."""
+
+from diffnext.models.autoencoders.autoencoder_vq_cosmos3d import AutoencoderVQCosmos3D # noqa
diff --git a/URSA-1.7B/vae/config.json b/URSA-1.7B/vae/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6700a83e4791744c41e6c22b647b73ce70e16c37
--- /dev/null
+++ b/URSA-1.7B/vae/config.json
@@ -0,0 +1,22 @@
+{
+ "_class_name": "AutoencoderVQCosmos3D",
+ "_quantizer_name": "FSQuantizer",
+ "in_channels": 3,
+ "latent_channels": 256,
+ "layers_per_block": 2,
+ "norm_num_groups": 1,
+ "out_channels": 3,
+ "sample_size": 1024,
+ "sample_frames": 49,
+ "num_vq_embeddings": 64000,
+ "vq_embed_dim": 6,
+ "patch_size": 2,
+ "temporal_stride": 4,
+ "spatial_stride": 8,
+ "block_out_channels": [
+ 128,
+ 256,
+ 512,
+ 512
+ ]
+}
diff --git a/URSA/.flake8 b/URSA/.flake8
new file mode 100644
index 0000000000000000000000000000000000000000..84d4384488ec68843e171d7d2e76f02bebcf7abe
--- /dev/null
+++ b/URSA/.flake8
@@ -0,0 +1,21 @@
+[flake8]
+max-line-length = 100
+ignore =
+ # whitespace before ':' (conflicted with Black)
+ E203,
+ # ambiguous variable name
+ E741,
+ # ‘from module import *’ used; unable to detect undefined names
+ F403,
+ # name may be undefined, or defined from star imports: module
+ F405,
+ # redefinition of unused name from line N
+ F811,
+ # undefined name
+ F821,
+ # line break before binary operator
+ W503,
+ # line break after binary operator
+ W504
+# module imported but unused
+per-file-ignores = __init__.py: F401
diff --git a/URSA/.gitignore b/URSA/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..d5917ecb4ffd40155e59ac77e1b866b9af98a974
--- /dev/null
+++ b/URSA/.gitignore
@@ -0,0 +1,55 @@
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.cuo
+
+# Compiled Dynamic libraries
+*.so
+*.dll
+*.dylib
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Compiled python
+*.pyc
+__pycache__
+
+# Compiled MATLAB
+*.mex*
+
+# IPython notebook checkpoints
+.ipynb_checkpoints
+
+# Editor temporaries
+*.swp
+*~
+
+# Sublime Text settings
+*.sublime-workspace
+*.sublime-project
+
+# Eclipse Project settings
+*.*project
+.settings
+
+# QtCreator files
+*.user
+
+# VSCode files
+.vscode
+
+# IDEA files
+.idea
+
+# OSX dir files
+.DS_Store
+
+# Android files
+.gradle
+*.iml
+local.properties
diff --git a/URSA/=4.57.1 b/URSA/=4.57.1
new file mode 100644
index 0000000000000000000000000000000000000000..e7c1b41c2e14aeb3f1d4245f984db46f59e28068
--- /dev/null
+++ b/URSA/=4.57.1
@@ -0,0 +1,70 @@
+Requirement already satisfied: diffusers in /usr/local/lib/python3.12/dist-packages (0.36.0)
+Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (5.2.0)
+Requirement already satisfied: accelerate in /usr/local/lib/python3.12/dist-packages (1.12.0)
+Requirement already satisfied: imageio in /usr/local/lib/python3.12/dist-packages (2.37.2)
+Requirement already satisfied: imageio-ffmpeg in /usr/local/lib/python3.12/dist-packages (0.6.0)
+Requirement already satisfied: omegaconf in /usr/local/lib/python3.12/dist-packages (2.3.0)
+Requirement already satisfied: wandb in /usr/local/lib/python3.12/dist-packages (0.25.0)
+Requirement already satisfied: importlib_metadata in /usr/local/lib/python3.12/dist-packages/setuptools/_vendor (from diffusers) (8.0.0)
+Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from diffusers) (3.17.0)
+Requirement already satisfied: httpx<1.0.0 in /usr/local/lib/python3.12/dist-packages (from diffusers) (0.28.1)
+Requirement already satisfied: huggingface-hub<2.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from diffusers) (1.3.0)
+Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from diffusers) (1.26.4)
+Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from diffusers) (2024.11.6)
+Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from diffusers) (2.32.3)
+Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.12/dist-packages (from diffusers) (0.5.3)
+Requirement already satisfied: Pillow in /usr/local/lib/python3.12/dist-packages (from diffusers) (11.1.0)
+Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (23.2)
+Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.2)
+Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.2)
+Requirement already satisfied: typer-slim in /usr/local/lib/python3.12/dist-packages (from transformers) (0.21.2)
+Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)
+Requirement already satisfied: psutil in /usr/local/lib/python3.12/dist-packages (from accelerate) (7.0.0)
+Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from accelerate) (2.9.0+cu128)
+Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.12/dist-packages (from omegaconf) (4.9.3)
+Requirement already satisfied: click>=8.0.1 in /usr/local/lib/python3.12/dist-packages (from wandb) (8.1.8)
+Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (3.1.46)
+Requirement already satisfied: platformdirs in /usr/local/lib/python3.12/dist-packages (from wandb) (4.3.6)
+Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<7,>=3.19.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (4.24.4)
+Requirement already satisfied: pydantic<3 in /usr/local/lib/python3.12/dist-packages (from wandb) (2.10.6)
+Requirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (2.54.0)
+Requirement already satisfied: typing-extensions<5,>=4.8 in /usr/local/lib/python3.12/dist-packages (from wandb) (4.12.2)
+Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.12/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.12)
+Requirement already satisfied: anyio in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->diffusers) (4.8.0)
+Requirement already satisfied: certifi in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->diffusers) (2025.1.31)
+Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->diffusers) (1.0.7)
+Requirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->diffusers) (3.10)
+Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.12/dist-packages (from httpcore==1.*->httpx<1.0.0->diffusers) (0.14.0)
+Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=0.34.0->diffusers) (2025.2.0)
+Requirement already satisfied: hf-xet<2.0.0,>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=0.34.0->diffusers) (1.3.2)
+Requirement already satisfied: shellingham in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=0.34.0->diffusers) (1.5.4)
+Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.12/dist-packages (from pydantic<3->wandb) (0.7.0)
+Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.12/dist-packages (from pydantic<3->wandb) (2.27.2)
+Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->diffusers) (3.4.1)
+Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->diffusers) (2.0.7)
+Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (75.8.2)
+Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (1.14.0)
+Requirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.4.2)
+Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.1.6)
+Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.93)
+Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.90)
+Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.90)
+Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (9.10.2.21)
+Requirement already satisfied: nvidia-cublas-cu12==12.8.4.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.4.1)
+Requirement already satisfied: nvidia-cufft-cu12==11.3.3.83 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (11.3.3.83)
+Requirement already satisfied: nvidia-curand-cu12==10.3.9.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (10.3.9.90)
+Requirement already satisfied: nvidia-cusolver-cu12==11.7.3.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (11.7.3.90)
+Requirement already satisfied: nvidia-cusparse-cu12==12.5.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.5.8.93)
+Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (0.7.1)
+Requirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (2.27.5)
+Requirement already satisfied: nvidia-nvshmem-cu12==3.3.20 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.3.20)
+Requirement already satisfied: nvidia-nvtx-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.90)
+Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.8.93)
+Requirement already satisfied: nvidia-cufile-cu12==1.13.1.3 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (1.13.1.3)
+Requirement already satisfied: triton==3.5.0 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.5.0)
+Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.12/dist-packages/setuptools/_vendor (from importlib_metadata->diffusers) (3.19.2)
+Requirement already satisfied: annotated-doc>=0.0.2 in /usr/local/lib/python3.12/dist-packages (from typer-slim->transformers) (0.0.4)
+Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.12/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.2)
+Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch>=2.0.0->accelerate) (1.3.0)
+Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.12/dist-packages (from anyio->httpx<1.0.0->diffusers) (1.3.1)
+Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch>=2.0.0->accelerate) (3.0.2)
diff --git a/URSA/LICENSE b/URSA/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..d9a10c0d8e868ebf8da0b3dc95bb0be634c34bfe
--- /dev/null
+++ b/URSA/LICENSE
@@ -0,0 +1,176 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
diff --git a/URSA/README.md b/URSA/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2a41d6d1d43272115889ddec3cce26ca99d4e1c5
--- /dev/null
+++ b/URSA/README.md
@@ -0,0 +1,191 @@
+
+
+

+
+
🐻 URSA: Uniform Discrete Diffusion with Metric Path
for Video Generation
+
+
+
+
+
+
+
+
+
+
+[Haoge Deng](https://scholar.google.com/citations?user=S2sbvjgAAAAJ&hl)1,4*, [Ting Pan](https://scholar.google.com/citations?&user=qQv6YbsAAAAJ)2,4*, [Fan Zhang](https://scholar.google.com/citations?user=VsJ39HMAAAAJ)4*, [Yang Liu](https://scholar.google.com/citations?user=9JcQ2hwAAAAJ&hl)3,4*, [Zhuoyan Luo](https://scholar.google.com/citations?user=mKQhEsIAAAAJ&hl)4, [Yufeng Cui](https://scholar.google.com/citations?user=5Ydha2EAAAAJ&hl)4, [Wenxuan Wang](https://scholar.google.com/citations?user=75OyC-oAAAAJ&hl)4
+[Chunhua Shen](https://scholar.google.com/citations?user=Ljk2BvIAAAAJ&hl)3, [Shiguang Shan](https://scholar.google.com/citations?user=Vkzd7MIAAAAJ&hl)2, [Zhaoxiang Zhang](https://scholar.google.com/citations?user=qxWfV6cAAAAJ&hl)1†, [Xinlong Wang](https://scholar.google.com/citations?user=DPz0DjYAAAAJ&hl)4†
+
+[CASIA](http://english.ia.cas.cn)1, [CASICT](http://english.ict.cas.cn)2, [ZJU](https://www.zju.edu.cn/english)3, [BAAI](https://www.baai.ac.cn/en)4
+* Equal Contribution, † Corresponding Author
+
+
+
+
+We present **URSA** (**U**niform disc**R**ete diffu**S**ion with metric p**A**th), a simple yet powerful framework that bridges the gap with continuous approaches. **URSA** formulates the video generation task as an iterative global refinement of discrete spatiotemporal tokens and scales efficiently to long video generation, requiring fewer inference steps. **URSA** enables multi-task video generation with asynchronous timestep scheduling strategy in one unified model.
+
+## 🚀 News
+- ```[Feb 2026]``` Accepted by ICLR 2026 [[OpenReview]](https://openreview.net/forum?id=GFU5yCbILk).
+- ```[Jan 2026]``` Released [Training Guide](./docs/training.md).
+- ```[Oct 2025]``` 🎉 URSA is part of [Emu3.5](https://github.com/baaivision/Emu3.5) as DiDA (Discrete Diffusion Adaptation)!
+- ```[Oct 2025]``` Released TI2V 🤗 Demo.
+- ```[Oct 2025]``` Released [Paper](https://arxiv.org/abs/2510.24717) & [Project Page](http://bitterdhg.github.io/URSA_page) & [Evaluation Guide](./docs/evaluation.md).
+
+## ✨Highlights
+
+- 🥇 **Novel Approach**: Uniform Discrete Diffusion with Metric Path.
+- 🥈 **SOTA Performance**: High efficiency with state-of-the-art T2I/T2V/I2V results.
+- 🥉 **Unified Modeling**: Multi-task capabilities in a single unified model.
+
+## 🗄️ Models
+
+### 🖼️ Text to Image
+
+| Model | Resolution | Data | Weight | GenEval | DPGBench |
+|:-----:|:----------:|:----:|:------:|:-------:|:--------:|
+| URSA-0.6B-IBQ1024 | 1024x1024 | 30M | [🤗 HF](https://huggingface.co/BAAI/URSA-0.6B-IBQ1024) \| [🤖 ModelScope](https://www.modelscope.cn/models/BAAI/URSA-0.6B-IBQ1024) | 0.79 | 85.6 |
+| URSA-1.7B-IBQ1024 | 1024x1024 | 30M | [🤗 HF](https://huggingface.co/BAAI/URSA-1.7B-IBQ1024) \| [🤖 ModelScope](https://www.modelscope.cn/models/BAAI/URSA-1.7B-IBQ1024) | 0.80 | 86.0 |
+
+### 🎬 Text to Video
+
+| Model | Resolution | Data | Weight | VBench-T2V | VBench-I2V |
+|:-----:|:----------:|:----:|:------:|:----------:|:----------:|
+| URSA-0.6B-FSQ320 | 49x512x320 | 24M | [🤗 HF](https://huggingface.co/BAAI/URSA-0.6B-FSQ320) \| [🤖 ModelScope](https://www.modelscope.cn/models/BAAI/URSA-0.6B-FSQ320) | 81.4 | 86.0 |
+| URSA-1.7B-FSQ320 | 49x512x320 | 24M | [🤗 HF](https://huggingface.co/BAAI/URSA-1.7B-FSQ320) \| [🤖 ModelScope](https://www.modelscope.cn/models/BAAI/URSA-1.7B-FSQ320) | 82.4 | 86.2 |
+
+## 📖 Table of Contents
+- [🔧 Installation](#installation)
+- [🔥 Quick Start](#quick-start)
+ - [🖼️ Image Generation](#quickstart-image-generation)
+ - [🎬 Video Generation](#quickstart-video-generation)
+- [💻 Gradio Demo](#gradio-demo)
+- [💯 Evaluation](./docs/evaluation.md)
+- [🤖 Training](./docs/training.md)
+
+## 🔧 Installation
+
+
+Clone this repository to local disk and install:
+```bash
+pip install diffusers "transformers>=4.57.1" accelerate imageio imageio-ffmpeg omegaconf wandb
+git clone https://github.com/baaivision/URSA.git
+cd URSA && pip install .
+```
+
+## 🔥 Quick Start
+
+
+### 🖼️ Image Generation
+
+
+```python
+import torch
+from diffnext.pipelines import URSAPipeline
+
+model_id, height, width = "BAAI/URSA-1.7B-IBQ1024", 1024, 1024
+model_args = {"torch_dtype": torch.float16, "trust_remote_code": True}
+pipe = URSAPipeline.from_pretrained(model_id, **model_args)
+pipe = pipe.to(torch.device("cuda"))
+
+prompt = "The bear, calm and still, gazes upward as if lost in contemplation of the cosmos."
+negative_prompt = "worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly"
+
+image = pipe(**locals()).frames[0]
+image.save("ursa.jpg")
+```
+
+### 🎬 Video Generation
+
+
+```python
+import os, torch, numpy
+from diffnext.pipelines import URSAPipeline
+from diffnext.utils import export_to_video
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+model_id, height, width = "BAAI/URSA-1.7B-FSQ320", 320, 512
+model_args = {"torch_dtype": torch.float16, "trust_remote_code": True}
+pipe = URSAPipeline.from_pretrained(model_id, **model_args)
+pipe = pipe.to(torch.device("cuda"))
+
+text_prompt = "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur."
+negative_prompt = "worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly"
+
+# Text-to-Image
+prompt = text_prompt
+num_frames, num_inference_steps = 1, 25
+image = pipe(**locals()).frames[0]
+image.save("ursa.jpg")
+
+# Image-to-Video
+prompt = f"motion=9.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+video = pipe(**locals()).frames[0]
+export_to_video(video, "ursa_1+48f.mp4", fps=12)
+
+# Text-to-Video
+image, video = None, None
+prompt = f"motion=9.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+video = pipe(**locals()).frames[0]
+export_to_video(video, "ursa_49f.mp4", fps=12)
+
+# Video-to-Video
+prompt = f"motion=5.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+num_cond_frames, cond_noise_scale = 13, 0.1
+for i in range(12):
+ video, start_video = video[-num_cond_frames:], video
+ video = pipe(**locals()).frames[0]
+ video = numpy.concatenate([start_video, video[num_cond_frames:]])
+ export_to_video(video, "ursa_{}f.mp4".format(video.shape[0]), fps=12)
+```
+
+## 💻 Gradio Demo
+
+
+```bash
+# Text-to-Image (T2I)
+python scripts/app_ursa_t2i.py --model "BAAI/URSA-1.7B-IBQ1024" --device 0
+
+# Text-to-Image-to-Video (TI2V)
+python scripts/app_ursa_ti2v.py --model "BAAI/URSA-1.7B-FSQ320" --device 0
+```
+
+## 📋 Todo List
+- [X] [Model Zoo](#model-zoo)
+- [X] [Quick Start](#quick-start)
+- [X] [Gradio Demo](#gradio-demo)
+- [X] [Evaluation Guide](./docs/evaluation.md)
+- [X] [Training Guide](./docs/training.md)
+- [ ] 4B Model
+
+## 📖 Citation
+If you find this repository useful, please consider giving a star ⭐ and citation 🦖:
+```
+@article{deng2025ursa,
+ title={Uniform Discrete Diffusion with Metric Path for Video Generation},
+ author={Deng, Haoge and Pan, Ting and Zhang, Fan and Liu, Yang and Luo, Zhuoyan and Cui, Yufeng and Shen, Chunhua and Shan, Shiguang and Zhang, Zhaoxiang and Wang, Xinlong},
+ journal={arXiv preprint arXiv:2510.24717},
+ year={2025}
+}
+```
+```
+@article{deng2024nova,
+ title={Autoregressive Video Generation without Vector Quantization},
+ author={Deng, Haoge and Pan, Ting and Diao, Haiwen and Luo, Zhengxiong and Cui, Yufeng and Lu, Huchuan and Shan, Shiguang and Qi, Yonggang and Wang, Xinlong},
+ journal={arXiv preprint arXiv:2412.14169},
+ year={2024}
+}
+```
+
+## 🤗 Acknowledgement
+
+We thank the repositories:
+- [NOVA](https://github.com/baaivision/NOVA). ✨NOVA is the predecessor of 🐻URSA.
+- [FlowMatching](https://github.com/facebookresearch/flow_matching). This codebase systemically provides CFM and DFM implementations.
+- [FUDOKI](https://github.com/fudoki-hku/FUDOKI). This codebase provides a naive multimodal DFM implementation.
+- [CodeWithGPU](https://github.com/seetacloud/codewithgpu). CodeWithGPU library is the core of our data loading pipeline.
+
+## License
+Code and models are licensed under [Apache License 2.0](LICENSE).
diff --git a/URSA/accelerate_configs/deepspeed_zero2.yaml b/URSA/accelerate_configs/deepspeed_zero2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ae955d0b868dce3f54519f7c38c09a639221d4d
--- /dev/null
+++ b/URSA/accelerate_configs/deepspeed_zero2.yaml
@@ -0,0 +1,12 @@
+distributed_type: DEEPSPEED
+deepspeed_config:
+ deepspeed_multinode_launcher: standard
+ gradient_clipping: 0.0
+  zero_stage: 3 #2  # NOTE(review): file is named deepspeed_zero2.yaml but stage is 3 — confirm intended
+ offload_optimizer_device: cpu # Moves optimizer states to CPU RAM
+ offload_param_device: cpu # Moves model parameters to CPU RAM
+ zero3_init_flag: true # Initializes the model directly across GPUs to save CPU RAM
+ zero3_save_16bit_model: true # Consolidates weights into a single file when saving checkpoints
+num_machines: 1
+num_processes: 8
+machine_rank: 0
diff --git a/URSA/assets/sample_image.jpg b/URSA/assets/sample_image.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1200b735417a797eaa3df888273540d97fd83b0b
Binary files /dev/null and b/URSA/assets/sample_image.jpg differ
diff --git a/URSA/configs/distill_dimo.yaml b/URSA/configs/distill_dimo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc314d3833a6f35c64b88224971ac9ec92cb47d8
--- /dev/null
+++ b/URSA/configs/distill_dimo.yaml
@@ -0,0 +1,158 @@
+# ============================================================================
+# URSA one-step distillation — DiMO-style distributed training config
+# ============================================================================
+# Verified native inference regime (from A/B testing — ground truth):
+# height=320, width=512, num_frames=49, guidance_scale=7, teacher_steps=50.
+# no_cfg (guidance_scale=1) does NOT produce valid output.
+# All defaults below align to this verified regime.
+#
+# Launch (8-GPU, single node):
+#
+# accelerate launch --config_file accelerate_configs/deepspeed_zero2.yaml \
+# --machine_rank 0 --num_machines 1 --num_processes 8 \
+# scripts/train_distill_dimo.py \
+# config="./configs/distill_dimo.yaml" \
+# experiment.output_dir="./experiments/distill_dimo" \
+# distill.teacher_ckpt="/path/to/URSA-1.7B-IBQ1024" \
+# distill.prompt_source="/data/Koala_36M_*.csv"
+#
+# Smoke test (1 GPU, 50 steps — save student checkpoint):
+#
+# accelerate launch --num_processes 1 \
+# scripts/train_distill_dimo.py \
+# config="./configs/distill_dimo.yaml" \
+# experiment.output_dir="./experiments/smoke" \
+# distill.teacher_ckpt="/path/to/URSA-1.7B-IBQ1024" \
+# distill.prompt_source="prompts.txt" \
+# training.max_train_steps=50 \
+# experiment.save_every=50
+#
+# Load student for 1-step inference (must use CFG=7, native geometry):
+#
+# pipe = URSAPipeline.from_pretrained("/path/to/URSA-1.7B-IBQ1024")
+# state = torch.load("experiments/distill_dimo/checkpoints/final/student.pt")
+# pipe.transformer.load_state_dict(state, strict=True)
+# frames = pipe(prompt="...", num_inference_steps=1,
+# height=320, width=512, num_frames=49,
+# guidance_scale=7).frames
+# ============================================================================
+
+# ── Experiment bookkeeping ───────────────────────────────────────────────────
+experiment:
+ name: distill_dimo
+ output_dir: ./experiments/distill_dimo
+ log_every: 10
+ save_every: 100
+ resume_iter: 0 # set to step number to resume
+
+# ── Training (framework-level) ───────────────────────────────────────────────
+training:
+ seed: 42
+ mixed_precision: bf16 # bf16 | fp16 | fp32
+ max_train_steps: 10000
+ gradient_accumulation_steps: 1 # Two-backward; keep =1 for distillation
+
+# ── Distillation hyperparameters ─────────────────────────────────────────────
+distill:
+ # ---- Paths ----------------------------------------------------------------
+ teacher_ckpt: /gfs/space/private/fengzl/World_Model/URSA-1.7B
+ prompt_source: /gfs/space/private/fengzl/World_Model/Koala-36M-v1 # glob, dir, .txt, or comma-list
+
+ # ---- Video geometry (verified native: 320×512×49) -------------------------
+ num_frames: 49
+ height: 320
+ width: 512
+ max_prompt_length: 320
+
+ # ---- Data -----------------------------------------------------------------
+ batch_size_per_gpu: 1 # effective global batch = batch_size_per_gpu × 8 GPUs
+
+ # # ---- Loss weights ---------------------------------------------------------
+ # lambda_kd: 0.5 # KL(z_T || z_S) weight
+ # lambda_pg: 1.0 # REINFORCE policy gradient weight
+ # lambda_ent: 0.01 # entropy bonus (λ_ent_eff × H) — set 0 for DiMO orig
+ # tau: 1.0 # student sampling temperature
+ # tau_kd: 1.0 # KD / Jeffrey softmax temperature
+
+ # # ---- Teacher CFG (aligned to verified working regime: CFG=7) ---------------
+ # # A/B testing confirmed: guidance_scale=1 (no_cfg) does NOT produce valid
+ # # output for this URSA checkpoint. The teacher KD target must use CFG=7.
+ # enable_teacher_cfg: true
+ # teacher_cfg_scale: 7.0 # s in z_guided = z_uncond + s*(z_cond-z_uncond)
+ # # Verified: CFG=7 is the official working value.
+ # teacher_cfg_prob: 1.0 # max fraction of samples using guided target
+ # teacher_cfg_warmup_steps: 2000 # linear warmup 0→teacher_cfg_prob
+ # teacher_cfg_trunc: 0.9 # when t≥trunc, scale falls to 1 (no guide)
+ # lambda_kd_uncond: 0.3 # weight for uncond-branch KD loss
+ # reward_use_guided: false # [RISKY] use guided logits for reward signal
+
+ # # ---- DiMO extensions -------------------------------------------------------
+ # fake_rounds: 1 # aux updates per student update (DiMO=2; try 2)
+ # use_surrogate_grad: false
+ # lambda_surr: 1.0
+
+ # ---- Loss weights ---------------------------------------------------------
+  lambda_kd: 1.0          # KL(z_T || z_S) weight (base knowledge-distillation weight, unchanged)
+  lambda_pg: 1.0          # [repurposed] now acts as lambda_bridge, controlling the strength of MSE pseudo-gradient injection
+  lambda_ent: 0.0         # [deprecated] RL entropy bonus has been removed entirely; keep at 0.0
+ tau: 1.0 # student sampling temperature
+ tau_kd: 1.0 # KD softmax temperature
+
+ # ---- Teacher CFG (aligned to verified working regime: CFG=7) ---------------
+ enable_teacher_cfg: true
+ teacher_cfg_scale: 7.0
+ teacher_cfg_prob: 1.0
+ teacher_cfg_warmup_steps: 1000
+ teacher_cfg_trunc: 0.9
+ lambda_kd_uncond: 0.3
+  # reward_use_guided: false  <-- [delete this line] the reward computation has been removed
+
+ # ---- DiMO extensions -------------------------------------------------------
+  fake_rounds: 2 #1     # Iterations for aux to fit fake tokens; if aux's bridge_loss will not decrease, try 2
+ use_surrogate_grad: false
+ lambda_surr: 1.0
+
+ # ---- Stability -------------------------------------------------------------
+ t_curriculum_steps: 10000 # curriculum steps before uniform-t sampling
+ p_init_mix_ratio: 0.2 # fraction of batch from corrupted x_hat_prev
+ p_mix_corrupt_frac: 0.2 # token corruption rate in p_init mixing
+ collapse_warn_frac: 0.2 # warn if tok_entropy < frac × initial entropy
+
+ # ---- Aux initialisation ---------------------------------------------------
+ aux_noise_std: 1.0e-5 # tiny noise added to aux weights at init to break
+ # symmetry; set 0.0 to keep aux == student exactly
+
+ # ---- Gradient clipping ----------------------------------------------------
+ grad_clip: 1.0
+
+# ── Student optimizer ────────────────────────────────────────────────────────
+optimizer_student:
+ target: torch.optim.AdamW
+ params:
+ lr: 1.0e-5
+ betas: [0.9, 0.95]
+ weight_decay: 0.01
+
+# ── Aux optimizer ────────────────────────────────────────────────────────────
+optimizer_aux:
+ target: torch.optim.AdamW
+ params:
+ lr: 1.0e-5
+ betas: [0.9, 0.95]
+ weight_decay: 0.01
+
+# ── LR scheduler (cosine, shared warmup/decay params for both opts) ──────────
+lr_scheduler:
+ target: diffnext.engine.lr_scheduler.CosineLR
+ params:
+ lr_max: ${optimizer_student.params.lr}
+ lr_min: 1.0e-6
+ max_steps: ${training.max_train_steps}
+ warmup_steps: 500
+
+# ── Prompt DataLoader ─────────────────────────────────────────────────────────
+prompt_dataloader:
+ shuffle_files: true
+ shuffle_buffer: 50000 # in-memory shuffle buffer per shard; reduce if OOM
+ num_workers: 4 # CPU workers (no CUDA in workers)
+ caption_field: caption # CSV column name (Koala default)
diff --git a/URSA/configs/onestep_dimo.yaml b/URSA/configs/onestep_dimo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..83e917530b7b0417fc4eb9a43025dd79b8818440
--- /dev/null
+++ b/URSA/configs/onestep_dimo.yaml
@@ -0,0 +1,111 @@
+# ============================================================================
+# URSA one-step distillation — DiMO-style training configuration
+# ============================================================================
+# Reference: train_onestep_ursa_dimo.py
+#
+# DiMO hyperparameter comparison (Meissonic vs. our URSA defaults)
+# ---------------------------------------------------------------
+# Param DiMO (Meissonic) URSA (this config) Risk / Note
+# ─────────────────────────────────────────────────────────────────────────
+# guidance_scale (CFG) 3.0 (true_cfg) 3.0 (teacher_cfg) ✅ aligned
+# fake_rounds 2 1 ⚠ try 2 for aux stability
+# fixed_ratio 0.5 (mask ratio) — N/A (different domain)
+# distil_loss_type surrogate MSE optional surrogate ✅ toggle via use_surrogate_grad
+# noise_emb_perturb True — ℹ️ not needed for VQ-based model
+# cfg_prob 1.0 teacher_cfg_prob=1.0 ✅ aligned
+# lambda_ent 0.0 (no ent reg) 0.01 ℹ️ our addition for stability
+# ============================================================================
+
+# ── Paths ────────────────────────────────────────────────────────────────────
+teacher_ckpt: "/path/to/URSA"
+prompt_file: "prompts.txt"
+out_dir: "./outputs/dimo"
+
+# ── Video geometry ───────────────────────────────────────────────────────────
+num_frames: 17
+height: 256
+width: 256
+max_prompt_length: 320
+
+# ── Training ─────────────────────────────────────────────────────────────────
+batch_size: 2 # reduce to 1 if enable_teacher_cfg uses too much VRAM
+num_steps: 10000
+lr_student: 1.0e-5
+lr_aux: 1.0e-5
+weight_decay: 0.01
+grad_clip: 1.0
+mixed_precision: "bf16"
+seed: 42
+log_every: 50
+save_every: 1000
+
+# ── Loss weights ─────────────────────────────────────────────────────────────
+lambda_pg: 1.0
+lambda_kd: 0.5
+lambda_ent: 0.01 # entropy regularisation (0 → DiMO original; 0.01 → our default)
+tau: 1.0 # student sampling temperature
+tau_kd: 1.0 # KD softmax temperature
+
+# ── Teacher CFG (DiMO true_cfg style) ────────────────────────────────────────
+# Set enable_teacher_cfg: false to revert to the prior single-branch behavior.
+# All other params in this block are ignored when enable_teacher_cfg=false.
+enable_teacher_cfg: true
+
+teacher_cfg_scale: 3.0 # s in z_guided = z_uncond + s*(z_cond - z_uncond)
+ # Matches DiMO true_cfg=3.0
+
+teacher_cfg_prob: 1.0 # Probability of using guided target per batch (after warmup).
+ # 1.0 = always guided (DiMO default).
+
+teacher_cfg_warmup_steps: 2000
+ # Ramp teacher_cfg_prob from 0 → teacher_cfg_prob over this many
+ # steps. Prevents instability at the start of training.
+
+teacher_cfg_trunc: 0.9 # When t >= trunc, CFG scale falls to 1 (no guidance at high noise).
+ # Mirrors DiMO's guidance_trunc parameter.
+
+lambda_kd_uncond: 0.3 # Weight for uncond-branch KD loss.
+ # Keeps the student uncond-capable for eval-time CFG.
+
+reward_use_guided: false # [RISKY] Use guided teacher logits for REINFORCE reward.
+ # Default false: use non-guided cond (more stable).
+
+# ── Eval / inference CFG ─────────────────────────────────────────────────────
+eval_cfg_scale: 3.0 # guidance_scale used during evaluation
+use_cfg_eval: false # Run eval with inference-time CFG (2× forward)
+
+# ── DiMO extensions ──────────────────────────────────────────────────────────
+use_surrogate_grad: false # DiMO surrogate MSE trick (zero-variance alternative to REINFORCE)
+lambda_surr: 1.0
+fake_rounds: 1 # Aux updates per generator update (DiMO uses 2; try 2 for aux stability)
+
+# ── Stability ─────────────────────────────────────────────────────────────────
+t_curriculum_steps: 10000 # Steps to use t-curriculum (biases t toward larger values)
+p_mix_corrupt_frac: 0.2 # Fraction of tokens to corrupt in p_init mixing
+p_init_mix_ratio: 0.2 # Fraction of batch drawn from corrupted x_hat_prev
+collapse_warn_frac: 0.2 # Warn if tok_hist_entropy drops below this fraction of initial
+
+# ── Debug ────────────────────────────────────────────────────────────────────
+dry_run: false # Run 1 step, print diagnostics, exit
+debug_dump: 0 # Dump token histogram + x_hat every N steps (0=off)
+
+# ── Recommended quick-start commands ─────────────────────────────────────────
+# # Smoke test (CFG enabled):
+# python scripts/train_onestep_ursa_dimo.py \
+# --teacher_ckpt /path/to/URSA --prompt_file prompts.txt \
+# --enable_teacher_cfg --teacher_cfg_scale 3.0 \
+# --num_frames 17 --height 256 --width 256 --dry_run
+#
+# # Full training (DiMO-aligned):
+# python scripts/train_onestep_ursa_dimo.py \
+# --teacher_ckpt /path/to/URSA --prompt_file prompts.txt \
+# --enable_teacher_cfg --teacher_cfg_scale 3.0 \
+# --batch_size 2 --num_steps 10000 --fake_rounds 2 \
+# --out_dir ./outputs/dimo_cfg
+#
+# # Eval (compare 3 student modes vs teacher):
+# python scripts/eval_onestep_ursa.py \
+# --teacher_ckpt /path/to/URSA \
+# --student_ckpt ./outputs/dimo_cfg/final/student.pt \
+# --modes no_cfg cfg baked --eval_cfg_scale 3.0 \
+# --out_dir ./outputs/eval
diff --git a/URSA/configs/ursa_0.6b_fsq320.yaml b/URSA/configs/ursa_0.6b_fsq320.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b890e05c5496dca4cc7aff614b9452e5f540f8dd
--- /dev/null
+++ b/URSA/configs/ursa_0.6b_fsq320.yaml
@@ -0,0 +1,62 @@
+wandb:
+ run_id: null
+
+experiment:
+ project: ursa_0.6b_fsq320
+ log_every: 20
+ save_every: 5000
+ resume_from_checkpoint: latest
+
+model:
+ name: "transformer"
+ gradient_checkpointing: 2 # 1: +mlp_ckpt 2: +qkv_ckpt 3: +layer_ckpt
+ async_timestep: true
+ tokenizer:
+ params:
+ max_length: 320
+ truncation: true
+ padding_side: left
+ padding: max_length
+
+pipeline:
+ target: diffnext.pipelines.ursa.pipeline_train.URSATrainPipeline
+ paths:
+ pretrained_path: /path/to/URSA-0.6B-FSQ320
+ module_dict:
+ vae: ${pipeline.paths.pretrained_path}/vae
+ scheduler: ${pipeline.paths.pretrained_path}/scheduler
+ tokenizer: ${pipeline.paths.pretrained_path}/tokenizer
+ model_index: ${pipeline.paths.pretrained_path}/model_index.json
+
+optimizer:
+ target: torch.optim.AdamW
+ param_groups: false
+ params:
+ lr: 0.00003
+ betas: [0.9, 0.95]
+ weight_decay: 0.05
+ fused: true
+
+lr_scheduler:
+ target: diffnext.engine.lr_scheduler.CosineLR
+ params:
+ lr_max: ${optimizer.params.lr}
+ lr_min: 0.00001
+ max_steps: ${training.max_train_steps}
+ warmup_steps: 500
+
+train_dataloader:
+ target: diffnext.data.flex_loaders.FeatureDataLoader
+ params:
+ dataset: /path/to/fsq320_dataset
+ batch_size: ${training.batch_size}
+ seed: ${training.seed}
+ num_workers: 4
+ shuffle: true
+
+training:
+ gradient_accumulation_steps: 1
+ batch_size: 1 # * 256 = 256
+ max_train_steps: 20000
+ seed: 1337
+ mixed_precision: bf16
diff --git a/URSA/configs/ursa_0.6b_ibq1024.yaml b/URSA/configs/ursa_0.6b_ibq1024.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..352cb87b27ec6164a2c1d714be10068134ae0b37
--- /dev/null
+++ b/URSA/configs/ursa_0.6b_ibq1024.yaml
@@ -0,0 +1,62 @@
+wandb:
+ run_id: null
+
+experiment:
+ project: ursa_0.6b_ibq1024
+ log_every: 20
+ save_every: 5000
+ resume_from_checkpoint: latest
+
+model:
+ name: "transformer"
+ gradient_checkpointing: 2 # 1: +mlp_ckpt 2: +qkv_ckpt 3: +layer_ckpt
+ async_timestep: false
+ tokenizer:
+ params:
+ max_length: 320
+ truncation: true
+ padding_side: left
+ padding: max_length
+
+pipeline:
+ target: diffnext.pipelines.ursa.pipeline_train.URSATrainPipeline
+ paths:
+ pretrained_path: /path/to/URSA-0.6B-IBQ1024
+ module_dict:
+ vae: ${pipeline.paths.pretrained_path}/vae
+ scheduler: ${pipeline.paths.pretrained_path}/scheduler
+ tokenizer: ${pipeline.paths.pretrained_path}/tokenizer
+ model_index: ${pipeline.paths.pretrained_path}/model_index.json
+
+optimizer:
+ target: torch.optim.AdamW
+ param_groups: false
+ params:
+ lr: 0.00003
+ betas: [0.9, 0.95]
+ weight_decay: 0.05
+ fused: true
+
+lr_scheduler:
+ target: diffnext.engine.lr_scheduler.CosineLR
+ params:
+ lr_max: ${optimizer.params.lr}
+ lr_min: 0.00001
+ max_steps: ${training.max_train_steps}
+ warmup_steps: 500
+
+train_dataloader:
+ target: diffnext.data.flex_loaders.FeatureDataLoader
+ params:
+ dataset: /path/to/ibq1024_dataset
+ batch_size: ${training.batch_size}
+ seed: ${training.seed}
+ num_workers: 4
+ shuffle: true
+
+training:
+ gradient_accumulation_steps: 1
+ batch_size: 1 # * 512 = 512
+ max_train_steps: 120000
+ seed: 1337
+ mixed_precision: bf16
diff --git a/URSA/configs/ursa_1.7b_fsq320.yaml b/URSA/configs/ursa_1.7b_fsq320.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c2e5f945f904913b87b1809aff61e8beb38803ea
--- /dev/null
+++ b/URSA/configs/ursa_1.7b_fsq320.yaml
@@ -0,0 +1,62 @@
+wandb:
+ run_id: null
+
+experiment:
+ project: ursa_1.7b_fsq320
+ log_every: 20
+ save_every: 5000
+ resume_from_checkpoint: latest
+
+model:
+ name: "transformer"
+ gradient_checkpointing: 2 # 1: +mlp_ckpt 2: +qkv_ckpt 3: +layer_ckpt
+ async_timestep: true
+ tokenizer:
+ params:
+ max_length: 320
+ truncation: true
+ padding_side: left
+ padding: max_length
+
+pipeline:
+ target: diffnext.pipelines.ursa.pipeline_train.URSATrainPipeline
+ paths:
+ pretrained_path: /path/to/URSA-1.7B-FSQ320
+ module_dict:
+ vae: ${pipeline.paths.pretrained_path}/vae
+ scheduler: ${pipeline.paths.pretrained_path}/scheduler
+ tokenizer: ${pipeline.paths.pretrained_path}/tokenizer
+ model_index: ${pipeline.paths.pretrained_path}/model_index.json
+
+optimizer:
+ target: torch.optim.AdamW
+ param_groups: false
+ params:
+ lr: 0.00003
+ betas: [0.9, 0.95]
+ weight_decay: 0.05
+ fused: true
+
+lr_scheduler:
+ target: diffnext.engine.lr_scheduler.CosineLR
+ params:
+ lr_max: ${optimizer.params.lr}
+ lr_min: 0.00001
+ max_steps: ${training.max_train_steps}
+ warmup_steps: 500
+
+train_dataloader:
+ target: diffnext.data.flex_loaders.FeatureDataLoader
+ params:
+ dataset: /path/to/fsq320_dataset
+ batch_size: ${training.batch_size}
+ seed: ${training.seed}
+ num_workers: 4
+ shuffle: true
+
+training:
+ gradient_accumulation_steps: 1
+ batch_size: 1 # * 256 = 256
+ max_train_steps: 20000
+ seed: 1337
+ mixed_precision: bf16
diff --git a/URSA/configs/ursa_1.7b_ibq1024.yaml b/URSA/configs/ursa_1.7b_ibq1024.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5d2e122599f087447f137c67fbe799ec0a4efb0d
--- /dev/null
+++ b/URSA/configs/ursa_1.7b_ibq1024.yaml
@@ -0,0 +1,62 @@
+wandb:
+ run_id: null
+
+experiment:
+ project: ursa_1.7b_ibq1024
+ log_every: 20
+ save_every: 5000
+ resume_from_checkpoint: latest
+
+model:
+ name: "transformer"
+ gradient_checkpointing: 2 # 1: +mlp_ckpt 2: +qkv_ckpt 3: +layer_ckpt
+ async_timestep: false
+ tokenizer:
+ params:
+ max_length: 320
+ truncation: true
+ padding_side: left
+ padding: max_length
+
+pipeline:
+ target: diffnext.pipelines.ursa.pipeline_train.URSATrainPipeline
+ paths:
+ pretrained_path: /path/to/URSA-1.7B-IBQ1024
+ module_dict:
+ vae: ${pipeline.paths.pretrained_path}/vae
+ scheduler: ${pipeline.paths.pretrained_path}/scheduler
+ tokenizer: ${pipeline.paths.pretrained_path}/tokenizer
+ model_index: ${pipeline.paths.pretrained_path}/model_index.json
+
+optimizer:
+ target: torch.optim.AdamW
+ param_groups: false
+ params:
+ lr: 0.00003
+ betas: [0.9, 0.95]
+ weight_decay: 0.05
+ fused: true
+
+lr_scheduler:
+ target: diffnext.engine.lr_scheduler.CosineLR
+ params:
+ lr_max: ${optimizer.params.lr}
+ lr_min: 0.00001
+ max_steps: ${training.max_train_steps}
+ warmup_steps: 500
+
+train_dataloader:
+ target: diffnext.data.flex_loaders.FeatureDataLoader
+ params:
+ dataset: /path/to/ibq1024_dataset
+ batch_size: ${training.batch_size}
+ seed: ${training.seed}
+ num_workers: 4
+ shuffle: true
+
+training:
+ gradient_accumulation_steps: 1
+ batch_size: 1 # * 512 = 512
+ max_train_steps: 120000
+ seed: 1337
+ mixed_precision: bf16
diff --git a/URSA/diffnext/__init__.py b/URSA/diffnext/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4111073bdbc3d91c74c4a15e95caab0c862ac142
--- /dev/null
+++ b/URSA/diffnext/__init__.py
@@ -0,0 +1,16 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""DiffNext: A diffusers based library for autoregressive diffusion models."""
diff --git a/URSA/diffnext/__pycache__/__init__.cpython-312.pyc b/URSA/diffnext/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ad98ee2f574c347210c0cb22a0548d7d0d8e51e6
Binary files /dev/null and b/URSA/diffnext/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/diffnext/__pycache__/image_processor.cpython-312.pyc b/URSA/diffnext/__pycache__/image_processor.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5d01063c3880fda3be13430da0167a92cc3fa046
Binary files /dev/null and b/URSA/diffnext/__pycache__/image_processor.cpython-312.pyc differ
diff --git a/URSA/diffnext/data/__init__.py b/URSA/diffnext/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..90be158a6ac71e82598484e5d0d0be3efe593c25
--- /dev/null
+++ b/URSA/diffnext/data/__init__.py
@@ -0,0 +1,16 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Data components."""
diff --git a/URSA/diffnext/data/flex_loaders.py b/URSA/diffnext/data/flex_loaders.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a263adb7e6acfbe4a3d4a7ddfe56fa54093075f
--- /dev/null
+++ b/URSA/diffnext/data/flex_loaders.py
@@ -0,0 +1,172 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Flex data loaders."""
+
+import collections
+import multiprocessing as mp
+import time
+import threading
+import queue
+
+import codewithgpu
+import numpy as np
+
+from diffnext.data.flex_pipelines import FeatureWorker
+
+
+class BalancedQueues(object):
+ """Balanced queues."""
+
+ def __init__(self, base_queue, num=1):
+ self.queues = [base_queue]
+ self.queues += [mp.Queue(base_queue._maxsize) for _ in range(num - 1)]
+ self.index = 0
+
+ def put(self, obj, block=True, timeout=None):
+ q = self.queues[self.index]
+ q.put(obj, block=block, timeout=timeout)
+ self.index = (self.index + 1) % len(self.queues)
+
+ def get(self, block=True, timeout=None):
+ q = self.queues[self.index]
+ obj = q.get(block=block, timeout=timeout)
+ self.index = (self.index + 1) % len(self.queues)
+ return obj
+
+ def get_n(self, num=1):
+ outputs = []
+ while len(outputs) < num:
+ obj = self.get()
+ if obj is not None:
+ outputs.append(obj)
+ return outputs
+
+
+class DataLoaderBase(threading.Thread):
+    """Base class of data loader.
+
+    The loader pipelines three stages connected by queues:
+    reader process -> worker processes -> a prefetch thread (this object).
+    """
+
+    def __init__(self, worker, **kwargs):
+        """Create a ``DataLoaderBase``.
+
+        Args:
+            worker (type): The worker process class to instantiate.
+        """
+        super().__init__(daemon=True)
+        # ``pop`` removes loader-only keys; keys read with ``get`` stay in
+        # kwargs and are also forwarded to ``codewithgpu.DatasetReader``
+        # below — NOTE(review): confirm the reader tolerates the extras.
+        self.seed = kwargs.pop("seed", 1337)
+        self.shuffle = kwargs.pop("shuffle", True)
+        self.shard_id = kwargs.get("shard_id", 0)
+        self.num_shards = kwargs.get("num_shards", 1)
+        self.batch_size = kwargs.get("batch_size", 1)
+        self.num_workers = kwargs.get("num_workers", 1)
+        self.queue_depth = kwargs.get("queue_depth", 2)
+        # Build queues.
+        self.reader_queue = mp.Queue(self.queue_depth * self.batch_size)
+        self.worker_queue = mp.Queue(self.queue_depth * self.batch_size)
+        self.batch_queue = queue.Queue(self.queue_depth)
+        # Wrap the base queues so each worker pipeline gets its own channel,
+        # fed and drained in balanced round-robin order.
+        self.reader_queue = BalancedQueues(self.reader_queue, self.num_workers)
+        self.worker_queue = BalancedQueues(self.worker_queue, self.num_workers)
+        # Build readers.
+        self.readers = [
+            codewithgpu.DatasetReader(
+                output_queue=self.reader_queue,
+                partition_id=self.shard_id,
+                num_partitions=self.num_shards,
+                seed=self.seed + self.shard_id,
+                shuffle=self.shuffle,
+                **kwargs,
+            )
+        ]
+        self.readers[0].start()
+        time.sleep(0.1)  # Give the reader process a moment to spin up.
+        # Build workers.
+        self.workers = []
+        for i in range(self.num_workers):
+            p = worker()
+            # Distinct seed per worker/shard so random augmentations differ.
+            p.seed = self.seed + i + self.shard_id * self.num_workers
+            p.reader_queue = self.reader_queue.queues[i]
+            p.worker_queue = self.worker_queue.queues[i]
+            p.start()
+            self.workers.append(p)
+            time.sleep(0.1)
+
+        # Register cleanup callbacks.
+        def cleanup():
+            def terminate(processes):
+                for p in processes:
+                    p.terminate()
+                    p.join()
+
+            terminate(self.workers)
+            terminate(self.readers)
+
+        import atexit
+
+        atexit.register(cleanup)
+        # Start batch prefetching.
+        self.start()
+
+    def next(self):
+        """Return the next batch of data."""
+        return self.__next__()
+
+    def run(self):
+        """Main loop."""
+        # Intentionally empty: concrete loaders override this thread body.
+
+    def __call__(self):
+        return self.next()
+
+    def __iter__(self):
+        """Return the iterator self."""
+        return self
+
+    def __next__(self):
+        """Return the next batch of data, wrapped in a single-element list."""
+        return [self.batch_queue.get()]
+
+
+class DataLoader(DataLoaderBase):
+    """Loader to return the batch of data.
+
+    Prefetches a large pool of worker outputs and assembles fixed-size
+    batches from it in a background thread.
+    """
+
+    def __init__(self, dataset, worker, **kwargs):
+        """Create a ``DataLoader``.
+
+        Args:
+            dataset (str): The dataset path, forwarded to codewithgpu.
+            worker (type): The worker process class.
+        """
+        kwargs.update({"path": dataset})  # Alias for codewithgpu.
+        self.contiguous = kwargs.pop("contiguous", True)
+        self.prefetch_count = kwargs.pop("prefetch_count", 50)
+        super().__init__(worker, **kwargs)
+
+    def run(self):
+        """Main loop."""
+        # Double buffering: drain one prefetched pool while refilling the
+        # other, pulling one replacement example per example consumed.
+        prev_inputs = self.worker_queue.get_n(self.prefetch_count * self.batch_size)
+        next_inputs = []
+        while True:
+            # Use cached buffer for next N inputs.
+            if len(next_inputs) == 0:
+                next_inputs = prev_inputs
+                prev_inputs = []
+            # Collect the next batch.
+            outputs = collections.defaultdict(list)
+            for _ in range(self.batch_size):
+                inputs = next_inputs.pop(0)
+                # Worker outputs map each key to a list; merge per batch.
+                for k, v in inputs.items():
+                    outputs[k].extend(v)
+                prev_inputs += self.worker_queue.get_n(1)
+            # Stack batch data.
+            if self.contiguous:
+                if "latents" in outputs:
+                    outputs["latents"] = np.stack(outputs["latents"])
+            # Send batch data to consumer.
+            self.batch_queue.put(outputs)
+
+
+class FeatureDataLoader(DataLoader):
+    """Loader to return the batch of data features."""
+
+    def __init__(self, dataset, **kwargs):
+        """Create a ``FeatureDataLoader`` backed by ``FeatureWorker``."""
+        super().__init__(dataset, FeatureWorker, **kwargs)
diff --git a/URSA/diffnext/data/flex_pipelines.py b/URSA/diffnext/data/flex_pipelines.py
new file mode 100644
index 0000000000000000000000000000000000000000..af52db37e8c3113281619922d9d02e5ed57d96a7
--- /dev/null
+++ b/URSA/diffnext/data/flex_pipelines.py
@@ -0,0 +1,63 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Flex data pipelines."""
+
+import multiprocessing
+
+import cv2
+import numpy.random as npr
+
+from diffnext.data import flex_transforms
+
+
+class Worker(multiprocessing.Process):
+ """Base data worker."""
+
+ def __init__(self):
+ super().__init__(daemon=True)
+ self.seed = 1337
+ self.reader_queue = None
+ self.worker_queue = None
+
+ def run(self):
+ """Run implementation."""
+ # Disable opencv threading and fix numpy random seed.
+ cv2.setNumThreads(1), npr.seed(self.seed)
+ while True: # Main loop.
+ self.worker_queue.put(self.get_outputs(self.reader_queue.get()))
+
+
+class FeaturePipe(object):
+ """Pipeline to transform data features."""
+
+ def __init__(self):
+ super().__init__()
+ self.parse_latents = flex_transforms.ParseLatents()
+ self.parse_annotations = flex_transforms.ParseAnnotations()
+
+ def get_outputs(self, inputs):
+ """Return the outputs."""
+ latents = self.parse_latents(inputs)
+ label, caption = self.parse_annotations(inputs)
+ outputs = {"latents": [latents]}
+ outputs.setdefault("prompt", [label]) if label is not None else None
+ outputs.setdefault("prompt", [caption]) if caption is not None else None
+ outputs.setdefault("motion", [inputs["flow"]]) if "flow" in inputs else None
+ return outputs
+
+
+class FeatureWorker(FeaturePipe, Worker):
+    """Worker to transform data features.
+
+    Combines ``FeaturePipe.get_outputs`` with the ``Worker`` process loop;
+    ``FeaturePipe.__init__`` chains to ``Worker.__init__`` via ``super()``.
+    """
diff --git a/URSA/diffnext/data/flex_transforms.py b/URSA/diffnext/data/flex_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..a06f4b409cf04680c5faf1b6dc49975171e00a2a
--- /dev/null
+++ b/URSA/diffnext/data/flex_transforms.py
@@ -0,0 +1,66 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Flex data transforms."""
+
+import re
+import numpy as np
+import numpy.random as npr
+
+
+class Transform(object):
+ """Base transform type."""
+
+ def filter_outputs(self, *outputs):
+ outputs = [x for x in outputs if x is not None]
+ return outputs if len(outputs) > 1 else outputs[0]
+
+
+class ParseLatents(Transform):
+ """Parse VQ or VAE latents."""
+
+ def __init__(self):
+ super().__init__()
+
+ def __call__(self, inputs):
+ for k, dtype in zip(("moments", "codes"), ("float16", "int32")):
+ if k in inputs:
+ return np.frombuffer(inputs[k], dtype).reshape(inputs["shape"])
+ raise ValueError("Missing latents in inputs.")
+
+
+class ParseAnnotations(Transform):
+ """Parse ground-truth annotations."""
+
+ def __init__(self, short_prob=0.5):
+ super().__init__()
+ self.short_prob = short_prob
+
+ def __call__(self, inputs):
+ text = inputs.get("text", None)
+ label = inputs.get("label", None)
+ caption = inputs.get("caption", None)
+ if caption and isinstance(caption, dict): # Cached.
+ caption = np.frombuffer(caption["data"], "float16").reshape(caption["shape"])
+ if text and isinstance(text, dict) and len(text["data"]) > 0 and npr.rand() < 0.5:
+ caption = np.frombuffer(text["data"], "float16").reshape(text["shape"])
+ return label, caption
+
+ # Improved short caption.
+ if label is None:
+ text_match = re.match(r"^(.*?[.!?])\s+", caption)
+ text = text if text else (text_match.group(1) if text_match else caption)
+ caption = text if text and npr.rand() < self.short_prob else caption
+ return label, caption
diff --git a/URSA/diffnext/engine/__init__.py b/URSA/diffnext/engine/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c40b63827cd0e1958c1bf954d4e6e9a60564e933
--- /dev/null
+++ b/URSA/diffnext/engine/__init__.py
@@ -0,0 +1,16 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Engine components."""
diff --git a/URSA/diffnext/engine/__pycache__/__init__.cpython-312.pyc b/URSA/diffnext/engine/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9e7a1c5f1f45c85d6708a96d82585aa6ac26c9b1
Binary files /dev/null and b/URSA/diffnext/engine/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/diffnext/engine/__pycache__/engine_utils.cpython-312.pyc b/URSA/diffnext/engine/__pycache__/engine_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..79b3b7a375a5417abcc8064df59048b46c4584b3
Binary files /dev/null and b/URSA/diffnext/engine/__pycache__/engine_utils.cpython-312.pyc differ
diff --git a/URSA/diffnext/engine/__pycache__/lr_scheduler.cpython-312.pyc b/URSA/diffnext/engine/__pycache__/lr_scheduler.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6f66e6982ff65d1fe433ec042dadd8b049f9ab2f
Binary files /dev/null and b/URSA/diffnext/engine/__pycache__/lr_scheduler.cpython-312.pyc differ
diff --git a/URSA/diffnext/engine/engine_utils.py b/URSA/diffnext/engine/engine_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c77b2002a22c619e57089d261c04009660e64ba2
--- /dev/null
+++ b/URSA/diffnext/engine/engine_utils.py
@@ -0,0 +1,109 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Engine utilities."""
+
+import collections
+import pickle
+
+import numpy as np
+import torch
+from torch import nn
+
+
+def count_params(module, trainable=True, unit="M"):
+ """Return the number of parameters."""
+ counts = [v.size().numel() for v in module.parameters() if v.requires_grad or (not trainable)]
+ return sum(counts) / {"M": 1e6, "B": 1e9}[unit]
+
+
+def freeze_module(module, trainable=False):
+ """Freeze parameters of given module."""
+ module.eval() if not trainable else module.train()
+ for param in module.parameters():
+ param.requires_grad = trainable
+ return module
+
+
+def get_device(index):
+ """Create the available device object."""
+ if torch.cuda.is_available():
+ return torch.device("cuda", index)
+ for device_type in ("mps",):
+ try:
+ if getattr(torch.backends, device_type).is_available():
+ return torch.device(device_type, index)
+ except AttributeError:
+ pass
+ return torch.device("cpu")
+
+
+def get_param_groups(model):
+    """Separate trainable parameters into optimizer groups.
+
+    Parameters sharing the same attribute overrides (``lr_scale`` and/or a
+    zeroed ``weight_decay``) are collected into one group.
+
+    Args:
+        model (torch.nn.Module): The model to collect parameters from.
+
+    Returns:
+        list: Optimizer parameter groups with their attribute overrides.
+    """
+    # ``memo`` de-duplicates shared/tied parameters across modules.
+    # ``lr_scale_getter`` is a placeholder hook and is always None here.
+    memo, groups, lr_scale_getter = set(), collections.OrderedDict(), None
+    norm_types = (nn.BatchNorm2d, nn.GroupNorm, nn.SyncBatchNorm, nn.LayerNorm)
+    for module_name, module in model.named_modules():
+        for param_name, param in module.named_parameters(recurse=False):
+            if not param.requires_grad or param in memo:
+                continue
+            memo.add(param)
+            attrs = collections.OrderedDict()
+            if lr_scale_getter:
+                attrs["lr_scale"] = lr_scale_getter(f"{module_name}.{param_name}")
+            # A per-parameter ``lr_scale`` attribute wins over the getter.
+            if hasattr(param, "lr_scale"):
+                attrs["lr_scale"] = param.lr_scale
+            # Normalization weights and flagged params skip weight decay.
+            if getattr(param, "no_weight_decay", False) or isinstance(module, norm_types):
+                attrs["weight_decay"] = 0
+            # The group key encodes the overrides, e.g. "weight_decay:0".
+            group_name = "/".join(["%s:%s" % (v[0], v[1]) for v in list(attrs.items())])
+            groups[group_name] = groups.get(group_name, {**attrs, **{"params": []}})
+            groups[group_name]["params"].append(param)
+    return list(groups.values())
+
+
+def load_weights(module, weights_file, prefix_removed="", strict=True):
+    """Load a weights file into ``module``.
+
+    Args:
+        module (torch.nn.Module): The target module.
+        weights_file (str): Path to a ``.pkl`` or torch checkpoint file.
+        prefix_removed (str, optional): Key prefix to strip before loading.
+        strict (bool, optional): Require exact key matching.
+    """
+    if not weights_file:
+        return
+    if weights_file.endswith(".pkl"):
+        # SECURITY: ``pickle.load`` executes arbitrary code during
+        # deserialization; only load checkpoints from trusted sources.
+        with open(weights_file, "rb") as f:
+            state_dict = pickle.load(f)
+        for k, v in state_dict.items():
+            state_dict[k] = torch.as_tensor(v)
+    else:
+        # SECURITY: ``weights_only=False`` unpickles arbitrary objects.
+        state_dict = torch.load(weights_file, map_location="cpu", weights_only=False)
+    if prefix_removed:
+        # NOTE(review): keys lacking the prefix are dropped entirely here —
+        # confirm this filtering (not just renaming) is intended.
+        new_state_dict = type(state_dict)()
+        for k in list(state_dict.keys()):
+            if k.startswith(prefix_removed):
+                new_state_dict[k.replace(prefix_removed, "")] = state_dict.pop(k)
+        state_dict = new_state_dict
+    module.load_state_dict(state_dict, strict=strict)
+
+
+def manual_seed(seed, device_and_seed=None):
+ """Set the cpu and device random seed."""
+ torch.manual_seed(seed)
+ if device_and_seed is not None:
+ device_index, device_seed = device_and_seed
+ device_type = get_device(device_index).type
+ np.random.seed(device_seed)
+ if device_type in ("cuda", "mps"):
+ getattr(torch, device_type).manual_seed(device_seed)
+
+
+def synchronize_device(device):
+ """Synchronize the computation of device."""
+ if device.type in ("cuda", "mps"):
+ getattr(torch, device.type).synchronize(device)
diff --git a/URSA/diffnext/engine/lr_scheduler.py b/URSA/diffnext/engine/lr_scheduler.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b11cdff6a2c26aa36c42067069c7276c68fd487
--- /dev/null
+++ b/URSA/diffnext/engine/lr_scheduler.py
@@ -0,0 +1,76 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Learning rate schedulers."""
+
+import math
+
+
+class ConstantLR(object):
+ """Constant LR scheduler."""
+
+ def __init__(self, **kwargs):
+ self._lr_max = kwargs.pop("lr_max")
+ self._lr_min = kwargs.pop("lr_min", 0)
+ self._warmup_steps = kwargs.pop("warmup_steps", 0)
+ self._warmup_factor = kwargs.pop("warmup_factor", 0.001)
+ self._step_count, self._last_decay = 0, 1.0
+
+ def step(self):
+ self._step_count += 1
+
+ def get_lr(self):
+ if self._step_count < self._warmup_steps:
+ alpha = (self._step_count + 1.0) / self._warmup_steps
+ return self._lr_max * (alpha + (1.0 - alpha) * self._warmup_factor)
+ return self._lr_min + (self._lr_max - self._lr_min) * self.get_decay()
+
+ def get_decay(self):
+ return self._last_decay
+
+
+class CosineLR(ConstantLR):
+    """LR scheduler with cosine decay."""
+
+    def __init__(self, lr_max, max_steps, lr_min=0, decay_step=1, **kwargs):
+        """Create a ``CosineLR`` scheduler.
+
+        Args:
+            lr_max (float): The maximal learning rate.
+            max_steps (int): The total number of training steps.
+            lr_min (float, optional, defaults to 0): The minimal learning rate.
+            decay_step (int, optional, defaults to 1): Steps between decay updates.
+        """
+        super().__init__(lr_max=lr_max, lr_min=lr_min, **kwargs)
+        self._decay_step, self._max_steps = decay_step, max_steps
+
+    def get_decay(self):
+        """Return the cosine decay factor for the current step."""
+        # Steps past warmup; the decay is recomputed only every
+        # ``decay_step`` steps and otherwise served from ``_last_decay``.
+        t = self._step_count - self._warmup_steps
+        t_max = self._max_steps - self._warmup_steps
+        if t > 0 and t % self._decay_step == 0:
+            # NOTE(review): for t > t_max the cosine turns back upward —
+            # confirm training never runs past ``max_steps``.
+            self._last_decay = 0.5 * (1.0 + math.cos(math.pi * t / t_max))
+        return self._last_decay
+
+
+class MultiStepLR(ConstantLR):
+    """LR scheduler with multi-steps decay."""
+
+    def __init__(self, lr_max, decay_steps, decay_gamma, **kwargs):
+        """Create a ``MultiStepLR`` scheduler.
+
+        Args:
+            lr_max (float): The maximal learning rate.
+            decay_steps (Sequence[int]): Step milestones that trigger a decay.
+            decay_gamma (float): Multiplicative decay applied per milestone.
+        """
+        super().__init__(lr_max=lr_max, **kwargs)
+        self._decay_steps, self._decay_gamma = decay_steps, decay_gamma
+        self._stage_count, self._num_stages = 0, len(decay_steps)
+
+    def get_decay(self):
+        """Return ``decay_gamma ** stages_passed`` for the current step."""
+        if self._stage_count < self._num_stages:
+            k = self._decay_steps[self._stage_count]
+            # Advance past every milestone the step count already crossed
+            # (handles resuming at an arbitrary step).
+            while self._step_count >= k:
+                self._stage_count += 1
+                if self._stage_count >= self._num_stages:
+                    break
+                k = self._decay_steps[self._stage_count]
+            self._last_decay = self._decay_gamma**self._stage_count
+        return self._last_decay
diff --git a/URSA/diffnext/engine/model_ema.py b/URSA/diffnext/engine/model_ema.py
new file mode 100644
index 0000000000000000000000000000000000000000..e28d7661eeb27792d28d27a900712fbf5f3a94ac
--- /dev/null
+++ b/URSA/diffnext/engine/model_ema.py
@@ -0,0 +1,44 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Exponential Moving Average (EMA) of model updates."""
+
+import copy
+import torch
+
+
+class ModelEMA(torch.nn.Module):
+ """Model Exponential Moving Average."""
+
+ def __init__(self, model, decay=0.99, update_every=100, device="gpu"):
+ super().__init__()
+ self.decay = decay
+ self.update_every = update_every
+ self.model = copy.deepcopy(model).eval()
+ self.model._apply(lambda t: t.float() if t.requires_grad else t) if decay < 1 else None
+ [setattr(p, "requires_grad", False) for p in self.model.parameters()]
+ self.model.cpu() if device == "cpu" else None
+
+ def forward(self, *args, **kwargs):
+ return self.model(*args, **kwargs)
+
+ @torch.no_grad()
+ def update(self, model):
+ for ema_v, model_v in zip(self.model.parameters(), model.parameters()):
+ if not model_v.requires_grad:
+ continue
+ new_value = model_v.data.float()
+ value = ema_v.to(device=new_value.device)
+ ema_v.copy_(value.mul_(self.decay).add_(new_value, alpha=1 - self.decay))
diff --git a/URSA/diffnext/engine/train_engine.py b/URSA/diffnext/engine/train_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f3a4668b96d6a58735c754dcef2890b34f04b40
--- /dev/null
+++ b/URSA/diffnext/engine/train_engine.py
@@ -0,0 +1,195 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Custom trainer focused on data parallelism specialization."""
+
+import collections
+import os
+import shutil
+
+import torch
+
+from diffnext.engine import engine_utils
+from diffnext.engine.model_ema import ModelEMA
+from diffnext.pipelines.builder import build_pipeline
+from diffnext.pipelines.builder import get_pipeline_path
+from diffnext.utils import accelerate_utils
+from diffnext.utils import profiler
+from diffnext.utils.omegaconf_utils import config_to_class
+from diffnext.utils.omegaconf_utils import config_to_object
+
+
+class Trainer(object):
+    """Schedule the iterative model training."""
+
+    def __init__(self, config, accelerator, logger):
+        """Create a trainer instance.
+
+        Args:
+            config: The experiment configuration (omegaconf-style).
+            accelerator: The accelerate ``Accelerator`` instance.
+            logger: The experiment logger.
+        """
+        self.config, self.accelerator, self.logger = config, accelerator, logger
+        self.dtype = accelerate_utils.precision_to_dtype(config.training.mixed_precision)
+        self.train_dataloader = config_to_object(config.train_dataloader)
+        self.pipe_path = get_pipeline_path(**config.pipeline.paths)
+        self.pipe = build_pipeline(self.pipe_path, config_to_class(config.pipeline), self.dtype)
+        self.pipe = self.pipe.to(device=engine_utils.get_device(config.training.gpu_id))
+        # EMA shadow model is optional; enabled by an "ema" config section.
+        self.ema = ModelEMA(self.pipe.model, **config.ema.params) if "ema" in config else None
+        self.model = self.pipe.configure_model(config, accelerator, logger)
+        # Default to attribute-based grouping; fall back to a flat list of
+        # trainable params when ``optimizer.param_groups`` is set to False.
+        param_groups = [_ for _ in self.model.parameters() if _.requires_grad]
+        if config.optimizer.get("param_groups", True):
+            param_groups = engine_utils.get_param_groups(self.model)
+        self.optimizer = config_to_object(config.optimizer, params=param_groups)
+        self.scheduler = config_to_object(config.lr_scheduler)
+        self.model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
+        if config.training.get("sequence_parallel_size", 1) > 1:
+            if not hasattr(self.pipe.model, "configure_sequence_parallel"):
+                raise RuntimeError("Model does not support sequence parallelism.")
+            self.pipe.model.configure_sequence_parallel(self.model.seq_parallel_group)
+        self.metrics = collections.OrderedDict()
+        # When resuming with EMA enabled, restore the EMA weights from the
+        # parallel "ema_checkpoints" directory.
+        if self.ema and config.experiment.resume_iter > 0:
+            ckpt = config.experiment.resume_from_checkpoint
+            ema_ckpt = ckpt.replace("checkpoints", "ema_checkpoints")
+            ema_weights = os.path.join(ema_ckpt, config.model.name, "diffusion_pytorch_model.bin")
+            engine_utils.load_weights(self.ema.model, ema_weights)
+
+    @property
+    def global_step(self) -> int:
+        """Return the global iteration step.
+
+        Returns:
+            int: The global step.
+        """
+        return self.scheduler._step_count
+
+    def save(self):
+        """Save the checkpoint of current iterative step."""
+        f = "checkpoint-{}/{}".format(self.global_step, self.config.model.name)
+        f = os.path.join(self.config.experiment.output_dir, "checkpoints", f)
+        # Only the main process writes; skip if this step was already saved.
+        if self.accelerator.is_main_process and not os.path.exists(f):
+            self.model.save_pretrained(f, safe_serialization=False)
+            self.logger.info("Wrote snapshot to: {:s}".format(f))
+            if self.ema is not None:
+                # Mirror the config next to the EMA weights.
+                config_json = os.path.join(f, "config.json")
+                f = f.replace("checkpoints", "ema_checkpoints")
+                os.makedirs(f), shutil.copy(config_json, os.path.join(f, "config.json"))
+                f = os.path.join(f, "diffusion_pytorch_model.bin")
+                torch.save(self.ema.model.state_dict(), f)
+
+    def add_metrics(self, stats):
+        """Add or update the metrics.
+
+        Args:
+            stats (Dict)
+                The current iteration stats.
+        """
+        for k, v in stats["metrics"].items():
+            if k not in self.metrics:
+                self.metrics[k] = profiler.SmoothedValue()
+            self.metrics[k].update(v)
+
+    def log_metrics(self, stats):
+        """Send metrics to the logger and available trackers.
+
+        Args:
+            stats (Dict)
+                The current iteration stats.
+        """
+        iter_template = "Iteration %d, lr = %.8f, time = %.2fs"
+        metric_template = " " * 4 + "Train net output({}): {}"
+        [self.logger.info(iter_template % (stats["step"], stats["lr"], stats["time"]))]
+        [self.logger.info(metric_template.format(k, v)) for k, v in self.metrics.items()]
+        # Trackers receive the smoothed median of each accumulated metric.
+        tracker_logs = dict((k, v.median) for k, v in self.metrics.items())
+        tracker_logs.update({"lr": stats["lr"], "time": stats["time"]})
+        self.accelerator.log(tracker_logs, step=stats["step"])
+        self.metrics.clear()
+
+    def run_model(self, inputs, metrics, accum_steps=1):
+        """Run multiple model steps.
+
+        Args:
+            inputs (Dict)
+                The model inputs. Falsy inputs are fetched from the loader.
+            metrics (Dict)
+                The current iteration metrics (mutated in place).
+            accum_steps (int, optional, defaults to 1)
+                The gradient accumulation steps.
+
+        """
+        for _ in range(accum_steps):
+            inputs = inputs if inputs else self.train_dataloader.next()[0]
+            outputs, losses = self.model(inputs), []
+            # Output keys containing "loss"/"metric" are collected; tensors
+            # that require grad contribute to the backward pass.
+            for k, v in outputs.items():
+                if "loss" not in k and "metric" not in k:
+                    continue
+                if isinstance(v, torch.Tensor) and v.requires_grad:
+                    losses.append(v)
+                if k.startswith("metric/"):  # Custom metrics.
+                    metrics[k[len("metric/") :]] += float(v.mean()) / accum_steps
+                elif f"metric/{k}" not in outputs:  # Legacy metrics.
+                    metrics[k] += float(self.accelerator.gather(v).mean()) / accum_steps
+            # NOTE(review): raises IndexError when no output requires grad —
+            # confirm models always emit at least one differentiable loss.
+            losses = sum(losses[1:], losses[0])
+            # NOTE(review): the accumulate() context is entered but never
+            # exited — confirm gradient-sync semantics with Accelerate.
+            self.accelerator.accumulate().__enter__()
+            self.accelerator.backward(losses)
+
+    def run_step(self, inputs, accum_steps=1) -> dict:
+        """Run single iteration step.
+
+        Args:
+            inputs (Dict)
+                The model inputs.
+            accum_steps (int, optional, defaults to 1)
+                The gradient accumulation steps.
+
+        Returns:
+            Dict: The current iteration stats.
+        """
+        stats = {"step": self.global_step}
+        metrics = collections.defaultdict(float)
+        timer = profiler.Timer().tic()
+        # Apply the scheduled LR (with optional per-group scaling) first.
+        stats["lr"] = self.scheduler.get_lr()
+        for group in self.optimizer.param_groups:
+            group["lr"] = stats["lr"] * group.get("lr_scale", 1.0)
+        self.run_model(inputs, metrics, accum_steps)
+        self.optimizer.step()
+        self.optimizer.zero_grad(set_to_none=True)
+        self.scheduler.step()
+        stats["time"] = timer.toc()
+        stats["metrics"] = collections.OrderedDict(sorted(metrics.items()))
+        return stats
+
+    def train_loop(self):
+        """Training loop."""
+        timer = profiler.Timer()
+        max_steps = self.config.training.max_train_steps
+        accum_steps = self.config.training.gradient_accumulation_steps
+        log_every = self.config.experiment.log_every
+        save_every = self.config.experiment.save_every
+        # ``data_every`` >= 1 reuses one fetched batch for several steps.
+        data_every, inputs = self.config.training.get("data_every", -1), {}
+        self.scheduler._step_count = self.config.experiment.get("resume_iter", 0)
+        while self.global_step < max_steps:
+            if data_every >= 1 and self.global_step % data_every == 0:
+                inputs = self.train_dataloader.next()[0]
+            with timer.tic_and_toc():
+                stats = self.run_step(inputs, accum_steps)
+            self.add_metrics(stats)
+            if stats["step"] % log_every == 0:
+                self.log_metrics(stats)
+            if self.global_step % (10 * log_every) == 0:
+                self.logger.info(profiler.get_progress(timer, self.global_step, max_steps))
+            if self.ema and self.global_step % self.ema.update_every == 0:
+                self.ema.update(self.model)
+            if self.global_step % save_every == 0:
+                self.save()
+        # Final flush; the dict-union below also overrides "step".
+        stats["step"] = self.global_step
+        self.log_metrics({**stats, **{"step": self.global_step}})
+        self.accelerator.wait_for_everyone()
+        self.accelerator.end_training()
diff --git a/URSA/diffnext/image_processor.py b/URSA/diffnext/image_processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..2420e79d74dd34897b68ce703770b38699f34982
--- /dev/null
+++ b/URSA/diffnext/image_processor.py
@@ -0,0 +1,105 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Image processor."""
+
+from typing import List, Union
+
+import numpy as np
+import PIL.Image
+import torch
+from torch import nn
+
+from diffusers.configuration_utils import ConfigMixin
+
+
+class VaeImageProcessor(ConfigMixin):
+    """Image processor for VAE."""
+
+    def postprocess(
+        self, image: torch.Tensor, output_type: str = "pil"
+    ) -> Union[PIL.Image.Image, np.ndarray, torch.Tensor]:
+        """Postprocess the image output from tensor.
+
+        Args:
+            image (torch.Tensor):
+                The image tensor.
+            output_type (str, *optional*, defaults to `pil`):
+                The output image type, can be one of `pil`, `np`, `pt`, `latent`.
+
+        Returns:
+            Union[PIL.Image.Image, np.ndarray, torch.Tensor]: The postprocessed image.
+        """
+        if output_type == "latent" or output_type == "pt":
+            return image
+        image = self.pt_to_numpy(image)
+        if output_type == "np":
+            return image
+        if output_type == "pil":
+            return self.numpy_to_pil(image)
+        # Unknown output types fall through to the numpy array.
+        return image
+
+    @staticmethod
+    @torch.no_grad()
+    def decode_latents(vae: nn.Module, latents: torch.Tensor, vae_batch_size=1) -> torch.Tensor:
+        """Decode VAE latents.
+
+        Args:
+            vae (torch.nn.Module):
+                The VAE model.
+            latents (torch.Tensor):
+                The input latents.
+            vae_batch_size (int, *optional*, defaults to 1)
+                The maximum images in a batch to decode.
+
+        Returns:
+            torch.Tensor: The output tensor.
+
+        """
+        # ``unscale_`` presumably reverses the encode-time latent scaling —
+        # confirm against the VAE implementation. Decoding is chunked to at
+        # most ``vae_batch_size`` items to bound peak memory.
+        x, batch_size = vae.unscale_(latents), latents.size(0)
+        sizes, splits = [vae_batch_size] * (batch_size // vae_batch_size), []
+        # Append the remainder chunk when the batch does not divide evenly.
+        sizes += [batch_size - sum(sizes)] if sum(sizes) != batch_size else []
+        for x_split in x.split(sizes) if len(sizes) > 1 else [x]:
+            splits.append(vae.decode(x_split).sample)
+        return torch.cat(splits) if len(splits) > 1 else splits[0]
+
+    @staticmethod
+    def pt_to_numpy(images: torch.Tensor) -> np.ndarray:
+        """Convert images from a torch tensor to a numpy array.
+
+        Args:
+            images (torch.Tensor):
+                The image tensor.
+
+        Returns:
+            np.ndarray: The image array.
+        """
+        # Move channels last; 5-D tensors are treated as video batches.
+        x = images.permute(0, 2, 3, 4, 1) if images.dim() == 5 else images.permute(0, 2, 3, 1)
+        # Maps values from [-1, 1] to [0, 255] bytes; outliers are clamped.
+        return x.mul(127.5).add_(127.5).clamp(0, 255).byte().cpu().numpy()
+
+    @staticmethod
+    def numpy_to_pil(images: np.ndarray) -> List[PIL.Image.Image]:
+        """Convert images from a numpy array to a list of PIL objects.
+
+        Args:
+            images (np.ndarray):
+                The image array.
+
+        Returns:
+            List[PIL.Image.Image]: A list of PIL images.
+        """
+        # Promote a single image to a batch; flatten video batches to frames.
+        images = images[None, ...] if images.ndim == 3 else images
+        images = images.reshape((-1,) + images.shape[2:]) if images.ndim == 5 else images
+        return [PIL.Image.fromarray(image) for image in images]
diff --git a/URSA/diffnext/models/__init__.py b/URSA/diffnext/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dba9c7ce2aff73501c11425026c9d293bc2d827
--- /dev/null
+++ b/URSA/diffnext/models/__init__.py
@@ -0,0 +1,16 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Models."""
diff --git a/URSA/diffnext/models/__pycache__/flex_attention.cpython-312.pyc b/URSA/diffnext/models/__pycache__/flex_attention.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b96f0838c23d86226995c2a24dfda9e61845b329
Binary files /dev/null and b/URSA/diffnext/models/__pycache__/flex_attention.cpython-312.pyc differ
diff --git a/URSA/diffnext/models/autoencoders/__init__.py b/URSA/diffnext/models/autoencoders/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa6b4a3edaab0f4ed2ef6764c555a628688cf69a
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/__init__.py
@@ -0,0 +1,16 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Autoencoders."""
diff --git a/URSA/diffnext/models/autoencoders/autoencoder_kl.py b/URSA/diffnext/models/autoencoders/autoencoder_kl.py
new file mode 100644
index 0000000000000000000000000000000000000000..6decaa8edf5df967474133de7b31d7ed8f4b8015
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/autoencoder_kl.py
@@ -0,0 +1,226 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Simple implementation of AutoEncoderKL."""
+
+import torch
+from torch import nn
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import AutoencoderKLOutput
+from diffusers.models.modeling_utils import ModelMixin
+
+from diffnext.models.autoencoders.modeling_utils import DecoderOutput
+from diffnext.models.autoencoders.modeling_utils import DiagonalGaussianDistribution
+from diffnext.models.autoencoders.modeling_utils import IdentityDistribution
+
+
+class Attention(nn.Module):
+ """Multi-headed attention."""
+
+ def __init__(self, dim, num_heads=1):
+ super(Attention, self).__init__()
+ self.num_heads = num_heads or dim // 64
+ self.head_dim = dim // self.num_heads
+ self.group_norm = nn.GroupNorm(32, dim, eps=1e-6)
+ self.to_q, self.to_k, self.to_v = [nn.Linear(dim, dim) for _ in range(3)]
+ self.to_out = nn.ModuleList([nn.Linear(dim, dim)])
+ self._from_deprecated_attn_block = True # Fix for diffusers>=0.15.0
+
+ def forward(self, x) -> torch.Tensor:
+ x, shape = self.group_norm(x), (-1,) + x.shape[1:]
+ x = x.flatten(2).transpose(1, 2).contiguous()
+ qkv_shape = (-1, x.size(1), self.num_heads, self.head_dim)
+ q, k, v = [f(x).view(qkv_shape).transpose(1, 2) for f in (self.to_q, self.to_k, self.to_v)]
+ o = nn.functional.scaled_dot_product_attention(q, k, v).transpose(1, 2)
+ return self.to_out[0](o.flatten(2)).transpose(1, 2).reshape(shape)
+
+
+class Resize(nn.Module):
+ """Resize layer."""
+
+ def __init__(self, dim, downsample=1):
+ super(Resize, self).__init__()
+ self.conv = nn.Conv2d(dim, dim, 3, 2, 0) if downsample else None
+ self.conv = nn.Conv2d(dim, dim, 3, 1, 1) if not downsample else self.conv
+ self.downsample, self.upsample = downsample, int(not downsample)
+
+ def forward(self, x) -> torch.Tensor:
+ x = nn.functional.pad(x, (0, 1, 0, 1)) if self.downsample else x
+ return self.conv(nn.functional.interpolate(x, None, (2, 2)) if self.upsample else x)
+
+
+class ResBlock(nn.Module):
+ """Resnet block."""
+
+ def __init__(self, dim, out_dim):
+ super(ResBlock, self).__init__()
+ self.norm1 = nn.GroupNorm(32, dim, eps=1e-6)
+ self.conv1 = nn.Conv2d(dim, out_dim, 3, 1, 1)
+ self.norm2 = nn.GroupNorm(32, out_dim, eps=1e-6)
+ self.conv2 = nn.Conv2d(out_dim, out_dim, 3, 1, 1)
+ self.conv_shortcut = nn.Conv2d(dim, out_dim, 1) if out_dim != dim else None
+ self.nonlinearity = nn.SiLU()
+
+ def forward(self, x) -> torch.Tensor:
+ shortcut = self.conv_shortcut(x) if self.conv_shortcut else x
+ x = self.conv1(self.nonlinearity(self.norm1(x)))
+ return self.conv2(self.nonlinearity(self.norm2(x))).add_(shortcut)
+
+
+class UNetResBlock(nn.Module):
+ """UNet resnet block."""
+
+ def __init__(self, dim, out_dim, depth=2, downsample=0, upsample=0):
+ super(UNetResBlock, self).__init__()
+ block_dims = [(out_dim, out_dim) if i > 0 else (dim, out_dim) for i in range(depth)]
+ self.resnets = nn.ModuleList(ResBlock(*dims) for dims in block_dims)
+ self.attentions = nn.ModuleList() # Legacy AttnBlock.
+ self.downsamplers = nn.ModuleList([Resize(out_dim, 1)]) if downsample else []
+ self.upsamplers = nn.ModuleList([Resize(out_dim, 0)]) if upsample else []
+
+ def forward(self, x) -> torch.Tensor:
+ for i, resnet in enumerate(self.resnets):
+ x = resnet(x)
+ x = self.attentions[i](x).add_(x) if i < len(self.attentions) else x
+ x = self.downsamplers[0](x) if self.downsamplers else x
+ return self.upsamplers[0](x) if self.upsamplers else x
+
+
+class UNetMidBlock(nn.Module):
+ """UNet mid block."""
+
+ def __init__(self, dim, num_heads=1, depth=1):
+ super(UNetMidBlock, self).__init__()
+ self.resnets = nn.ModuleList(ResBlock(dim, dim) for _ in range(depth + 1))
+ self.attentions = nn.ModuleList(Attention(dim, num_heads) for _ in range(depth))
+
+ def forward(self, x) -> torch.Tensor:
+ x = self.resnets[0](x)
+ for attn, resnet in zip(self.attentions, self.resnets[1:]):
+ x = resnet(attn(x).add_(x))
+ return x
+
+
+class Encoder(nn.Module):
+ """VAE encoder."""
+
+ def __init__(self, dim, out_dim, block_dims, block_depth=2):
+ super(Encoder, self).__init__()
+ self.conv_in = nn.Conv2d(dim, block_dims[0], 3, 1, 1)
+ self.down_blocks = nn.ModuleList()
+ for i, block_dim in enumerate(block_dims):
+ downsample = 1 if i < (len(block_dims) - 1) else 0
+ args = (block_dims[max(i - 1, 0)], block_dim, block_depth)
+ self.down_blocks += [UNetResBlock(*args, downsample=downsample)]
+ self.mid_block = UNetMidBlock(block_dims[-1])
+ self.conv_act = nn.SiLU()
+ self.conv_norm_out = nn.GroupNorm(32, block_dims[-1], eps=1e-6)
+ self.conv_out = nn.Conv2d(block_dims[-1], out_dim, 3, 1, 1)
+
+ def forward(self, x) -> torch.Tensor:
+ x = self.conv_in(x)
+ for blk in self.down_blocks:
+ x = blk(x)
+ x = self.mid_block(x)
+ return self.conv_out(self.conv_act(self.conv_norm_out(x)))
+
+
+class Decoder(nn.Module):
+ """VAE decoder."""
+
+ def __init__(self, dim, out_dim, block_dims, block_depth=2):
+ super(Decoder, self).__init__()
+ block_dims = list(reversed(block_dims))
+ self.up_blocks = nn.ModuleList()
+ for i, block_dim in enumerate(block_dims):
+ upsample = 1 if i < (len(block_dims) - 1) else 0
+ args = (block_dims[max(i - 1, 0)], block_dim, block_depth + 1)
+ self.up_blocks += [UNetResBlock(*args, upsample=upsample)]
+ self.conv_in = nn.Conv2d(dim, block_dims[0], 3, 1, 1)
+ self.mid_block = UNetMidBlock(block_dims[0])
+ self.conv_act = nn.SiLU()
+ self.conv_norm_out = nn.GroupNorm(32, block_dims[-1], eps=1e-6)
+ self.conv_out = nn.Conv2d(block_dims[-1], out_dim, 3, 1, 1)
+
+ def forward(self, x) -> torch.Tensor:
+ x = self.conv_in(x)
+ x = self.mid_block(x)
+ for blk in self.up_blocks:
+ x = blk(x)
+ return self.conv_out(self.conv_act(self.conv_norm_out(x)))
+
+
+class AutoencoderKL(ModelMixin, ConfigMixin):
+ """AutoEncoder KL."""
+
+ @register_to_config
+ def __init__(
+ self,
+ in_channels=3,
+ out_channels=3,
+ down_block_types=("DownEncoderBlock2D",) * 4,
+ up_block_types=("UpDecoderBlock2D",) * 4,
+ block_out_channels=(128, 256, 512, 512),
+ layers_per_block=2,
+ act_fn="silu",
+ latent_channels=16,
+ norm_num_groups=32,
+ sample_size=1024,
+ scaling_factor=0.18215,
+ shift_factor=None,
+ latents_mean=None,
+ latents_std=None,
+ force_upcast=True,
+ double_z=True,
+ use_quant_conv=True,
+ use_post_quant_conv=True,
+ ):
+ super(AutoencoderKL, self).__init__()
+ channels, layers = block_out_channels, layers_per_block
+ self.encoder = Encoder(in_channels, (1 + double_z) * latent_channels, channels, layers)
+ self.decoder = Decoder(latent_channels, out_channels, channels, layers)
+ quant_conv_type = type(self.decoder.conv_in) if use_quant_conv else nn.Identity
+ post_quant_conv_type = type(self.decoder.conv_in) if use_post_quant_conv else nn.Identity
+ self.quant_conv = quant_conv_type(*([(1 + double_z) * latent_channels] * 2 + [1]))
+ self.post_quant_conv = post_quant_conv_type(latent_channels, latent_channels, 1)
+ self.latent_dist = DiagonalGaussianDistribution if double_z else IdentityDistribution
+
+ def scale_(self, x) -> torch.Tensor:
+ """Scale the input latents."""
+ x.add_(-self.config.shift_factor) if self.config.shift_factor else None
+ return x.mul_(self.config.scaling_factor)
+
+ def unscale_(self, x) -> torch.Tensor:
+ """Unscale the input latents."""
+ x.mul_(1 / self.config.scaling_factor)
+ return x.add_(self.config.shift_factor) if self.config.shift_factor else x
+
+ def encode(self, x) -> AutoencoderKLOutput:
+ """Encode the input samples."""
+ z = self.quant_conv(self.encoder(self.forward(x)))
+ posterior = self.latent_dist(z)
+ return AutoencoderKLOutput(latent_dist=posterior)
+
+ def decode(self, z) -> DecoderOutput:
+ """Decode the input latents."""
+ t = z.size(2) if z.dim() == 5 else 1
+ z = z.transpose(1, 2).flatten(0, 1) if t > 1 else z
+ z = z.squeeze_(2) if z.dim() == 5 else z
+ x = self.decoder(self.post_quant_conv(self.forward(z)))
+ x = x.view(-1, t, *x.shape[1:]).transpose(1, 2) if t > 1 else x
+ return DecoderOutput(sample=x)
+
+ def forward(self, x): # NOOP.
+ return x
diff --git a/URSA/diffnext/models/autoencoders/autoencoder_kl_cogvideox.py b/URSA/diffnext/models/autoencoders/autoencoder_kl_cogvideox.py
new file mode 100644
index 0000000000000000000000000000000000000000..61958cbad8411411f315b70f0a0ccb6aac12ea90
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/autoencoder_kl_cogvideox.py
@@ -0,0 +1,262 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+"""Simple implementation of AutoEncoderKL for CogVideoX."""
+
+import torch
+import torch.nn as nn
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import AutoencoderKLOutput
+from diffusers.models.modeling_utils import ModelMixin
+
+from diffnext.models.autoencoders.modeling_utils import DiagonalGaussianDistribution
+from diffnext.models.autoencoders.modeling_utils import DecoderOutput, TilingMixin
+
+
+class Conv3d(nn.Conv3d):
+ """3D convolution layer."""
+
+ def __init__(self, *args, **kwargs):
+ super(Conv3d, self).__init__(*args, **kwargs)
+ self.padding = (0,) + self.padding[1:]
+ self.pad = nn.ReplicationPad3d((0,) * 4 + (self.kernel_size[0] - 1, 0))
+ self.pad = nn.Identity() if self.kernel_size[0] == 1 else self.pad
+
+ def forward(self, x) -> torch.Tensor:
+ x = self.pad(x)
+ num_splits = x.numel() // 1073741824 + 1
+ if num_splits == 1 or x.size(2) <= 3:
+ return super().forward(x)
+ if self.kernel_size[0] == 1:
+ return torch.cat([super(Conv3d, self).forward(x) for x in x.chunk(num_splits, 2)], 2)
+ x, ks = list(x.chunk(num_splits, 2)), self.kernel_size[0]
+ for i in range(num_splits - 1, -1, -1):
+ x[i] = super().forward(torch.cat((x[i - 1][:, :, -ks + 1 :], x[i]), 2) if i else x[i])
+ return torch.cat(x, 2)
+
+
+class AdaGroupNorm(nn.GroupNorm):
+ """Adaptive group normalization layer."""
+
+ def __init__(self, dim, z_dim=None, num_groups=32, eps=1e-6):
+ super(AdaGroupNorm, self).__init__(num_groups, dim, eps=eps)
+ self.scale = Conv3d(z_dim, dim, 1) if z_dim else None
+ self.shift = Conv3d(z_dim, dim, 1) if z_dim else None
+
+ def forward(self, x, z=None) -> torch.Tensor:
+ if not self.scale or z is None:
+ return super().forward(x)
+ t, h, w = x.shape[2:]
+ if t > 1 and t % 2 == 1:
+ _ = nn.functional.interpolate(z[:, :, :1], (1, h, w))
+ z = torch.cat([_, nn.functional.interpolate(z[:, :, 1:], (t - 1, h, w))], 2)
+ else:
+ z = nn.functional.interpolate(z, (t, h, w))
+ return super().forward(x).mul_(self.scale(z)).add_(self.shift(z))
+
+
+class Resize(nn.Module):
+ """Resize layer."""
+
+ def __init__(self, dim, downsample=1, upsample=0):
+ super(Resize, self).__init__()
+ self.downsample, self.upsample = downsample, upsample
+ self.conv = nn.Conv2d(dim, dim, 3, 2, 0) if downsample else None
+ self.conv = nn.Conv2d(dim, dim, 3, 1, 1) if upsample else self.conv
+
+ def forward(self, x) -> torch.Tensor:
+ c, t, h, w = x.shape[1:]
+ if self.downsample == 2 and t > 1:
+ x = x.permute(0, 3, 4, 1, 2).reshape((-1, c, t))
+ x = torch.cat([x[..., :1], nn.functional.avg_pool1d(x[..., 1:], 2, 2)], dim=-1)
+ x = x.view(-1, h, w, c, x.size(-1)).permute(0, 4, 3, 1, 2)
+ elif self.upsample == 2 and t > 1:
+ x1 = x[:, :, :1].repeat_interleave(2, 3).repeat_interleave(2, 4)
+ x2 = x[:, :, 1:].repeat_interleave(2, 2).repeat_interleave(2, 3).repeat_interleave(2, 4)
+ x = torch.cat([x1, x2], dim=2) if x1 is not None else x2
+ elif self.downsample:
+ x = x.permute(0, 2, 1, 3, 4)
+ elif self.upsample:
+ x = x.repeat_interleave(2, 3).repeat_interleave(2, 4)
+ if self.downsample:
+ t, c, h, w = x.shape[1:]
+ x = self.conv(nn.functional.pad(x.flatten(0, 1), (0, 1, 0, 1)))
+ elif self.upsample:
+ c, t, h, w = x.shape[1:]
+ x = self.conv(x.permute(0, 2, 1, 3, 4).flatten(0, 1))
+ return x.view(*((-1, t, c) + x.shape[-2:])).transpose(1, 2)
+
+
+class ResBlock(nn.Module):
+ """Resnet block."""
+
+ def __init__(self, dim, out_dim, z_dim=None):
+ super(ResBlock, self).__init__()
+ self.norm1 = AdaGroupNorm(dim, z_dim)
+ self.norm2 = AdaGroupNorm(out_dim, z_dim)
+ self.conv1 = Conv3d(dim, out_dim, 3, 1, 1)
+ self.conv2 = Conv3d(out_dim, out_dim, 3, 1, 1)
+ self.conv_shortcut = Conv3d(dim, out_dim, 1) if out_dim != dim else None
+ self.nonlinearity, self.dropout = nn.SiLU(), nn.Dropout(0, inplace=True)
+
+ def forward(self, x, z=None) -> torch.Tensor:
+ shortcut = self.conv_shortcut(x) if self.conv_shortcut else x
+ x = self.norm1(x, z) if z is not None else self.norm1(x)
+ x = self.conv1(self.nonlinearity(x))
+ x = self.norm2(x, z) if z is not None else self.norm2(x)
+ return self.conv2(self.dropout(self.nonlinearity(x))).add_(shortcut)
+
+
+class UNetMidBlock(nn.Module):
+ """UNet mid block."""
+
+ def __init__(self, dim, z_dim=None, depth=2):
+ super(UNetMidBlock, self).__init__()
+ self.resnets = nn.ModuleList(ResBlock(dim, dim, z_dim) for _ in range(depth))
+
+ def forward(self, x, z=None) -> torch.Tensor:
+ for resnet in self.resnets:
+ x = resnet(x, z)
+ return x
+
+
+class UNetResBlock(nn.Module):
+ """UNet resnet block."""
+
+ def __init__(self, dim, out_dim, depth, z_dim=None, downsample=0, upsample=0):
+ super(UNetResBlock, self).__init__()
+ block_dims = [(out_dim, out_dim) if i > 0 else (dim, out_dim) for i in range(depth)]
+ self.resnets = nn.ModuleList(ResBlock(*(dims + (z_dim,))) for dims in block_dims)
+ self.downsamplers = nn.ModuleList([Resize(out_dim, downsample)]) if downsample else []
+ self.upsamplers = nn.ModuleList([Resize(out_dim, 0, upsample)]) if upsample else []
+
+ def forward(self, x, z=None) -> torch.Tensor:
+ for resnet in self.resnets:
+ x = resnet(x, z)
+ x = self.downsamplers[0](x) if self.downsamplers else x
+ return self.upsamplers[0](x) if self.upsamplers else x
+
+
+class Encoder(nn.Module):
+ """VAE encoder."""
+
+ def __init__(self, dim, out_dim, block_dims, block_depth):
+ super(Encoder, self).__init__()
+ self.conv_in = Conv3d(dim, block_dims[0], 3, 1, 1)
+ self.down_blocks = nn.ModuleList()
+ for i, block_dim in enumerate(block_dims):
+ downsample = 2 if i < 2 else (1 if i < (len(block_dims) - 1) else 0)
+ args = (block_dims[max(i - 1, 0)], block_dim, block_depth)
+ self.down_blocks += [UNetResBlock(*args, downsample=downsample)]
+ self.mid_block = UNetMidBlock(block_dims[-1])
+ self.conv_norm_out = AdaGroupNorm(block_dims[-1])
+ self.conv_act = nn.SiLU()
+ self.conv_out = Conv3d(block_dims[-1], 2 * out_dim, 3, 1, 1)
+
+ def forward(self, x) -> torch.Tensor:
+ x = self.conv_in(x)
+ for blk in self.down_blocks:
+ x = blk(x)
+ x = self.mid_block(x)
+ return self.conv_out(self.conv_act(self.conv_norm_out(x)))
+
+
+class Decoder(nn.Module):
+ """VAE decoder."""
+
+ def __init__(self, dim, out_dim, block_dims, block_depth):
+ super(Decoder, self).__init__()
+ block_dims = list(reversed(block_dims))
+ self.up_blocks = nn.ModuleList()
+ for i, block_dim in enumerate(block_dims):
+ upsample = 2 if i < 2 else (1 if i < (len(block_dims) - 1) else 0)
+ args = (block_dims[max(i - 1, 0)], block_dim, block_depth + 1, dim)
+ self.up_blocks += [UNetResBlock(*args, upsample=upsample)]
+ self.conv_in = Conv3d(dim, block_dims[0], 3, 1, 1)
+ self.mid_block = UNetMidBlock(block_dims[0], dim)
+ self.conv_act = nn.SiLU()
+ self.conv_norm_out = AdaGroupNorm(block_dims[-1], dim)
+ self.conv_out = Conv3d(block_dims[-1], out_dim, 3, 1, 1)
+
+ def forward(self, x) -> torch.Tensor:
+ x, z = self.conv_in(x), x
+ x = self.mid_block(x, z)
+ for blk in self.up_blocks:
+ x = blk(x, z)
+ return self.conv_out(self.conv_act(self.conv_norm_out(x, z)))
+
+
+class AutoencoderKLCogVideoX(ModelMixin, ConfigMixin, TilingMixin):
+ """AutoEncoder KL."""
+
+ @register_to_config
+ def __init__(
+ self,
+ in_channels=3,
+ out_channels=3,
+ down_block_types=("CogVideoXDownBlock3D",) * 4,
+ up_block_types=("CogVideoXUpBlock3D",) * 4,
+ block_out_channels=(128, 256, 256, 512),
+ layers_per_block=3,
+ act_fn="silu",
+ latent_channels=16,
+ norm_num_groups=32,
+ sample_size=480,
+ scaling_factor=0.7,
+ shift_factor=None,
+ latents_mean=None,
+ latents_std=None,
+ force_upcast=True,
+ use_quant_conv=False,
+ use_post_quant_conv=False,
+ ):
+ super(AutoencoderKLCogVideoX, self).__init__()
+ TilingMixin.__init__(self, sample_min_t=17, latent_min_t=5, sample_ovr_t=1)
+ self.encoder = Encoder(in_channels, latent_channels, block_out_channels, layers_per_block)
+ self.decoder = Decoder(latent_channels, out_channels, block_out_channels, layers_per_block)
+ quant_conv_type = type(self.decoder.conv_in) if use_quant_conv else nn.Identity
+ post_quant_conv_type = type(self.decoder.conv_in) if use_post_quant_conv else nn.Identity
+ self.quant_conv = quant_conv_type(2 * latent_channels, 2 * latent_channels, 1)
+ self.post_quant_conv = post_quant_conv_type(latent_channels, latent_channels, 1)
+ self.latent_dist = DiagonalGaussianDistribution
+
+ def scale_(self, x) -> torch.Tensor:
+ """Scale the input latents."""
+ x.add_(-self.config.shift_factor) if self.config.shift_factor else None
+ return x.mul_(self.config.scaling_factor)
+
+ def unscale_(self, x) -> torch.Tensor:
+ """Unscale the input latents."""
+ x.mul_(1 / self.config.scaling_factor)
+ return x.add_(self.config.shift_factor) if self.config.shift_factor else x
+
+ def encode(self, x) -> AutoencoderKLOutput:
+ """Encode the input samples."""
+ z = self.tiled_encoder(self.forward(x))
+ z = self.quant_conv(z)
+ posterior = DiagonalGaussianDistribution(z)
+ return AutoencoderKLOutput(latent_dist=posterior)
+
+ def decode(self, z) -> DecoderOutput:
+ """Decode the input latents."""
+ extra_dim = 2 if z.dim() == 4 else None
+ z = z.unsqueeze_(extra_dim) if extra_dim is not None else z
+ z = self.post_quant_conv(self.forward(z))
+ x = self.tiled_decoder(z)
+ x = x.squeeze_(extra_dim) if extra_dim is not None else x
+ return DecoderOutput(sample=x)
+
+ def forward(self, x): # NOOP.
+ return x
diff --git a/URSA/diffnext/models/autoencoders/autoencoder_kl_ltx.py b/URSA/diffnext/models/autoencoders/autoencoder_kl_ltx.py
new file mode 100644
index 0000000000000000000000000000000000000000..d738c5b94f71181eb3abfd1ee1db1b4dc93134c1
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/autoencoder_kl_ltx.py
@@ -0,0 +1,313 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+"""Simple implementation of AutoEncoderKL for LTX v0.95."""
+
+from einops import rearrange
+import torch
+import torch.nn as nn
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import AutoencoderKLOutput
+from diffusers.models.modeling_utils import ModelMixin
+
+from diffnext.models.autoencoders.modeling_utils import DiagonalGaussianDistribution
+from diffnext.models.autoencoders.modeling_utils import DecoderOutput, TilingMixin
+
+
+class Conv3d(nn.Conv3d):
+ """3D convolution."""
+
+ def __init__(self, *args, **kwargs):
+ self.causal = kwargs.pop("causal", True)
+ super(Conv3d, self).__init__(*args, **kwargs)
+ self.padding = (0,) + tuple((_ // 2 for _ in self.kernel_size[1:]))
+ self.pad1 = nn.ReplicationPad3d((0,) * 4 + (self.kernel_size[0] - 1, 0))
+ self.pad2 = nn.ReplicationPad3d((0,) * 4 + (self.pad1.padding[-2] // 2,) * 2)
+ self.pad1 = nn.Identity() if self.kernel_size[0] == 1 else self.pad1
+ self.pad2 = nn.Identity() if self.kernel_size[0] == 1 else self.pad2
+
+ def forward(self, x):
+ return super().forward(self.pad1(x) if self.causal else self.pad2(x))
+
+
+class RMSNorm(nn.Module):
+ """RMS normalization."""
+
+ def forward(self, x):
+ # Enforce high precision RMS to avoid float16 underflow.
+ return x.mul(x.float().square().mean(-1, True).add_(1e-8).rsqrt().to(x.dtype))
+
+
+class TimeEmbed(nn.Module):
+ """Time embedding layer."""
+
+ def __init__(self, embed_dim, freq_dim=256):
+ super(TimeEmbed, self).__init__()
+ self.timestep_proj = nn.Module()
+ self.timestep_proj.fc1 = nn.Linear(freq_dim, embed_dim)
+ self.timestep_proj.fc2 = nn.Linear(embed_dim, embed_dim)
+ self.freq_dim, self.time_freq = freq_dim, None
+
+ def get_freq_embed(self, timestep) -> torch.Tensor:
+ if self.time_freq is None:
+ dim, log_theta = self.freq_dim // 2, 9.210340371976184 # math.log(10000)
+ freq = torch.arange(dim, dtype=torch.float32, device=timestep.device)
+ self.time_freq = freq.mul(-log_theta / dim).exp().unsqueeze_(0)
+ emb = timestep.unsqueeze(-1).float() * self.time_freq
+ return torch.cat([emb.cos(), emb.sin()], dim=-1).to(dtype=timestep.dtype)
+
+ def forward(self, temb) -> torch.Tensor:
+ x = self.get_freq_embed(temb) if temb.dim() == 1 else temb
+ return self.timestep_proj.fc2(nn.functional.silu(self.timestep_proj.fc1(x)))
+
+
+class ResBlock(nn.Module):
+ """Resnet block."""
+
+ def __init__(self, dim, out_dim, causal=True):
+ super(ResBlock, self).__init__()
+ self.norm1, self.norm2 = RMSNorm(), RMSNorm()
+ self.conv1 = Conv3d(dim, out_dim, 3, causal=causal)
+ self.conv2 = Conv3d(out_dim, out_dim, 3, causal=causal)
+ self.nonlinearity, self.dropout = nn.SiLU(), nn.Dropout(0, inplace=True)
+ self.scale_shift_table = None if causal else nn.Parameter(torch.randn(4, dim) / dim**0.5)
+
+ def forward(self, x: torch.Tensor, temb: torch.Tensor = None) -> torch.Tensor:
+ shortcut, stats = x, []
+ if self.scale_shift_table is not None:
+ stats = temb.add(self.scale_shift_table.view(1, -1))[..., None, None, None].chunk(4, 1)
+ x = self.norm1(x.movedim(1, -1)).movedim(-1, 1)
+ x = x.mul(1 + stats[1]).add_(stats[0]) if stats else x
+ x = self.conv1(self.nonlinearity(x))
+ x = self.norm2(x.movedim(1, -1)).movedim(-1, 1)
+ x = x.mul(1 + stats[3]).add_(stats[2]) if stats else x
+ return self.conv2(self.dropout(self.nonlinearity(x))).add_(shortcut)
+
+
+class MidBlock(nn.Module):
+ """UNet mid block."""
+
+ def __init__(self, dim, depth=1, causal=True):
+ super(MidBlock, self).__init__()
+ self.time_embed = None if causal else TimeEmbed(dim * 4)
+ self.resnets = nn.ModuleList(ResBlock(dim, dim, causal=causal) for _ in range(depth))
+
+ def forward(self, x: torch.Tensor, temb: torch.Tensor = None) -> torch.Tensor:
+ temb = self.time_embed(temb) if self.time_embed else None
+ for resnet in self.resnets:
+ x = resnet(x, temb)
+ return x
+
+
+class Downsample(nn.Module):
+ """Residual downsample layer."""
+
+ def __init__(self, dim, out_dim, stride, causal=True):
+ super(Downsample, self).__init__()
+ self.stride = stride = stride if isinstance(stride, (tuple, list)) else (stride,) * 3
+ self.group_size = (dim * torch.Size(stride).numel()) // out_dim
+ self.pad_t, conv_dim = stride[0] - 1, out_dim // torch.Size(stride).numel()
+ self.conv = Conv3d(dim, conv_dim, 3, 1, causal=causal)
+ self.patch_args = {"r": stride[0], "p": stride[1], "q": stride[2]}
+ self.patch_args["pattern"] = "b c (t r) (h p) (w q) -> b (c r p q) t h w"
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ x = nn.functional.pad(x, (0,) * 4 + (self.pad_t, 0), "replicate") if self.pad_t else x
+ shortcut = rearrange(x, **self.patch_args).unflatten(1, (-1, self.group_size)).mean(dim=2)
+ return rearrange(self.conv(x), **self.patch_args).add_(shortcut)
+
+
+class Upsample(nn.Module):
+ """Residual upsample layer."""
+
+ def __init__(self, dim, out_dim, stride, causal=False):
+ super(Upsample, self).__init__()
+ self.stride = stride = stride if isinstance(stride, (tuple, list)) else (stride,) * 3
+ self.repeats = (out_dim * torch.Size(stride).numel()) // dim
+ self.slice_t, conv_dim = stride[0] - 1, out_dim * torch.Size(stride).numel()
+ self.conv = Conv3d(dim, conv_dim, 3, 1, causal=causal)
+ self.patch_args = {"r": stride[0], "p": stride[1], "q": stride[2]}
+ self.patch_args["pattern"] = "b (c r p q) t h w -> b c (t r) (h p) (w q)"
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ shortcut = rearrange(x, **self.patch_args).repeat(1, self.repeats, 1, 1, 1)
+ x = rearrange(self.conv(x), **self.patch_args)
+ x = x[:, :, self.slice_t :] if self.slice_t else x
+ return x.add_(shortcut[:, :, self.slice_t :] if self.slice_t else shortcut)
+
+
+class DownBlock(nn.Module):
+ """Downsample block."""
+
+ def __init__(self, dim, out_dim, depth=1, causal=True, downsample=""):
+ super(DownBlock, self).__init__()
+ self.resnets, self.downsamplers = nn.ModuleList(), nn.ModuleList()
+ for _ in range(depth):
+ self.resnets.append(ResBlock(dim, dim, causal=causal))
+ for _ in range(1 if downsample else 0):
+ stride = {"spatial": (1, 2, 2), "temporal": (2, 1, 1), "spatiotemporal": 2}[downsample]
+ self.downsamplers.append(Downsample(dim, out_dim, stride, causal=causal))
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ for resnet in self.resnets:
+ x = resnet(x)
+ for downsampler in self.downsamplers:
+ x = downsampler(x)
+ return x
+
+
+class UpBlock(nn.Module):
+ """Upsample block."""
+
+ def __init__(self, dim, out_dim, depth=1, causal=False, upscale_factor=2):
+ super(UpBlock, self).__init__()
+ self.time_embed = TimeEmbed(out_dim * 4)
+ self.resnets, self.upsamplers = nn.ModuleList(), nn.ModuleList()
+ for _ in range(1 if upscale_factor > 1 else 0):
+ self.upsamplers.append(Upsample(dim, out_dim, 2, causal=causal))
+ for _ in range(depth):
+ self.resnets.append(ResBlock(out_dim, out_dim, causal=causal))
+
+ def forward(self, x: torch.Tensor, temb: torch.Tensor = None) -> torch.Tensor:
+ for upsampler in self.upsamplers:
+ x = upsampler(x)
+ temb = self.time_embed(temb)
+ for resnet in self.resnets:
+ x = resnet(x, temb)
+ return x
+
+
+class Encoder(nn.Module):
+ """VAE encoder."""
+
+ def __init__(self, dim, out_dim, block_dims, block_depths, patch_size=4):
+ super(Encoder, self).__init__()
+ self.patch_args = {"p": patch_size, "q": patch_size}
+ downsample_type = ["spatial", "temporal", "spatiotemporal", "spatiotemporal"]
+ self.conv_in = Conv3d(dim * patch_size**2, block_dims[0], 3, 1)
+ self.down_blocks = nn.ModuleList()
+ for i, (in_dim, depth, down) in enumerate(zip(block_dims, block_depths, downsample_type)):
+ blk = DownBlock(in_dim, block_dims[i + 1], depth, downsample=down)
+ self.down_blocks.append(blk)
+ self.mid_block = MidBlock(block_dims[-1], block_depths[-1])
+ self.norm_out, self.conv_act = RMSNorm(), nn.SiLU()
+ self.conv_out = Conv3d(block_dims[-1], out_dim + 1, 3, 1)
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ x = rearrange(x, "b c t (h p) (w q) -> b (c q p) t h w", **self.patch_args)
+ x = self.conv_in(x)
+ for down_block in self.down_blocks:
+ x = down_block(x)
+ x = self.mid_block(x)
+ x = self.norm_out(x.movedim(1, -1)).movedim(-1, 1)
+ return self.conv_out(self.conv_act(x))
+
+
+class Decoder(nn.Module):
+ """VAE decoder."""
+
+ def __init__(self, dim, out_dim, block_dims, block_depths, patch_size=4):
+ super(Decoder, self).__init__()
+ block_dims = tuple(reversed(block_dims))
+ self.patch_args = {"p": patch_size, "q": patch_size}
+ self.conv_in = Conv3d(dim, block_dims[0], 3, 1, causal=False)
+ self.mid_block = MidBlock(block_dims[0], block_depths[-1], causal=False)
+ self.up_blocks = nn.ModuleList([])
+ for in_dim, depth in zip(block_dims, block_depths[:-1]):
+ self.up_blocks.append(UpBlock(in_dim, in_dim // 2, depth, upscale_factor=2))
+ self.norm_out, self.conv_act = RMSNorm(), nn.SiLU()
+ self.conv_out = Conv3d(block_dims[-1], out_dim * patch_size**2, 3, 1, causal=False)
+ self.time_embed = TimeEmbed(block_dims[-1] * 2)
+ self.scale_shift_table = nn.Parameter(torch.randn(2, block_dims[-1]))
+ self.timestep_scale = nn.Parameter(torch.tensor(1000, dtype=torch.float32))
+
+ def forward(self, x: torch.Tensor, temb: torch.Tensor = None) -> torch.Tensor:
+ x = self.conv_in(x)
+ temb = self.time_embed.get_freq_embed(temb * self.timestep_scale)
+ x = self.mid_block(x, temb)
+ for up_block in self.up_blocks:
+ x = up_block(x, temb)
+ x = self.norm_out(x.movedim(1, -1)).movedim(-1, 1)
+ temb = self.time_embed(temb)
+ stats = temb.add(self.scale_shift_table.view(1, -1))[..., None, None, None].chunk(2, 1)
+ x = x.mul(1 + stats[1]).add_(stats[0])
+ x = self.conv_out(self.conv_act(x))
+ return rearrange(x, "b (c q p) t h w -> b c t (h p) (w q)", **self.patch_args)
+
+
+class AutoencoderKLLTXVideo(ModelMixin, ConfigMixin, TilingMixin):
+ """AutoEncoder KL."""
+
+ @register_to_config
+ def __init__(
+ self,
+ in_channels=3,
+ out_channels=3,
+ down_block_types=("LTXVideoDownBlock3D",) * 4,
+ block_out_channels=(128, 256, 512, 1024, 2048),
+ layers_per_block=(4, 6, 6, 2, 2),
+ decoder_block_out_channels=(128, 256, 512, 1024),
+ decoder_layers_per_block=(5, 5, 5, 5),
+ act_fn="silu",
+ latent_channels=128,
+ sample_size=1024,
+ scaling_factor=1.0,
+ shift_factor=None,
+ latents_mean=None,
+ latents_std=None,
+ patch_size=4,
+ ):
+ super(AutoencoderKLLTXVideo, self).__init__()
+ TilingMixin.__init__(self, sample_min_t=249, latent_min_t=32, sample_ovr_t=1)
+ channels, layers = block_out_channels, layers_per_block
+ self.encoder = Encoder(in_channels, latent_channels, channels, layers)
+ channels, layers = decoder_block_out_channels, decoder_layers_per_block
+ self.decoder = Decoder(latent_channels, out_channels, channels, layers)
+ self.register_buffer("shift_factors", torch.zeros(latents_mean) if latents_mean else None)
+ self.register_buffer("scaling_factors", torch.ones(latents_std) if latents_std else None)
+ self.latent_dist = DiagonalGaussianDistribution
+
+ def scale_(self, x) -> torch.Tensor:
+ """Scale the input latents."""
+ if self.shift_factors is not None:
+ return x.sub_(self.shift_factors).mul_(self.scaling_factors)
+ x.add_(-self.config.shift_factor) if self.config.shift_factor else None
+ return x.mul_(self.config.scaling_factor)
+
+ def unscale_(self, x) -> torch.Tensor:
+ """Unscale the input latents."""
+ if self.shift_factors is not None:
+ return x.div_(self.scaling_factors).add_(self.shift_factors)
+ x.mul_(1 / self.config.scaling_factor)
+ return x.add_(self.config.shift_factor) if self.config.shift_factor else x
+
+ def encode(self, x) -> AutoencoderKLOutput:
+ """Encode the input samples."""
+ z = self.tiled_encoder(self.forward(x))
+ posterior = self.latent_dist(z)
+ return AutoencoderKLOutput(latent_dist=posterior)
+
+ def decode(self, z, temb: torch.Tensor = None) -> DecoderOutput:
+ """Decode the input latents."""
+ if temb is None:
+ temb = torch.tensor([0] * z.size(0), dtype=z.dtype, device=z.device)
+ extra_dim = 2 if z.dim() == 4 else None
+ z = z.unsqueeze_(extra_dim) if extra_dim is not None else z
+ x = self.tiled_decoder(self.forward(z), temb=temb)
+ x = x.squeeze_(extra_dim) if extra_dim is not None else x
+ return DecoderOutput(sample=x)
+
+ def forward(self, x): # NOOP.
+ return x
diff --git a/URSA/diffnext/models/autoencoders/autoencoder_kl_opensora.py b/URSA/diffnext/models/autoencoders/autoencoder_kl_opensora.py
new file mode 100644
index 0000000000000000000000000000000000000000..aff5fd2ad0563e7665881cbe7f3e76f621a4cb6d
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/autoencoder_kl_opensora.py
@@ -0,0 +1,268 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+"""Simple implementation of AutoEncoderKL for OpenSoraPlan."""
+
+from functools import partial
+
+import torch
+from torch import nn
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import AutoencoderKLOutput
+from diffusers.models.modeling_utils import ModelMixin
+
+from diffnext.models.autoencoders.modeling_utils import DiagonalGaussianDistribution
+from diffnext.models.autoencoders.modeling_utils import DecoderOutput, TilingMixin
+
+
+class Conv3d(nn.Conv3d):
+    """3D convolution with causal temporal padding.
+
+    The built-in temporal padding is zeroed out and replaced by a left-only
+    replication pad of ``kernel_size[0] - 1`` frames, so outputs at frame t
+    never see frames after t.  Spatial padding behaves as in ``nn.Conv3d``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(Conv3d, self).__init__(*args, **kwargs)
+        self.padding = (0,) + self.padding[1:]  # Drop built-in temporal pad.
+        self.pad = nn.ReplicationPad3d((0,) * 4 + (self.kernel_size[0] - 1, 0))
+        self.pad = nn.Identity() if self.kernel_size[0] == 1 else self.pad
+
+    def forward(self, x) -> torch.Tensor:
+        return super(Conv3d, self).forward(self.pad(x))
+
+
+class Attention(nn.Module):
+    """Multi-headed attention over spatial tokens (per-frame for 5D inputs)."""
+
+    def __init__(self, dim, num_heads=1):
+        super(Attention, self).__init__()
+        # num_heads=0/None selects ``dim // 64`` heads; non-zero values are kept.
+        self.num_heads = num_heads or dim // 64
+        self.head_dim = dim // self.num_heads
+        self.group_norm = nn.GroupNorm(32, dim, eps=1e-6)
+        self.to_q, self.to_k, self.to_v = [nn.Linear(dim, dim) for _ in range(3)]
+        self.to_out = nn.ModuleList([nn.Linear(dim, dim)])
+        self._from_deprecated_attn_block = True  # Fix for diffusers>=0.15.0
+
+    def forward(self, x) -> torch.Tensor:
+        # 4D inputs attend over all positions; 5D inputs attend within frames.
+        num_windows = 1 if x.dim() == 4 else x.size(2)
+        x, x_shape = self.group_norm(x), (-1,) + x.shape[1:]
+        if num_windows == 1:
+            x = x.flatten(2).transpose(1, 2).contiguous()
+        else:  # i.e., Frame windows.
+            x = x.permute(0, 2, 3, 4, 1).flatten(0, 1).flatten(1, 2).contiguous()
+        qkv_shape = (-1, x.size(1), self.num_heads, self.head_dim)
+        q, k, v = [f(x).view(qkv_shape).transpose(1, 2) for f in (self.to_q, self.to_k, self.to_v)]
+        o = nn.functional.scaled_dot_product_attention(q, k, v).transpose(1, 2)
+        x = self.to_out[0](o.flatten(2)).transpose(1, 2)
+        # Unfold the frame-window batching before restoring the input shape.
+        x = x.view((-1, num_windows) + x.shape[1:]).transpose(1, 2) if num_windows > 1 else x
+        return x.reshape(x_shape)
+
+
+class Resize(nn.Module):
+    """Resize layer handling both down- and upsampling.
+
+    ``conv_type`` may be a conv class or a ``functools.partial`` with
+    ``kernel_size`` pre-bound (see ``Decoder``).  ``downsample`` truthy builds
+    a stride-2 conv; otherwise a stride-1 conv follows nearest/trilinear
+    upsampling.
+    """
+
+    def __init__(self, dim, conv_type, downsample=1):
+        super(Resize, self).__init__()
+        self.conv = conv_type(dim, dim, 3, 2, 0) if downsample else None
+        self.conv = conv_type(dim, dim, stride=1, padding=1) if not downsample else self.conv
+        # ``t`` caches the clip length for 2D->3D refolding (set by Encoder).
+        self.downsample, self.upsample, self.t = downsample, int(not downsample), 1
+        # upsample mode: 0 = none, 1 = spatial only, 2 = spatio-temporal.
+        self.upsample = 0 if downsample else (2 if isinstance(self.conv, Conv3d) else 1)
+        self.upsample = 1 if self.conv.kernel_size[0] == 1 else self.upsample
+
+    def forward(self, x) -> torch.Tensor:
+        if self.upsample == 2:
+            # The first frame gets spatial-only upsampling (no extra frame is
+            # created in front of it); remaining frames are upsampled 2x in t.
+            x1, x2 = (x[:, :, :1], x[:, :, 1:]) if x.size(2) > 1 else (x, None)
+            x1 = nn.functional.interpolate(x1, None, (1, 2, 2), "trilinear")
+            x2 = x2 if x2 is None else nn.functional.interpolate(x2, None, (2, 2, 2), "trilinear")
+            x = torch.cat([x1, x2], dim=2) if x2 is not None else x1
+        elif self.downsample:
+            # Asymmetric right/bottom pad pairs with the stride-2, pad-0 conv.
+            padding = (0, 1, 0, 1) + ((0, 0) if isinstance(self.conv, Conv3d) else ())
+            if x.dim() == 4 and len(padding) == 6:  # 2D->3D
+                x = x.view((-1, self.t) + x.shape[1:]).transpose(1, 2)
+            x = nn.functional.pad(x, padding)
+        elif self.upsample:
+            x = x.repeat_interleave(2, 3).repeat_interleave(2, 4)
+        return self.conv(x)
+
+
+class ResBlock(nn.Module):
+    """Resnet block: two norm-act-conv stages plus a (projected) shortcut."""
+
+    def __init__(self, dim, out_dim, conv_type=nn.Conv2d):
+        super(ResBlock, self).__init__()
+        self.norm1 = nn.GroupNorm(32, dim, eps=1e-6)
+        self.conv1 = conv_type(dim, out_dim, 3, 1, 1)
+        self.norm2 = nn.GroupNorm(32, out_dim, eps=1e-6)
+        self.conv2 = conv_type(out_dim, out_dim, 3, 1, 1)
+        # 1x1 projection only when the channel count changes.
+        self.conv_shortcut = conv_type(dim, out_dim, 1) if out_dim != dim else None
+        self.nonlinearity = nn.SiLU()
+
+    def forward(self, x) -> torch.Tensor:
+        shortcut = self.conv_shortcut(x) if self.conv_shortcut else x
+        x = self.conv1(self.nonlinearity(self.norm1(x)))
+        # In-place add fuses the residual into the second conv's output.
+        return self.conv2(self.nonlinearity(self.norm2(x))).add_(shortcut)
+
+
+class UNetResBlock(nn.Module):
+    """UNet resnet block: stacked ResBlocks with optional down/up resize.
+
+    ``downsample``/``upsample`` carry the conv type to hand to ``Resize``
+    (falsy disables the resampler).
+    """
+
+    def __init__(self, dim, out_dim, conv_type, depth=2, downsample=False, upsample=False):
+        super(UNetResBlock, self).__init__()
+        # Only the first resnet changes channels; the rest keep out_dim.
+        block_dims = [(out_dim, out_dim) if i > 0 else (dim, out_dim) for i in range(depth)]
+        self.resnets = nn.ModuleList(ResBlock(*dims, conv_type=conv_type) for dims in block_dims)
+        self.downsamplers = nn.ModuleList([Resize(out_dim, downsample)]) if downsample else []
+        self.upsamplers = nn.ModuleList([Resize(out_dim, upsample, 0)]) if upsample else []
+
+    def forward(self, x) -> torch.Tensor:
+        for resnet in self.resnets:
+            x = resnet(x)
+        x = self.downsamplers[0](x) if self.downsamplers else x
+        return self.upsamplers[0](x) if self.upsamplers else x
+
+
+class UNetMidBlock(nn.Module):
+    """UNet mid block: resnet, then ``depth`` x (attention + residual, resnet)."""
+
+    def __init__(self, dim, conv_type, num_heads=1, depth=1):
+        super(UNetMidBlock, self).__init__()
+        self.resnets = nn.ModuleList(ResBlock(dim, dim, conv_type) for _ in range(depth + 1))
+        self.attentions = nn.ModuleList(Attention(dim, num_heads) for _ in range(depth))
+
+    def forward(self, x) -> torch.Tensor:
+        x = self.resnets[0](x)
+        for attn, resnet in zip(self.attentions, self.resnets[1:]):
+            # Attention output is added residually before the next resnet.
+            x = resnet(attn(x).add_(x))
+        return x
+
+
+class Encoder(nn.Module):
+    """VAE encoder: conv_in -> down blocks -> mid block -> conv_out.
+
+    ``conv_out`` emits ``2 * out_dim`` channels (mean and logvar halves for
+    the diagonal Gaussian posterior).
+    """
+
+    def __init__(self, dim, out_dim, block_types, block_dims, block_depth=2):
+        super(Encoder, self).__init__()
+        self.conv_in = nn.Conv2d(dim, block_dims[0], 3, 1, 1)
+        self.down_blocks = nn.ModuleList()
+        for i, (block_type, block_dim) in enumerate(zip(block_types, block_dims)):
+            conv_type, conv_down = nn.Conv2d if "Block2D" in block_type else Conv3d, None
+            if i < len(block_dims) - 1:
+                # The downsampler adopts the conv type of the NEXT stage so
+                # the 2D->3D transition happens at the right place.
+                conv_down = nn.Conv2d if "Block2D" in block_types[i + 1] else Conv3d
+            args = (block_dims[max(i - 1, 0)], block_dim, conv_type, block_depth)
+            self.down_blocks += [UNetResBlock(*args, downsample=conv_down)]
+        self.mid_block = UNetMidBlock(block_dims[-1], conv_type)
+        self.conv_act = nn.SiLU()
+        self.conv_norm_out = nn.GroupNorm(32, block_dims[-1], eps=1e-6)
+        self.conv_out = conv_type(block_dims[-1], 2 * out_dim, 3, 1, 1)
+
+    def forward(self, x) -> torch.Tensor:
+        # 5D video inputs are folded to batched 2D frames for the 2D stages.
+        t = x.size(2) if x.dim() == 5 else 1
+        x = x.transpose(1, 2).flatten(0, 1) if x.dim() == 5 else x
+        x = self.conv_in(x)
+        for blk in self.down_blocks:
+            # Tell each downsampler the clip length for 2D->3D refolding.
+            [setattr(m, "t", t) for m in blk.downsamplers]
+            x = blk(x)
+        x = self.mid_block(x)
+        return self.conv_out(self.conv_act(self.conv_norm_out(x)))
+
+
+class Decoder(nn.Module):
+    """VAE decoder: conv_in -> mid block -> up blocks -> conv_out."""
+
+    def __init__(self, dim, out_dim, block_types, block_dims, block_depth=2):
+        super(Decoder, self).__init__()
+        block_dims = list(reversed(block_dims))
+        self.up_blocks = nn.ModuleList()
+        for i, (block_type, block_dim) in enumerate(zip(block_types, block_dims)):
+            conv_type, conv_up = nn.Conv2d if "Block2D" in block_type else Conv3d, None
+            if i < len(block_dims) - 1:
+                # The last 3D upsampler uses a (1, 3, 3) kernel (spatial-only).
+                kernel_size = 3 if i < len(block_dims) - 2 or conv_type is nn.Conv2d else (1, 3, 3)
+                conv_up = partial(conv_type, kernel_size=kernel_size)
+            args = (block_dims[max(i - 1, 0)], block_dim, conv_type, block_depth + 1)
+            self.up_blocks += [UNetResBlock(*args, upsample=conv_up)]
+        # NOTE(review): conv_in/mid_block reuse ``conv_type`` from the LAST
+        # loop iteration — verify this matches the intended first-stage type.
+        self.conv_in = conv_type(dim, block_dims[0], 3, 1, 1)
+        self.mid_block = UNetMidBlock(block_dims[0], conv_type)
+        self.conv_act = nn.SiLU()
+        self.conv_norm_out = nn.GroupNorm(32, block_dims[-1], eps=1e-6)
+        self.conv_out = conv_type(block_dims[-1], out_dim, 3, 1, 1)
+
+    def forward(self, x) -> torch.Tensor:
+        x = self.conv_in(x)
+        x = self.mid_block(x)
+        for blk in self.up_blocks:
+            x = blk(x)
+        return self.conv_out(self.conv_act(self.conv_norm_out(x)))
+
+
+class AutoencoderKLOpenSora(ModelMixin, ConfigMixin, TilingMixin):
+    """AutoEncoder KL for OpenSoraPlan, with temporal tiling support."""
+
+    @register_to_config
+    def __init__(
+        self,
+        in_channels=3,
+        out_channels=3,
+        down_block_types=("DownEncoderBlock2D",) * 4,
+        up_block_types=("UpDecoderBlock2D",) * 4,
+        block_out_channels=(128, 256, 512, 512),
+        layers_per_block=2,
+        act_fn="silu",
+        latent_channels=16,
+        norm_num_groups=32,
+        sample_size=256,
+        scaling_factor=0.18215,
+        shift_factor=None,
+        latents_mean=None,
+        latents_std=None,
+        force_upcast=True,
+        use_quant_conv=True,
+        use_post_quant_conv=True,
+    ):
+        super(AutoencoderKLOpenSora, self).__init__()
+        # Tiling: 17 sample frames <-> 5 latent frames per tile, 1-frame overlap.
+        TilingMixin.__init__(self, sample_min_t=17, latent_min_t=5, sample_ovr_t=1, latent_ovr_t=1)
+        channels, layers = block_out_channels, layers_per_block
+        self.encoder = Encoder(in_channels, latent_channels, down_block_types, channels, layers)
+        self.decoder = Decoder(latent_channels, out_channels, up_block_types, channels, layers)
+        # Quant convs mirror the decoder's conv_in type (2D or causal 3D).
+        quant_conv_type = type(self.decoder.conv_in) if use_quant_conv else nn.Identity
+        post_quant_conv_type = type(self.decoder.conv_in) if use_post_quant_conv else nn.Identity
+        self.quant_conv = quant_conv_type(2 * latent_channels, 2 * latent_channels, 1)
+        self.post_quant_conv = post_quant_conv_type(latent_channels, latent_channels, 1)
+        self.latent_dist = DiagonalGaussianDistribution
+
+    def scale_(self, x) -> torch.Tensor:
+        """Scale the input latents in-place using the scalar config factors."""
+        # A scalar shift_factor of 0.0/None is treated as "no shift".
+        x.add_(-self.config.shift_factor) if self.config.shift_factor else None
+        return x.mul_(self.config.scaling_factor)
+
+    def unscale_(self, x) -> torch.Tensor:
+        """Unscale the input latents in-place (inverse of ``scale_``)."""
+        x.mul_(1 / self.config.scaling_factor)
+        return x.add_(self.config.shift_factor) if self.config.shift_factor else x
+
+    def encode(self, x) -> AutoencoderKLOutput:
+        """Encode the input samples into a latent posterior distribution."""
+        # 4D inputs only get a temporary time axis when the quant conv is 3D.
+        extra_dim = 2 if isinstance(self.quant_conv, Conv3d) and x.dim() == 4 else None
+        z = self.tiled_encoder(self.forward(x))
+        z = self.quant_conv(z)
+        z = z.squeeze_(extra_dim) if extra_dim is not None else z
+        posterior = DiagonalGaussianDistribution(z)
+        return AutoencoderKLOutput(latent_dist=posterior)
+
+    def decode(self, z) -> DecoderOutput:
+        """Decode the input latents back to sample space."""
+        extra_dim = 2 if isinstance(self.quant_conv, Conv3d) and z.dim() == 4 else None
+        z = z.unsqueeze_(extra_dim) if extra_dim is not None else z
+        z = self.post_quant_conv(self.forward(z))
+        x = self.tiled_decoder(z)
+        x = x.squeeze_(extra_dim) if extra_dim is not None else x
+        return DecoderOutput(sample=x)
+
+    def forward(self, x):  # NOOP.
+        # Identity pass-through; presumably kept for nn.Module API
+        # compatibility with hooks/wrappers — confirm with callers.
+        return x
diff --git a/URSA/diffnext/models/autoencoders/autoencoder_vq.py b/URSA/diffnext/models/autoencoders/autoencoder_vq.py
new file mode 100644
index 0000000000000000000000000000000000000000..3682f08ff6a10d3bdd0d25e708dda95178b6aab3
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/autoencoder_vq.py
@@ -0,0 +1,104 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Simple implementation of AutoEncoderVQ."""
+
+import torch
+from torch import nn
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import AutoencoderKLOutput
+from diffusers.models.modeling_utils import ModelMixin
+
+from diffnext.models.autoencoders.autoencoder_kl import Attention, Decoder, Encoder
+from diffnext.models.autoencoders.modeling_utils import DecoderOutput, IdentityDistribution
+from diffnext.models.autoencoders import quantizers
+
+
+class AutoencoderVQ(ModelMixin, ConfigMixin):
+    """AutoEncoder VQ: KL-style encoder/decoder with a discrete quantizer."""
+
+    @register_to_config
+    def __init__(
+        self,
+        in_channels=3,
+        out_channels=3,
+        down_block_types=("DownEncoderBlock2D",) * 4,
+        up_block_types=("UpDecoderBlock2D",) * 4,
+        block_out_channels=(128, 256, 512, 512),
+        layers_per_block=2,
+        act_fn="silu",
+        latent_channels=16,
+        norm_num_groups=32,
+        sample_size=1024,
+        num_vq_embeddings=16384,
+        vq_embed_dim=8,
+        attn_down_block=False,
+        attn_up_block=False,
+        force_upcast=False,
+        temporal_stride=1,
+        spatial_stride=16,
+        decoder_dtype=None,
+        _quantizer_name="VQuantizer",
+    ):
+        super(AutoencoderVQ, self).__init__()
+        channels, layers = block_out_channels, layers_per_block
+        self.encoder = Encoder(in_channels, latent_channels, channels, layers)
+        self.decoder = Decoder(latent_channels, out_channels, channels, layers)
+        self.quant_conv = nn.Conv2d(latent_channels, vq_embed_dim, 1)
+        self.post_quant_conv = nn.Conv2d(vq_embed_dim, latent_channels, 1)
+        # Optionally append extra attentions to the deepest encoder/decoder
+        # blocks (the imported KL blocks expose an ``attentions`` list).
+        if attn_down_block:
+            attentions = [Attention(block_out_channels[-1]) for _ in range(layers_per_block)]
+            self.encoder.down_blocks[-1].attentions += attentions
+        if attn_up_block:
+            attentions = [Attention(block_out_channels[-1]) for _ in range(layers_per_block + 1)]
+            self.decoder.up_blocks[0].attentions += attentions
+        # ``_quantizer_name`` selects a class from the quantizers module.
+        self.quantizer = getattr(quantizers, _quantizer_name)(num_vq_embeddings, vq_embed_dim)
+        self.latent_dist = IdentityDistribution
+
+    def to(self, *args, **kwargs):
+        """Convert to given device and dtype.
+
+        The decoder may be pinned to ``config.decoder_dtype`` regardless of
+        the dtype requested for the rest of the model.
+        """
+        super().to(*args, **kwargs)
+        if self.config.decoder_dtype:
+            self.decoder.to(dtype=getattr(torch, self.config.decoder_dtype))
+        return self
+
+    def scale_(self, x) -> torch.Tensor:
+        """Scale the input latents (identity for VQ latents)."""
+        return x
+
+    def unscale_(self, x) -> torch.Tensor:
+        """Unscale the input latents (identity for VQ latents)."""
+        return x
+
+    def encode(self, x) -> AutoencoderKLOutput:
+        """Encode the input samples to quantized indices."""
+        z = self.encoder(self.forward(x))
+        z = self.quant_conv(z)
+        posterior = self.latent_dist(self.quantizer.quantize(z))
+        return AutoencoderKLOutput(latent_dist=posterior)
+
+    def decode(self, ids) -> DecoderOutput:
+        """Decode the input indices back to sample space."""
+        z = self.quantizer.dequantize(ids)
+        # 5D latents are folded to batched 2D frames for the 2D decoder.
+        t = z.size(2) if z.dim() == 5 else 1
+        z = z.transpose(1, 2).flatten(0, 1) if t > 1 else z
+        z = z.squeeze_(2) if z.dim() == 5 else z
+        x = self.post_quant_conv(self.forward(z))
+        # Match the decoder's (possibly pinned) parameter dtype.
+        x = self.decoder(x.to(self.decoder.conv_in.weight))
+        x = x.view(-1, t, *x.shape[1:]).transpose(1, 2) if t > 1 else x
+        return DecoderOutput(sample=x)
+
+    def forward(self, x):  # NOOP.
+        # Identity pass-through; presumably kept for nn.Module API
+        # compatibility with hooks/wrappers — confirm with callers.
+        return x
diff --git a/URSA/diffnext/models/autoencoders/autoencoder_vq_cosmos3d.py b/URSA/diffnext/models/autoencoders/autoencoder_vq_cosmos3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..915357aa8a0b096eb3cb4117d8699226e63d1f4b
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/autoencoder_vq_cosmos3d.py
@@ -0,0 +1,309 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Simple implementation of AutoEncoderVQ for Cosmos3D."""
+
+import math
+
+import torch
+from einops import rearrange
+from torch import nn
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import AutoencoderKLOutput
+from diffusers.models.modeling_utils import ModelMixin
+
+from diffnext.models.autoencoders.modeling_utils import IdentityDistribution
+from diffnext.models.autoencoders.modeling_utils import DecoderOutput, TilingMixin
+from diffnext.models.autoencoders.quantizers import FSQuantizer
+from diffnext.models.autoencoders.wavelets_utils import Patcher3D
+
+
+class GroupNorm2D(nn.GroupNorm):
+    """2D group normalization applied per-frame to 5D (b, c, t, h, w) inputs."""
+
+    def forward(self, x) -> torch.Tensor:
+        # Fold time into batch, normalize frames as 2D, then unfold.
+        x, bsz = super().forward(x.transpose(1, 2).flatten(0, 1)), x.size(0)
+        return rearrange(x, "(b t) c h w -> b c t h w", b=bsz)
+
+
+class Conv3d(nn.Conv3d):
+    """3D convolution with causal (left-only) temporal replication padding.
+
+    ``time_stride`` overrides the temporal stride independently of the
+    spatial strides; the causal pad length compensates for it.
+    """
+
+    def __init__(self, *args, **kwargs):
+        stride_t = kwargs.pop("time_stride", None)
+        super(Conv3d, self).__init__(*args, **kwargs)
+        # Left pad = (k_t - 1) adjusted by (1 - stride_t); no future frames.
+        pad_t = (self.kernel_size[0] - 1) + (1 - (stride_t or self.stride[0]))
+        self.stride = (stride_t or self.stride[0],) + self.stride[1:]
+        self.padding = (0,) + self.padding[1:]
+        self.pad = nn.ReplicationPad3d((0,) * 4 + (pad_t, 0))
+        self.pad = nn.Identity() if self.kernel_size[0] == 1 else self.pad
+
+    @classmethod
+    def new_factorized(cls, dim, out_dim):
+        # Factorize into a spatial (1, 3, 3) conv then a temporal (3, 1, 1) conv.
+        return nn.Sequential(cls(dim, out_dim, (1, 3, 3), 1, 1), cls(out_dim, out_dim, (3, 1, 1)))
+
+    def forward(self, x) -> torch.Tensor:
+        return super(Conv3d, self).forward(self.pad(x))
+
+
+class Attention(nn.Module):
+    """Multi-headed attention with an internal residual add.
+
+    ``perm`` is an einops pattern choosing the attention axis: spatial
+    (tokens = h*w per frame) or temporal (tokens = t per pixel).
+    """
+
+    def __init__(self, dim, perm="(b t) 1 (h w) c"):
+        super(Attention, self).__init__()
+        self.group_norm, self.perm = GroupNorm2D(1, dim, eps=1e-6), perm
+        self.to_q, self.to_k, self.to_v = [nn.Linear(dim, dim) for _ in range(3)]
+        self.to_out = nn.ModuleList([nn.Linear(dim, dim)])
+
+    @classmethod
+    def new_factorized(cls, dim) -> nn.Sequential:
+        # Spatial attention per frame followed by temporal attention per pixel.
+        return nn.Sequential(cls(dim, "(b t) 1 (h w) c"), cls(dim, "(b h w) 1 t c"))
+
+    def forward(self, x) -> torch.Tensor:
+        shortcut, x, (bsz, _, _, h, w) = x, self.group_norm(x), x.size()
+        x = rearrange(x, "b c t h w -> %s" % self.perm)
+        q, k, v = [f(x) for f in (self.to_q, self.to_k, self.to_v)]
+        o = self.to_out[0](nn.functional.scaled_dot_product_attention(q, k, v))
+        # Residual is added here (callers do NOT re-add it).
+        return rearrange(o, "%s -> b c t h w" % self.perm, b=bsz, h=h, w=w).add_(shortcut)
+
+
+class Resize(nn.Module):
+    """Down/upsample layer (docstring said "Downsample" but both are handled).
+
+    ``spatial``/``temporal`` encode the mode: 1 = downsample that axis,
+    2 = upsample that axis, 0 = leave it unchanged.
+    """
+
+    def __init__(self, dim, spatial=1, temporal=1):
+        super(Resize, self).__init__()
+        self.spatial, self.temporal = spatial, temporal
+        self.conv1, self.conv2 = nn.Identity(), nn.Identity()
+        if spatial == 1 or temporal == 1:  # Down.
+            self.conv1 = Conv3d(dim, dim, (1, 3, 3), 2, time_stride=1)
+            self.conv2 = Conv3d(dim, dim, (3, 1, 1), 1, time_stride=2) if temporal else self.conv2
+        elif spatial == 2 or temporal == 2:  # Up.
+            self.conv1 = Conv3d(dim, dim, (3, 1, 1), 1, 0) if temporal else self.conv1
+            self.conv2 = Conv3d(dim, dim, (1, 3, 3), 1, 1)
+        # Final 1x1x1 mixing conv whenever any resizing happens.
+        self.conv3 = Conv3d(dim, dim, 1) if spatial or temporal else nn.Identity()
+
+    def forward(self, x) -> torch.Tensor:
+        if self.spatial == 1:
+            # Strided conv plus average pooling, summed (anti-aliased down).
+            _ = nn.functional.avg_pool3d(x, (1, 2, 2), (1, 2, 2))
+            x = self.conv1(nn.functional.pad(x, (0, 1, 0, 1, 0, 0))).add_(_)
+        if self.temporal == 1:
+            x = nn.functional.pad(x, (0, 0, 0, 0, 1, 0), "replicate")
+            x = self.conv2(x).add_(nn.functional.avg_pool3d(x, (2, 1, 1), (2, 1, 1)))
+        if self.temporal == 2:
+            # Duplicate frames then drop the first copy to keep causality.
+            x = x.repeat_interleave(2, dim=2)[:, :, 1:]
+            x = self.conv1(x).add_(x)
+        if self.spatial == 2:
+            x = x.repeat_interleave(2, dim=3).repeat_interleave(2, dim=4)
+            x = self.conv2(x).add_(x)
+        return self.conv3(x)
+
+
+class ResBlock(nn.Module):
+    """Resnet block with factorized 3D convs and a (projected) shortcut."""
+
+    def __init__(self, dim, out_dim):
+        super(ResBlock, self).__init__()
+        self.norm1 = GroupNorm2D(1, dim, eps=1e-6)
+        self.conv1 = Conv3d.new_factorized(dim, out_dim)
+        self.norm2 = GroupNorm2D(1, out_dim, eps=1e-6)
+        self.conv2 = Conv3d.new_factorized(out_dim, out_dim)
+        # 1x1x1 projection only when the channel count changes.
+        self.conv_shortcut = Conv3d(dim, out_dim, 1) if out_dim != dim else None
+        # Dropout(0) is a no-op; presumably kept for checkpoint compatibility.
+        self.nonlinearity, self.dropout = nn.SiLU(), nn.Dropout(0)
+
+    def forward(self, x) -> torch.Tensor:
+        shortcut = self.conv_shortcut(x) if self.conv_shortcut else x
+        x = self.conv1(self.nonlinearity(self.norm1(x)))
+        return self.conv2(self.nonlinearity(self.norm2(x))).add_(shortcut)
+
+
+class UNetResBlock(nn.Module):
+    """UNet resnet block: stacked ResBlocks with optional down/up resize.
+
+    ``downsample``/``upsample`` are (spatial, temporal) mode tuples unpacked
+    into ``Resize``; None/falsy disables the resampler.
+    """
+
+    def __init__(self, dim, out_dim, depth=2, downsample=None, upsample=None):
+        super(UNetResBlock, self).__init__()
+        block_dims = [(out_dim, out_dim) if i > 0 else (dim, out_dim) for i in range(depth)]
+        self.resnets = nn.ModuleList(ResBlock(*dims) for dims in block_dims)
+        self.downsamplers = nn.ModuleList([Resize(out_dim, *downsample)]) if downsample else []
+        self.upsamplers = nn.ModuleList([Resize(out_dim, *upsample)]) if upsample else []
+
+    def forward(self, x) -> torch.Tensor:
+        for resnet in self.resnets:
+            x = resnet(x)
+        x = self.downsamplers[0](x) if self.downsamplers else x
+        return self.upsamplers[0](x) if self.upsamplers else x
+
+
+class UNetMidBlock(nn.Module):
+    """UNet mid block: resnet, then ``depth`` x (factorized attention, resnet).
+
+    Unlike the KL-variant mid block, no residual add is needed here: this
+    file's ``Attention`` adds the shortcut internally.
+    """
+
+    def __init__(self, dim, depth=1):
+        super(UNetMidBlock, self).__init__()
+        self.resnets = nn.ModuleList(ResBlock(dim, dim) for _ in range(depth + 1))
+        self.attentions = nn.ModuleList(Attention.new_factorized(dim) for _ in range(depth))
+
+    def forward(self, x) -> torch.Tensor:
+        x = self.resnets[0](x)
+        for attn, resnet in zip(self.attentions, self.resnets[1:]):
+            x = resnet(attn(x))
+        return x
+
+
+class Encoder(nn.Module):
+    """AE encoder: wavelet patching -> down blocks -> mid block -> conv_out.
+
+    The first ``patch_size`` of downsampling comes from the 3D wavelet
+    transform (``Patcher3D``); the remaining factors of ``spatial_stride`` /
+    ``temporal_stride`` come from strided down blocks.
+    """
+
+    def __init__(
+        self,
+        dim,
+        out_dim,
+        block_dims,
+        block_depth,
+        patch_size=4,
+        temporal_stride=8,
+        spatial_stride=8,
+    ):
+        super(Encoder, self).__init__()
+        # Number of extra conv downsamples beyond the wavelet patching.
+        spatial_downs = int(math.log2(spatial_stride)) - int(math.log2(patch_size))
+        temporal_downs = int(math.log2(temporal_stride)) - int(math.log2(patch_size))
+        self.patcher = Patcher3D(patch_size)
+        self.conv_in = Conv3d.new_factorized(dim * patch_size**3, block_dims[0])
+        self.down_blocks = nn.ModuleList()
+        for i, dim in enumerate(block_dims[:-1]):
+            downsample, block_dim = None, block_dims[i + 1]
+            if i < len(block_dims) - 2:
+                downsample = int(i < spatial_downs), int(i < temporal_downs)
+            args = (dim, block_dim, block_depth)
+            self.down_blocks += [UNetResBlock(*args, downsample=downsample)]
+        self.mid_block = UNetMidBlock(block_dim)
+        self.conv_norm_out, self.conv_act = GroupNorm2D(1, block_dim, eps=1e-6), nn.SiLU()
+        self.conv_out = Conv3d.new_factorized(block_dim, out_dim)
+
+    def forward(self, x) -> torch.Tensor:
+        # Replicate the first frame patch_size times so the causal wavelet
+        # transform has enough leading context (trimmed again in Decoder).
+        x = torch.cat([x[:, :, :1].repeat_interleave(self.patcher.patch_size, 2), x[:, :, 1:]], 2)
+        for _ in range(self.patcher.num_strides):
+            x = self.patcher.dwt(x)
+        x = self.conv_in(x)
+        for blk in self.down_blocks:
+            x = blk(x)
+        x = self.mid_block(x)
+        return self.conv_out(self.conv_act(self.conv_norm_out(x)))
+
+
+class Decoder(nn.Module):
+    """AE decoder: conv_in -> mid block -> up blocks -> inverse wavelets.
+
+    Mirrors ``Encoder``: conv upsampling covers the stride beyond
+    ``patch_size``; the inverse wavelet transform (``idwt``) restores the rest.
+    """
+
+    def __init__(
+        self,
+        dim,
+        out_dim,
+        block_dims,
+        block_depth,
+        patch_size=4,
+        temporal_stride=8,
+        spatial_stride=8,
+    ):
+        super(Decoder, self).__init__()
+        block_dims = list(reversed(block_dims))
+        spatial_ups = int(math.log2(spatial_stride)) - int(math.log2(patch_size))
+        temporal_ups = int(math.log2(temporal_stride)) - int(math.log2(patch_size))
+        self.patcher = Patcher3D(patch_size)
+        self.conv_in = Conv3d.new_factorized(dim, block_dims[0])
+        self.mid_block = UNetMidBlock(block_dims[0])
+        self.up_blocks = nn.ModuleList()
+        for i, block_dim in enumerate(block_dims[:-1]):
+            upsample, dim = None, block_dims[max(i - 1, 0)]
+            if i < len(block_dims) - 2:
+                # NOTE(review): scheduling of which stages upsample which axis
+                # is intricate — confirm against the encoder's down schedule.
+                temporal = 0 < i < temporal_ups + 1
+                spatial = temporal or (i < spatial_ups and spatial_ups > temporal_ups)
+                upsample = (2 if spatial else 0, 2 if temporal else 0)
+            args = (dim, block_dim, block_depth + 1)
+            self.up_blocks += [UNetResBlock(*args, upsample=upsample)]
+        self.conv_norm_out, self.conv_act = GroupNorm2D(1, block_dim, eps=1e-6), nn.SiLU()
+        self.conv_out = Conv3d.new_factorized(block_dim, out_dim * patch_size**3)
+
+    def forward(self, x) -> torch.Tensor:
+        x = self.conv_in(x)
+        x = self.mid_block(x)
+        for blk in self.up_blocks:
+            x = blk(x)
+        x = self.conv_out(self.conv_act(self.conv_norm_out(x)))
+        for _ in range(self.patcher.num_strides):
+            x = self.patcher.idwt(x)
+        # Trim the replicated leading frames added by Encoder.forward.
+        return x[:, :, self.patcher.patch_size - 1 :]
+
+
+class AutoencoderVQCosmos3D(ModelMixin, ConfigMixin, TilingMixin):
+    """AutoEncoder VQ for Cosmos3D: wavelet encoder/decoder plus FSQ codes."""
+
+    @register_to_config
+    def __init__(
+        self,
+        in_channels=3,
+        out_channels=3,
+        down_block_types=("DownEncoderBlock3D",) * 3,
+        up_block_types=("UpDecoderBlock3D",) * 3,
+        block_out_channels=(128, 256, 512, 512),
+        layers_per_block=2,
+        act_fn="silu",
+        latent_channels=16,
+        norm_num_groups=1,
+        sample_size=1024,
+        sample_frames=17,
+        num_vq_embeddings=64000,
+        vq_embed_dim=6,
+        force_upcast=False,
+        patch_size=4,
+        temporal_stride=4,
+        spatial_stride=8,
+        _quantizer_name="FSQuantizer",
+    ):
+        super(AutoencoderVQCosmos3D, self).__init__()
+        # Latent tile length implied by the temporal compression ratio.
+        latent_min_t = (sample_frames - 1) // temporal_stride + 1
+        TilingMixin.__init__(self, sample_frames, latent_min_t=latent_min_t, sample_ovr_t=1)
+        extra_args = {"patch_size": patch_size}
+        extra_args.update({"temporal_stride": temporal_stride, "spatial_stride": spatial_stride})
+        channels, layers = block_out_channels, layers_per_block
+        self.encoder = Encoder(in_channels, latent_channels, channels, layers, **extra_args)
+        self.decoder = Decoder(latent_channels, out_channels, channels, layers, **extra_args)
+        self.quant_conv = Conv3d(latent_channels, vq_embed_dim, 1)
+        self.post_quant_conv = Conv3d(vq_embed_dim, latent_channels, 1)
+        # NOTE(review): FSQuantizer() ignores num_vq_embeddings/vq_embed_dim
+        # and _quantizer_name here — confirm the config values are only
+        # informational for this class.
+        self.quantizer, self.latent_dist = FSQuantizer(), IdentityDistribution
+
+    def scale_(self, x) -> torch.Tensor:
+        """Scale the input latents (identity for VQ latents)."""
+        return x
+
+    def unscale_(self, x) -> torch.Tensor:
+        """Unscale the input latents (identity for VQ latents)."""
+        return x
+
+    def encode(self, x) -> AutoencoderKLOutput:
+        """Encode the input samples to quantized indices."""
+        z = self.tiled_encoder(self.forward(x))
+        z = self.quant_conv(z)
+        posterior = self.latent_dist(self.quantizer.quantize(z))
+        return AutoencoderKLOutput(latent_dist=posterior)
+
+    def decode(self, ids) -> DecoderOutput:
+        """Decode the input indices back to sample space."""
+        z = self.quantizer.dequantize(ids)
+        # 4D latents get a temporary time axis for the 3D decoder.
+        extra_dim = 2 if z.dim() == 4 else None
+        z = z.unsqueeze_(extra_dim) if extra_dim is not None else z
+        z = self.post_quant_conv(self.forward(z))
+        x = self.tiled_decoder(z)
+        x = x.squeeze_(extra_dim) if extra_dim is not None else x
+        return DecoderOutput(sample=x)
+
+    def forward(self, x):  # NOOP.
+        # Identity pass-through; presumably kept for nn.Module API
+        # compatibility with hooks/wrappers — confirm with callers.
+        return x
diff --git a/URSA/diffnext/models/autoencoders/modeling_utils.py b/URSA/diffnext/models/autoencoders/modeling_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5938c45665ef890ba2e9e913a96434e0b1f4b3ad
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/modeling_utils.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""AutoEncoder utilities."""
+
+from diffusers.models.modeling_outputs import BaseOutput
+import torch
+
+
+class DecoderOutput(BaseOutput):
+    """Output of decoding method."""
+
+    # Decoded sample tensor (image or video, matching the decoder's layout).
+    sample: torch.Tensor
+
+
+class IdentityDistribution(object):
+    """Degenerate distribution that returns its parameters unchanged.
+
+    Used by the VQ autoencoders so ``encode`` can share the KL-style
+    ``AutoencoderKLOutput`` interface.
+    """
+
+    def __init__(self, z):
+        self.parameters = z
+
+    def sample(self, generator=None):
+        # ``generator`` is accepted for interface parity but unused.
+        return self.parameters
+
+
+class DiagonalGaussianDistribution(object):
+    """DiagonalGaussianDistribution.
+
+    Splits the encoder output along channels into mean/logvar halves and
+    samples via the reparameterization trick, computed in float32.
+    """
+
+    def __init__(self, z):
+        self.parameters = z
+        self.device, self.dtype = z.device, z.dtype
+        if z.size(1) % 2:
+            # NOTE(review): odd channel counts C are padded with C - 2 copies
+            # of the last channel (total 2C - 2) so chunk(2) splits evenly —
+            # verify this matches the intended latent layout.
+            z = torch.cat([z, z[:, -1:].expand((-1, z.shape[1] - 2) + (-1,) * (z.dim() - 2))], 1)
+        self.mean, self.logvar = z.float().chunk(2, dim=1)
+        self.logvar = self.logvar.clamp(-30.0, 20.0)
+        self.std, self.var = self.logvar.mul(0.5).exp_(), self.logvar.exp()
+
+    def sample(self, generator=None) -> torch.Tensor:
+        """Draw a sample, restoring the original device and dtype."""
+        device, dtype = self.mean.device, self.mean.dtype
+        norm_dist = torch.randn(self.mean.shape, generator=generator, device=device, dtype=dtype)
+        return norm_dist.mul_(self.std).add_(self.mean).to(device=self.device, dtype=self.dtype)
+
+
+class TilingMixin(object):
+    """Base class for input tiling.
+
+    Splits long clips into overlapping temporal tiles so single encoder /
+    decoder calls stay under the cuDNN tensor-size limit.
+
+    Shape hints:
+
+    print(torch.Size((1, 256, 17, 480, 768)).numel() < 2147483647, "Supported")
+    print(torch.Size((1, 256, 17, 576, 1024)).numel() > 2147483647, "Unsupported")
+
+    """
+
+    def __init__(self, sample_min_t=17, latent_min_t=5, sample_ovr_t=1, latent_ovr_t=0):
+        # *_min_t: tile length per call; *_ovr_t: frames shared between tiles.
+        self.sample_min_t, self.latent_min_t = sample_min_t, latent_min_t
+        self.sample_ovr_t, self.latent_ovr_t = sample_ovr_t, latent_ovr_t
+
+    def tiled_encoder(self, x) -> torch.Tensor:
+        """Encode ``x``, splitting clips longer than ``sample_min_t`` into tiles."""
+        if x.dim() == 4 or x.size(2) <= self.sample_min_t:
+            return self.encoder(x)
+        t = x.shape[2]
+        t_start = [i for i in range(0, t, self.sample_min_t - self.sample_ovr_t)]
+        t_slice = [slice(i, i + self.sample_min_t) for i in t_start]
+        # NOTE(review): partial tiles (s.stop > t) are skipped, so trailing
+        # frames that do not fill a tile are dropped — confirm callers always
+        # pass aligned frame counts.
+        t_tiles = [self.encoder(x[:, :, s]) for s in t_slice if s.stop <= t]
+        # Trim the overlapped leading latents from every tile but the first.
+        t_tiles = [x[:, :, self.latent_ovr_t :] if i else x for i, x in enumerate(t_tiles)]
+        return torch.cat(t_tiles, dim=2)
+
+    def tiled_decoder(self, x, **kwargs) -> torch.Tensor:
+        """Decode ``x``, splitting latents longer than ``latent_min_t`` into tiles."""
+        if x.dim() == 4 or x.size(2) <= self.latent_min_t:
+            return self.decoder(x, **kwargs)
+        t = x.shape[2]
+        t_start = [i for i in range(0, t, self.latent_min_t - self.latent_ovr_t)]
+        t_slice = [slice(i, i + self.latent_min_t) for i in t_start]
+        t_tiles = [self.decoder(x[:, :, s], **kwargs) for s in t_slice if s.stop <= t]
+        # Trim the overlapped leading frames from every tile but the first.
+        t_tiles = [x[:, :, self.sample_ovr_t :] if i else x for i, x in enumerate(t_tiles)]
+        return torch.cat(t_tiles, dim=2)
+
+
+class HybridMixin(object):
+    """Base class for hybrid module.
+
+    Dispatches 4D inputs to ``forward_image`` and 5D inputs to
+    ``forward_video`` (both must be provided by the subclass).
+    """
+
+    def forward(self, x) -> torch.Tensor:
+        return self.forward_image(x) if x.dim() == 4 else self.forward_video(x)
diff --git a/URSA/diffnext/models/autoencoders/quantizers.py b/URSA/diffnext/models/autoencoders/quantizers.py
new file mode 100644
index 0000000000000000000000000000000000000000..718533d4b8dde0fd8a5f4778fc9d2b9b386d4730
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/quantizers.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Discrete quantizers."""
+
+import torch
+from torch import nn
+
+
class VQuantizer(nn.Identity):
    """Vector Quantizer: maps latents to codebook indices and back."""

    def __init__(self, n_e, vq_embed_dim):
        """Args:
        n_e: codebook size.
        vq_embed_dim: dimension of each code vector.
        """
        super(VQuantizer, self).__init__()
        self.n_e = n_e
        self.vq_embed_dim = vq_embed_dim
        self.embedding = nn.Embedding(n_e, vq_embed_dim)

    def quantize(self, z: torch.Tensor) -> torch.Tensor:
        """Quantize z to indices (highest inner-product code per position)."""
        z = self.forward(z)
        scores = nn.functional.linear(z.transpose(1, -1), self.embedding.weight)
        ids = scores.argmax(-1).int()
        if ids.dim() > 3:
            return ids.permute(0, 2, 3, 1)
        return ids.permute(0, 2, 1)

    def dequantize(self, ids) -> torch.Tensor:
        """Dequantize indices to z (channels-first layout)."""
        codes = self.embedding(self.forward(ids))
        if codes.dim() > 4:
            return codes.permute(0, 4, 1, 2, 3)
        return codes.permute(0, 3, 1, 2)
+
+
class LFQuantizer(nn.Identity):
    """Lookup-Free Quantizer: each channel's sign is one bit of the code."""

    def __init__(self, n_e, vq_embed_dim):
        super(LFQuantizer, self).__init__()
        self.n_e = n_e
        self.vq_embed_dim = vq_embed_dim
        self.embedding = nn.Embedding(n_e, vq_embed_dim)
        # Replace the learned table with a fixed {-1, +1} bit-pattern table.
        del self.embedding.weight
        powers = 2 ** torch.arange(vq_embed_dim - 1, -1, -1, dtype=torch.int32)
        bits = torch.arange(n_e).unsqueeze(-1).bitwise_and(powers).ne(0)
        table = bits.float() * 2 - 1
        self.register_buffer("basis", powers, persistent=False)
        self.embedding.register_buffer("weight", table, persistent=False)

    def quantize(self, z: torch.Tensor) -> torch.Tensor:
        """Quantize z to indices via the sign of each channel."""
        signs = self.forward(z).transpose(1, -1).gt(0).int()
        ids = signs.mul(self.basis).sum(-1)
        if ids.dim() > 3:
            return ids.permute(0, 2, 3, 1)
        return ids.permute(0, 2, 1)

    def dequantize(self, ids) -> torch.Tensor:
        """Dequantize indices to +/-1 latents (channels-first layout)."""
        codes = self.embedding(self.forward(ids))
        if codes.dim() > 4:
            return codes.permute(0, 4, 1, 2, 3)
        return codes.permute(0, 3, 1, 2)
+
+
class FSQuantizer(nn.Identity):
    """Finite Scalar Quantizer (FSQ).

    Each latent channel is bounded and rounded to one of ``levels[i]`` values;
    the per-channel digits combine into one integer code via a mixed-radix basis.
    """

    def __init__(self, levels=(8, 8, 8, 5, 5, 5)):
        super(FSQuantizer, self).__init__()
        self.n_e = torch.Size(levels).numel()
        self.vq_embed_dim = len(levels)
        digits = torch.tensor(levels, dtype=torch.int32)
        basis = torch.cumprod(torch.tensor([1] + list(levels[:-1])), dim=0, dtype=torch.int32)
        self.register_buffer("scalar", torch.zeros(0), persistent=False)  # Dummy dtype indicator.
        self.register_buffer("levels", digits, persistent=False)
        self.register_buffer("half_width", digits // 2, persistent=False)  # For normalization.
        self.register_buffer("basis", basis, persistent=False)  # Mixed-radix quantization basis.

    def bound(self, z: torch.Tensor, eps: float = 1e-3) -> torch.Tensor:
        """Squash z so rounding lands on each channel's finite grid."""
        half_range = (self.levels - 1) * (1 + eps) / 2
        # Even level counts need a half-step offset so the grid is symmetric.
        offset = torch.where(self.levels % 2 == 0, 0.5, 0.0)
        shift = (offset / half_range).atanh()
        return (z + shift).tanh() * half_range - offset

    def quantize(self, z: torch.Tensor) -> torch.Tensor:
        """Quantize z to integer codes (channels-last spatial layout)."""
        rounded = self.bound(self.forward(z.transpose(1, -1))).round()
        ids = (rounded + self.half_width).mul(self.basis).sum(-1).int()
        if ids.dim() > 3:
            return ids.permute(0, 2, 3, 1)
        return ids.permute(0, 2, 1)

    def dequantize(self, ids) -> torch.Tensor:
        """Decode integer codes back to normalized latents (channels-first)."""
        ids = self.forward(ids)
        digits = ids.unsqueeze(-1).floor_divide(self.basis).fmod(self.levels)
        z = digits.sub(self.half_width).div(self.half_width).to(self.scalar.dtype)
        if z.dim() > 4:
            return z.permute(0, 4, 1, 2, 3)
        return z.permute(0, 3, 1, 2)
diff --git a/URSA/diffnext/models/autoencoders/wavelets_utils.py b/URSA/diffnext/models/autoencoders/wavelets_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..50bb6ab0379c84450ac3395103778dd1d502fb31
--- /dev/null
+++ b/URSA/diffnext/models/autoencoders/wavelets_utils.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Wavelets utilities.
+
+References:
+
+[Cosmos](https://github.com/nvidia-cosmos/cosmos-predict1/blob/main/cosmos_predict1/tokenizer/modules/patching.py)
+"""
+
+import math
+
+import torch
+from torch import nn
+
+
class Patcher2D(nn.Module):
    """2D discrete wavelet transform (Haar), applied log2(patch_size) times."""

    def __init__(self, patch_size=4):
        super(Patcher2D, self).__init__()
        self.rescale_factor = 2
        self.patch_size = patch_size
        self.num_strides = int(math.log2(patch_size))
        # Haar low-pass taps (1/sqrt(2)) and sign-alternated high-pass taps.
        haar = torch.full((2,), 0.7071067811865476)
        signs = (-1.0) ** torch.arange(2)
        self.register_buffer("wavelets1", haar, persistent=False)
        self.register_buffer("wavelets2", haar * signs, persistent=False)

    def dwt(self, x) -> torch.Tensor:
        """One analysis step: (N, C, H, W) -> (N, 4C, H/2, W/2)."""
        groups = x.size(1)
        lo = self.wavelets1.flip(0).view(1, 1, -1).repeat(groups, 1, 1)
        hi = self.wavelets2.view(1, 1, -1).repeat(groups, 1, 1)
        padded = nn.functional.pad(x, (0, 1, 0, 1), "reflect")
        bands = []
        for fw in (lo, hi):
            # Filter and downsample along width first, then along height.
            cols = nn.functional.conv2d(padded, fw[:, :, None, :], stride=(1, 2), groups=groups)
            for fh in (lo, hi):
                rows = nn.functional.conv2d(cols, fh[:, :, :, None], stride=(2, 1), groups=groups)
                bands.append(rows)
        return torch.cat(bands, dim=1).mul_(1 / self.rescale_factor)

    def idwt(self, x) -> torch.Tensor:
        """One synthesis step: (N, 4C, H, W) -> (N, C, 2H, 2W)."""
        groups = x.size(1) // 4
        lo = self.wavelets1.flip([0]).view(1, 1, -1).repeat([groups, 1, 1])
        hi = self.wavelets2.view(1, 1, -1).repeat(groups, 1, 1)
        bands = list(torch.chunk(x, 4, dim=1))
        # Undo the height-axis filtering: band pairs (0,1) and (2,3) merge.
        merged = []
        for pair in (bands[0:2], bands[2:4]):
            ups = [
                nn.functional.conv_transpose2d(b, f[:, :, :, None], stride=(2, 1), groups=groups)
                for b, f in zip(pair, (lo, hi))
            ]
            merged.append(ups[0] + ups[1])
        # Undo the width-axis filtering on the two merged halves.
        cols = [
            nn.functional.conv_transpose2d(b, f[:, :, None, :], stride=(1, 2), groups=groups)
            for b, f in zip(merged, (lo, hi))
        ]
        return cols[0].add(cols[1]).mul_(self.rescale_factor)

    def forward(self, x) -> torch.Tensor:
        """Apply ``num_strides`` analysis steps."""
        for _ in range(self.num_strides):
            x = self.dwt(x)
        return x
+
+
class Patcher3D(Patcher2D):
    """3D discrete wavelet transform."""

    def __init__(self, patch_size=4):
        super(Patcher3D, self).__init__(patch_size)
        # Three 1/sqrt(2)-normalized 1D stages (T, H, W) => 2*sqrt(2) overall.
        self.rescale_factor = 2 * 2**0.5

    def dwt(self, x) -> torch.Tensor:
        # One analysis step over (N, C, T, H, W): filter and downsample along T,
        # then H, then W, yielding 8 sub-bands concatenated on channels.
        g = x.size(1)
        hl = self.wavelets1.flip(0).view(1, 1, -1).repeat(g, 1, 1)
        hh = self.wavelets2.view(1, 1, -1).repeat(g, 1, 1)
        # Reflect-pad by one along T/H/W before stride-2 filtering.
        x1, out = nn.functional.pad(x, (0, 1, 0, 1, 0, 1), "reflect"), []
        for w1 in (hl, hh):
            x2 = nn.functional.conv3d(x1, w1[:, :, :, None, None], stride=(2, 1, 1), groups=g)
            for w2 in (hl, hh):
                x3 = nn.functional.conv3d(x2, w2[:, :, None, :, None], stride=(1, 2, 1), groups=g)
                for w3 in (hl, hh):
                    w3 = w3[:, :, None, None, :]
                    out.append(nn.functional.conv3d(x3, w3, stride=(1, 1, 2), groups=g))
        return torch.cat(out, dim=1).mul_(1.0 / self.rescale_factor)

    def idwt(self, x) -> torch.Tensor:
        # One synthesis step: (N, 8C, T, H, W) -> (N, C, 2T, 2H, 2W).
        g = x.size(1) // 8
        hl = self.wavelets1.flip([0]).view(1, 1, -1).repeat([g, 1, 1])
        hh = self.wavelets2.view(1, 1, -1).repeat(g, 1, 1)
        out = list(torch.chunk(x, 8, dim=1))
        # Undo the width-axis filtering: 8 bands -> 4 (results appended, then paired).
        for i in range(4):
            for j, w in enumerate((hl, hh)):
                x, w = out[i * 2 + j], w[:, :, None, None, :]
                out.append(nn.functional.conv_transpose3d(x, w, stride=(1, 1, 2), groups=g))
        out = [out[i] + out[i + 1] for i in range(8, 16, 2)]
        # Undo the height-axis filtering: 4 bands -> 2.
        for i in range(2):
            for j, w in enumerate((hl, hh)):
                x, w = out[i * 2 + j], w[:, :, None, :, None]
                out.append(nn.functional.conv_transpose3d(x, w, stride=(1, 2, 1), groups=g))
        out = [out[i] + out[i + 1] for i in range(4, 8, 2)]
        # Undo the temporal filtering: 2 bands -> the reconstructed volume.
        for j, w in enumerate((hl, hh)):
            x, w = out[j], w[:, :, :, None, None]
            out.append(nn.functional.conv_transpose3d(x, w, stride=(2, 1, 1), groups=g))
        return out[2].add(out[3]).mul_(self.rescale_factor)

    def forward(self, x) -> torch.Tensor:
        # Causal temporal padding: replicate the first frame ``patch_size`` times
        # so the first input frame maps onto its own leading output position.
        # NOTE(review): assumes inputs have T = 1 + k * patch_size frames — confirm.
        x = torch.cat([x[:, :, :1].repeat_interleave(self.patch_size, 2), x[:, :, 1:]], 2)
        for _ in range(self.num_strides):
            x = self.dwt(x)
        return x
diff --git a/URSA/diffnext/models/diffusion_mlp.py b/URSA/diffnext/models/diffusion_mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..308fde7ac859d913a937ac595de3485456fe90d5
--- /dev/null
+++ b/URSA/diffnext/models/diffusion_mlp.py
@@ -0,0 +1,99 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Diffusion MLP."""
+
+import torch
+from torch import nn
+from torch.utils.checkpoint import checkpoint as apply_ckpt
+
+from diffnext.models.embeddings import PatchEmbed
+from diffnext.models.normalization import AdaLayerNormZero
+
+
class Projector(nn.Module):
    """Two-layer MLP projector with a SiLU non-linearity in between."""

    def __init__(self, dim, mlp_dim=None, out_dim=None):
        """Args:
        dim: input width.
        mlp_dim: hidden width (defaults to ``dim``).
        out_dim: output width (defaults to ``dim``).
        """
        super(Projector, self).__init__()
        hidden = mlp_dim or dim
        self.fc1 = nn.Linear(dim, hidden)
        self.fc2 = nn.Linear(hidden, out_dim or dim)
        self.activation = nn.SiLU()

    def forward(self, x) -> torch.Tensor:
        h = self.fc1(x)
        h = self.activation(h)
        return self.fc2(h)
+
+
class DiffusionBlock(nn.Module):
    """Diffusion block: adaptive pre-norm + MLP projector with a gated residual."""

    def __init__(self, dim):
        super(DiffusionBlock, self).__init__()
        # ``mlp_checkpointing`` toggles activation checkpointing in forward.
        self.dim, self.mlp_checkpointing = dim, False
        self.norm1 = AdaLayerNormZero(dim, num_stats=3, eps=1e-6)
        self.proj, self.norm2 = Projector(dim, dim, dim), nn.LayerNorm(dim)

    def forward(self, x, z) -> torch.Tensor:
        # ``norm1`` modulates x with condition z and returns (hidden, (gate,)).
        if self.mlp_checkpointing and x.requires_grad:
            # Recompute norm1/proj during backward to save activation memory.
            h, (gate,) = apply_ckpt(self.norm1, x, z, use_reentrant=False)
            return self.norm2(apply_ckpt(self.proj, h, use_reentrant=False)).mul(gate).add_(x)
        h, (gate,) = self.norm1(x, z)
        # Gated residual: x + gate * LN(MLP(h)); the add is in place.
        return self.norm2(self.proj(h)).mul(gate).add_(x)
+
+
class TimeCondEmbed(nn.Module):
    """Time-Condition embedding layer.

    Projects sinusoidal timestep features and the condition tokens into a
    shared embedding space and sums them.
    """

    def __init__(self, cond_dim, embed_dim, freq_dim=256):
        super(TimeCondEmbed, self).__init__()
        self.timestep_proj = Projector(freq_dim, embed_dim, embed_dim)
        self.condition_proj = Projector(cond_dim, embed_dim, embed_dim)
        # ``time_freq`` lazily caches the sinusoid frequency row.
        self.freq_dim, self.time_freq = freq_dim, None

    def get_freq_embed(self, timestep, dtype) -> torch.Tensor:
        # Transformer-style sinusoidal embedding of the timestep, computed in
        # float32 and cast to ``dtype`` at the end.
        if self.time_freq is None:
            dim, log_theta = self.freq_dim // 2, 9.210340371976184  # math.log(10000)
            freq = torch.arange(dim, dtype=torch.float32, device=timestep.device)
            # NOTE(review): cached on the device of the first timestep seen; a
            # later device change would leave this stale — confirm single-device use.
            self.time_freq = freq.mul(-log_theta / dim).exp().unsqueeze(0)
        emb = timestep.unsqueeze(-1).float() * self.time_freq
        return torch.cat([emb.cos(), emb.sin()], dim=-1).to(dtype=dtype)

    def forward(self, timestep, z) -> torch.Tensor:
        # Broadcast the timestep embedding over the token axis and add it
        # (in place) to the freshly projected condition.
        t = self.timestep_proj(self.get_freq_embed(timestep, z.dtype))
        return self.condition_proj(z).add_(t.unsqueeze_(1) if t.dim() == 2 else t)
+
+
class DiffusionMLP(nn.Module):
    """Diffusion MLP model.

    A stack of adaptively-normalized MLP blocks that denoises patchified image
    latents conditioned on a timestep and per-token condition ``z``.
    """

    def __init__(self, depth, embed_dim, cond_dim, patch_size=2, image_dim=4):
        super(DiffusionMLP, self).__init__()
        self.patch_embed = PatchEmbed(image_dim, embed_dim, patch_size)
        self.time_cond_embed = TimeCondEmbed(cond_dim, embed_dim)
        self.blocks = nn.ModuleList(DiffusionBlock(embed_dim) for _ in range(depth))
        self.norm = AdaLayerNormZero(embed_dim, num_stats=2, eps=1e-6)
        # Predicts one patch (patch_size^2 * image_dim values) per token.
        self.head = nn.Linear(embed_dim, patch_size**2 * image_dim)

    def forward(self, x, timestep, z, pred_ids=None) -> torch.Tensor:
        # When ``pred_ids`` is given, only those token positions are run through
        # the blocks and the outputs are scattered back into the patchified
        # original input ``o``; otherwise all tokens are processed.
        x, o = self.patch_embed(x), None if pred_ids is None else x
        o = None if pred_ids is None else self.patch_embed.patchify(o)
        x = x if pred_ids is None else x.gather(1, pred_ids.expand(-1, -1, x.size(-1)))
        z = z if pred_ids is None else z.gather(1, pred_ids.expand(-1, -1, z.size(-1)))
        z = self.time_cond_embed(timestep, z)
        for blk in self.blocks:
            x = blk(x, z)
        x = self.norm(x, z)[0]
        x = self.head(x)
        return x if pred_ids is None else o.scatter(1, pred_ids.expand(-1, -1, x.size(-1)), x)
diff --git a/URSA/diffnext/models/diffusion_transformer.py b/URSA/diffnext/models/diffusion_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..7eb9764cae6967efe9a9466e538b71fe8168319c
--- /dev/null
+++ b/URSA/diffnext/models/diffusion_transformer.py
@@ -0,0 +1,151 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Diffusion Transformer."""
+
+from functools import partial
+from typing import Tuple
+
+import torch
+from torch import nn
+from torch.utils.checkpoint import checkpoint as apply_ckpt
+
+from diffnext.models.embeddings import PatchEmbed, RotaryEmbed3D
+from diffnext.models.normalization import AdaLayerNormZero, AdaLayerNormSingle
+from diffnext.models.diffusion_mlp import Projector, TimeCondEmbed
+
+
class TimeEmbed(TimeCondEmbed):
    """Time embedding layer (timestep-only variant of ``TimeCondEmbed``)."""

    def __init__(self, embed_dim, freq_dim=256):
        # Intentionally bypasses TimeCondEmbed.__init__ so that no condition
        # projection is created; only the timestep path is kept.
        nn.Module.__init__(self)
        self.timestep_proj = Projector(freq_dim, embed_dim, embed_dim)
        self.freq_dim, self.time_freq = freq_dim, None

    def forward(self, timestep) -> torch.Tensor:
        # Use the projection weights' dtype so the sinusoids match the module.
        dtype = self.timestep_proj.fc1.weight.dtype
        temb = self.timestep_proj(self.get_freq_embed(timestep, dtype))
        # Insert a token axis (in place) for per-batch 1D timesteps.
        return temb.unsqueeze_(1) if temb.dim() == 2 else temb
+
+
class MLP(nn.Module):
    """Feed-forward transformer block: Linear -> GELU -> Linear."""

    def __init__(self, dim, mlp_ratio=4):
        """dim: model width; mlp_ratio: hidden expansion factor."""
        super(MLP, self).__init__()
        hidden = int(dim * mlp_ratio)
        self.fc1 = nn.Linear(dim, hidden)
        self.fc2 = nn.Linear(hidden, dim)
        self.activation = nn.GELU()

    def forward(self, x) -> torch.Tensor:
        h = self.activation(self.fc1(x))
        return self.fc2(h)
+
+
+class Attention(nn.Module):
+ """Multihead attention."""
+
+ def __init__(self, dim, num_heads, qkv_bias=True):
+ super(Attention, self).__init__()
+ self.num_heads, self.head_dim = num_heads, dim // num_heads
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+ self.proj, self.pe_func = nn.Linear(dim, dim), None
+
+ def forward(self, x) -> torch.Tensor:
+ qkv_shape = [-1, x.size(1), 3, self.num_heads, self.head_dim]
+ q, k, v = self.qkv(x).view(qkv_shape).permute(2, 0, 3, 1, 4).unbind(dim=0)
+ q, k = (self.pe_func(q), self.pe_func(k)) if self.pe_func else (q, k)
+ o = nn.functional.scaled_dot_product_attention(q, k, v)
+ return self.proj(o.transpose(1, 2).flatten(2))
+
+
class Block(nn.Module):
    """Transformer block with adaptive-norm modulation and optional STG skip."""

    def __init__(self, dim, num_heads, mlp_ratio=4, qkv_bias=True, modulation_type=None):
        super(Block, self).__init__()
        # NOTE(review): forward unpacks 4 of the 6 stats; the remaining two are
        # presumably consumed inside the modulation norm — confirm in AdaLayerNormZero.
        self.modulation = (modulation_type or AdaLayerNormZero)(dim, num_stats=6, eps=1e-6)
        # Post-norms applied to the attention / MLP branch outputs.
        self.norm1, self.norm2 = nn.LayerNorm(dim), nn.LayerNorm(dim)
        self.attn = Attention(dim, num_heads, qkv_bias=qkv_bias)
        self.mlp = MLP(dim, mlp_ratio=mlp_ratio)
        # Flags: per-branch activation checkpointing and skip-layer guidance.
        self.attn_checkpointing = self.mlp_checkpointing = self.stg_skip = False

    def forward_modulation(self, x, z) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
        # Returns (modulated hidden states, remaining modulation stats).
        return self.modulation(x, z)

    def forward_attn(self, x) -> torch.Tensor:
        # Attention branch with post-normalization.
        return self.norm1(self.attn(x))

    def forward_mlp(self, x) -> torch.Tensor:
        # MLP branch with post-normalization.
        return self.norm2(self.mlp(x))

    def forward_ckpt(self, x, name) -> torch.Tensor:
        # Dispatch to forward_{attn,mlp}, optionally under activation checkpointing.
        if getattr(self, f"{name}_checkpointing", False) and x.requires_grad:
            return apply_ckpt(getattr(self, f"forward_{name}"), x, use_reentrant=False)
        return getattr(self, f"forward_{name}")(x)

    def forward(self, x, z, pe_func: callable = None) -> torch.Tensor:
        self.attn.pe_func = pe_func
        # With stg_skip, the last third of the batch bypasses this block entirely;
        # save it before processing and splice it back at the end.
        stg_x = x.chunk(3)[-1] if self.stg_skip else None
        if self.mlp_checkpointing and x.requires_grad:
            x, stats = apply_ckpt(self.forward_modulation, x, z, use_reentrant=False)
        else:
            x, stats = self.forward_modulation(x, z)
        gate_msa, scale_mlp, shift_mlp, gate_mlp = stats
        # Gated residual attention, then scale/shift re-modulation, then gated MLP.
        x = self.forward_ckpt(x, "attn").mul(gate_msa).add_(x)
        x = self.modulation.norm(x).mul(1 + scale_mlp).add_(shift_mlp)
        x = self.forward_ckpt(x, "mlp").mul(gate_mlp).add_(x)
        return torch.cat(x.chunk(3)[:2] + (stg_x,)) if self.stg_skip else x
+
+
class DiffusionTransformer(nn.Module):
    """Diffusion transformer over patchified latents with 3D RoPE."""

    def __init__(
        self,
        depth,
        embed_dim,
        num_heads,
        mlp_ratio=4,
        patch_size=2,
        image_size=32,
        image_dim=None,
        modulation=True,
    ):
        super(DiffusionTransformer, self).__init__()
        # ``modulation=True`` switches the blocks and final norm to the "single"
        # AdaLN variant; stats are then precomputed once in forward via
        # ``self.modulation`` and shared by all blocks.
        final_norm = AdaLayerNormSingle if modulation else AdaLayerNormZero
        block = partial(Block, modulation_type=AdaLayerNormSingle) if modulation else Block
        self.embed_dim, self.image_size, self.image_dim = embed_dim, image_size, image_dim
        self.patch_embed = PatchEmbed(image_dim, embed_dim, patch_size)
        self.time_embed = TimeEmbed(embed_dim)
        self.modulation = AdaLayerNormZero(embed_dim, num_stats=6, eps=1e-6) if modulation else None
        self.rope = RotaryEmbed3D(embed_dim // num_heads)
        self.blocks = nn.ModuleList(block(embed_dim, num_heads, mlp_ratio) for _ in range(depth))
        self.norm = final_norm(embed_dim, num_stats=2, eps=1e-6)
        # Predicts one patch (patch_size^2 * image_dim values) per token.
        self.head = nn.Linear(embed_dim, patch_size**2 * image_dim)

    def prepare_pe(self, c=None, pos=None) -> Tuple[callable, callable]:
        # Condition tokens (if any) are given zero rotary positions via ``pad``.
        return self.rope.get_func(pos, pad=0 if c is None else c.size(1))

    def forward(self, x, timestep, c=None, pos=None) -> torch.Tensor:
        # x: image/video latents; c: optional condition tokens prepended to the
        # sequence and stripped again before the output head.
        x = self.patch_embed(x)
        t = self.time_embed(timestep)
        z = self.modulation.proj(self.modulation.activation(t)) if self.modulation else t
        pe = self.prepare_pe(c, pos) if pos is not None else None
        x = x if c is None else torch.cat([c, x], dim=1)
        for blk in self.blocks:
            x = blk(x, z, pe)
        # Final adaptive norm on the latent tokens only, then the patch head.
        x = self.norm(x if c is None else x[:, c.size(1) :], t)[0]
        return self.head(x)
diff --git a/URSA/diffnext/models/embeddings.py b/URSA/diffnext/models/embeddings.py
new file mode 100644
index 0000000000000000000000000000000000000000..da1ddb70df96f7201e3c07ddd760f6d81e7042d7
--- /dev/null
+++ b/URSA/diffnext/models/embeddings.py
@@ -0,0 +1,361 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Embedding layers."""
+
+import sys
+from typing import List, Tuple, Union
+
+import numpy as np
+import scipy.stats as stats
+import torch
+from torch import nn
+
+
class FlexRotaryEmbedding(nn.Identity):
    """Flexible rotary position embedding layer."""

    class PEFunc(object):
        """Apply RoPE weight to Q/K tensor."""

        def __init__(self, weight: torch.Tensor):
            self.weight = weight

        @torch.compile(fullgraph=True, disable=sys.platform != "linux")
        def interleaved_impl(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
            # Rotation pairs are adjacent along the last axis: (x0, x1), (x2, x3), ...
            return w[..., 0].mul(x[..., 0]).add_(w[..., 1] * x[..., 1]).flatten(3)

        @torch.compile(fullgraph=True, disable=sys.platform != "linux")
        def partitioned_impl(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
            # Rotation pairs are split across the two halves of the last axis.
            return w[..., 0].mul(x[:, :, :, 0]).add_(w[..., 1] * x[:, :, :, 1]).flatten(3)

        def __call__(self, x: torch.Tensor, interleaved=False) -> torch.Tensor:
            # Lazily cast the cached rotation weights to the activation dtype.
            w = self.weight = self.weight.to(dtype=x.dtype)
            x = x.unflatten(-1, (-1, 1, 2) if interleaved else (2, -1, 1))
            return (self.interleaved_impl if interleaved else self.partitioned_impl)(x, w)

    @staticmethod
    def from_config(config):
        # Build from a transformers-style config; rope_theta falls back to the
        # config's dict form and finally to 10000.0 when absent.
        head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
        base = getattr(config, "rope_theta", None)
        if base is None and hasattr(config, "to_dict"):
            base = config.to_dict().get("rope_theta", None)
        if base is None:
            base = 10000.0
        return FlexRotaryEmbedding(head_dim, base=float(base))

    def __init__(self, dim=128, base=10000.0):
        super(FlexRotaryEmbedding, self).__init__()
        self.dim, self.base = dim, base
        # Frequency-pair counts used by get_func's axis layout.
        # NOTE(review): derived from the concatenation in get_func — confirm.
        self.rep1, self.rep2 = dim // 8, (dim // 2 - dim // 8 * 3) // 2
        self.register_buffer("scale", torch.arange(0, dim, 2).float() / dim, persistent=False)

    def get_pos(self, input_shape, shift=0, has_bov=True) -> torch.Tensor:
        # Build per-token (t, h, w) integer positions for a blocked token grid.
        # NOTE(review): ``has_bov`` prepends one extra position per block,
        # presumably a begin-of-video token — confirm against the caller.
        num_blocks = 1 if len(input_shape) < 4 else input_shape[-4]
        block_size = 1 if len(input_shape) < 3 else input_shape[-3]
        grid_shape = [num_blocks * block_size] + list(input_shape[-2:])
        pos = torch.zeros(grid_shape + [3], dtype=torch.int32, device=self.scale.device)
        grid = [torch.arange(_, device=pos.device) for _ in grid_shape]
        [pos[..., i].add_(grid[i].view([-1 if i == j else 1 for j in range(3)])) for i in range(3)]
        pos, device = pos.unflatten(0, (-1, block_size)).flatten(1, 3), pos.device
        bov_pos = torch.arange(num_blocks, device=device).view(-1, 1, 1).repeat(1, 1, 3)
        # Offset temporal positions so each block (and its bov slot) is distinct.
        pos[..., 0] += torch.arange(num_blocks, device=device).view(-1, 1).add_(shift + has_bov)
        return torch.cat([bov_pos.mul(block_size + 1).add(shift), pos], 1) if has_bov else pos

    def get_func(self, pos: torch.Tensor, *args, **kwargs) -> PEFunc:
        # Precompute the 2x2 rotation entries for ``pos`` and wrap them in PEFunc.
        # Axis layout: all 3 coords repeated rep1 times, then (h, w) repeated rep2 times.
        t = torch.cat([pos.repeat(1, 1, self.rep1), pos[..., 1:].repeat(1, 1, self.rep2)], -1)
        freq = t * torch.pow(self.base, self.scale.float()).reciprocal_().unsqueeze(0)
        freq = torch.stack([freq.cos(), -freq.sin(), freq.sin(), freq.cos()], dim=-1)
        return self.PEFunc(freq.view(freq.shape[:-1] + (2, 2)).unsqueeze(2))
+
+
class RotaryEmbed3D(nn.Identity):
    """3D rotary position embedding layer."""

    class PEFunc(object):
        """Apply RoPE weight to Q/K tensor."""

        def __init__(self, weight: torch.Tensor):
            self.weight = weight

        @torch.compile(fullgraph=True, disable=sys.platform != "linux")
        def call_impl(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
            return w[..., 0].mul(x[..., 0]).add_(w[..., 1] * x[..., 1]).flatten(3)

        def __call__(self, x: torch.Tensor) -> torch.Tensor:
            # View the head dim as interleaved (pair, 1, 2) components.
            x = x.view(*x.shape[:-1], -1, 1, 2)
            # Lazily cast the cached rotation weights to the activation dtype.
            w = self.weight = self.weight.to(dtype=x.dtype)
            return self.call_impl(x, w)

    def __init__(self, dim=64, base_size=(16, 16), theta=10000.0):
        super(RotaryEmbed3D, self).__init__()
        self.dim, self.base_size, self.theta = dim, base_size, theta
        # Split the head dim across axes: t gets dim//8, h and w share the rest.
        for i, rotary_dim in enumerate(([dim // 8] + [(dim - dim // 8) // 2] * 2)):
            scale = torch.arange(0, rotary_dim, 2).float().div_(rotary_dim)
            self.register_buffer("scale%d" % i, scale, persistent=False)

    def get_pos(self, t=1, bs=1, hw=None) -> torch.Tensor:
        # Return (bs, t*h*w, 3) coordinates over a (t, h, w) grid.
        thw = [t] + list(hw or self.base_size)
        pos = torch.zeros(thw + [3], device=self.scale1.device)
        grid = [torch.arange(_, device=self.scale1.device) for _ in thw]
        [pos[..., i].add_(grid[i].view([-1 if i == j else 1 for j in range(3)])) for i in range(3)]
        return pos.view(1, -1, 3).expand(bs, -1, -1)

    def get_func(self, pos: torch.Tensor, pad=0, ids: torch.Tensor = None) -> PEFunc:
        # Build a PEFunc for ``pos``; ``pad`` zero-position slots are prepended
        # (e.g. for condition tokens) and ``ids`` optionally gathers a subset.
        pos, weight = pos.gather(1, ids) if ids is not None else pos, []
        pos = nn.functional.pad(pos, (0, 0, pad, 0), value=0) if pad else pos
        for i, grid in enumerate(pos.split(1, dim=-1)):
            freq = torch.pow(self.theta, getattr(self, "scale%d" % i).float())
            freq = grid * freq.reciprocal().unsqueeze(0)
            # 2x2 rotation matrix entries per frequency.
            freq = torch.stack([freq.cos(), -freq.sin(), freq.sin(), freq.cos()], dim=-1)
            weight += [freq.view(freq.shape[:-1] + (2, 2))]
        return self.PEFunc(torch.cat(weight, dim=-3).unsqueeze(1))
+
+
class PosEmbed(nn.Module):
    """2D sin-cos position embedding layer.

    The table is built lazily in float32, cast to the requested device/dtype,
    and cached. Fix over the original: the cache is invalidated when the
    requested device or dtype no longer matches, instead of returning a stale
    tensor after the module's inputs move device or change precision.
    """

    def __init__(self, dim, base_size=(16, 16)):
        super(PosEmbed, self).__init__()
        (self.base_h, self.base_w), self.space_embed = base_size, None
        # Per-axis frequencies; the table uses 4 * (dim // 4) channels, which
        # equals ``dim`` only when dim is divisible by 4.
        self.freq_hw = 1 / (10000 ** (torch.arange(dim // 4, dtype=torch.float32) / (dim // 4)))

    def get_space_embed(self, device=None, dtype=None) -> torch.Tensor:
        """Return the (H*W, 4*(dim//4)) sin-cos table, rebuilding when stale."""
        h, w = self.base_h, self.base_w
        cached = self.space_embed
        if (
            cached is not None
            and cached.size(0) == h * w
            and (device is None or cached.device == device)
            and (dtype is None or cached.dtype == dtype)
        ):
            return cached
        # The (base / extent) factor is 1 here; kept for interpolation to other grids.
        grid_h = torch.arange(h, dtype=torch.float32) * (self.base_h / h)
        grid_w = torch.arange(w, dtype=torch.float32) * (self.base_w / w)
        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="xy")
        freq_w, freq_h = [_.reshape(-1, 1) * self.freq_hw.unsqueeze(0) for _ in (grid_w, grid_h)]
        embed = torch.cat([freq_w.sin(), freq_w.cos(), freq_h.sin(), freq_h.cos()], dim=-1)
        self.space_embed = embed.to(device=device, dtype=dtype)
        return self.space_embed

    def forward(self, x) -> torch.Tensor:
        # In-place add: mutates and returns x.
        return x.add_(self.get_space_embed(x.device, x.dtype))
+
+
class VideoPosEmbed(PosEmbed):
    """Video position embedding: spatial sin-cos plus a projected temporal term."""

    def __init__(self, dim, base_size):
        # base_size = (T, H, W); the spatial part reuses PosEmbed on (H, W).
        super(VideoPosEmbed, self).__init__(dim, base_size=base_size[1:])
        self.base_t, self.time_embed, self.norm = base_size[0], None, nn.LayerNorm(dim)
        self.time_proj = nn.Sequential(nn.Linear(256, dim), nn.SiLU(), nn.Linear(dim, dim))
        self.freq_t = 1 / (10000 ** (torch.arange(128, dtype=torch.float32).unsqueeze(0) / 128))

    def get_time_embed(self, t) -> torch.Tensor:
        # Cache only the raw sinusoid table per temporal length; the projection
        # and norm are recomputed so their current weights are always used.
        if self.time_embed is not None and t == self.time_embed.size(0):
            return self.norm(self.time_proj(self.time_embed))
        device, dtype = self.time_proj[0].weight.device, self.time_proj[0].weight.dtype
        # Rescale positions so t frames span the same phase range as base_t frames.
        grid = torch.arange(t, dtype=torch.float32) / (t / self.base_t)
        freq_t = grid.view(-1, 1, 1).mul(self.freq_t)
        sincos = torch.cat([freq_t.sin(), freq_t.cos()], dim=-1)
        self.time_embed = sincos.to(device=device, dtype=dtype)
        return self.norm(self.time_proj(self.time_embed))

    def forward(self, x) -> torch.Tensor:
        # 4D inputs get the temporal embedding added first (in place); all
        # inputs then receive the spatial embedding.
        # NOTE(review): the (T, 1, C) time embedding broadcasts over dim -2, so
        # 4D inputs are presumably laid out (B, T, H*W, C) — confirm at call sites.
        x = x.add_(self.get_time_embed(x.size(-3))) if x.dim() == 4 else x
        return x.add_(self.get_space_embed(x.device, x.dtype))
+
+
+class MotionEmbed(nn.Module):
+ """Motion embedding layer."""
+
+ def __init__(self, dim, base_flow=5, base_fps=12):
+ super(MotionEmbed, self).__init__()
+ self.base_flow, self.base_fps = base_flow, base_fps
+ self.flow_proj = nn.Sequential(nn.Linear(256, dim), nn.SiLU(), nn.Linear(dim, dim))
+ self.fps_proj = nn.Sequential(nn.Linear(256, dim), nn.SiLU(), nn.Linear(dim, dim))
+ self.freq_m = 1 / (10000 ** (torch.arange(128, dtype=torch.float32).unsqueeze(0) / 128))
+
+ def get_embed(self, c, x, k) -> torch.Tensor:
+ x = [getattr(self, f"base_{k}")] * c.size(0) if x is None else x
+ freq_m = torch.as_tensor(x).view(-1, 1, 1).float().mul(self.freq_m)
+ sincos = torch.cat([freq_m.sin(), freq_m.cos()], dim=-1)
+ return getattr(self, f"{k}_proj")(sincos.to(device=c.device, dtype=c.dtype))
+
+ def forward(self, c, flow=None, fps=None) -> torch.Tensor:
+ outputs = [self.get_embed(c, x, k) for k, x in [("flow", flow), ("fps", fps)]]
+ return torch.cat(outputs, dim=1) if len(outputs) > 1 else outputs[0]
+
+
class PatchEmbed(nn.Module):
    """Patch embedding layer.

    Converts (N, C, H, W) images — or (N, C, T, H, W) videos, frame by frame —
    into token sequences via a strided convolution, and offers patchify /
    unpatchify helpers that reorder raw pixels to and from token layout.
    Inputs that are already token sequences (3D) pass through unchanged.
    """

    def __init__(self, image_dim, embed_dim, patch_size):
        super(PatchEmbed, self).__init__()
        self.patch_size = patch_size
        self.image_dim = image_dim
        # Token-grid extents recorded by the most recent 4D/5D forward pass.
        self.height = None
        self.width = None
        self.proj = nn.Conv2d(image_dim, embed_dim, patch_size, patch_size)

    @property
    def hw(self) -> Tuple[int, int]:
        """Token-grid (height, width) from the last forward call."""
        return self.height, self.width

    def patchify(self, x) -> torch.Tensor:
        """Rearrange (N, C, H, W) pixels into (N, h*w, p*p*C) patch tokens."""
        p, c, h, w = self.patch_size, self.image_dim, self.height, self.width
        x = x.view(-1, c, h, p, w, p)
        return x.permute(0, 2, 4, 3, 5, 1).flatten(1, 2).flatten(2, 4).contiguous()

    def unpatchify(self, x) -> torch.Tensor:
        """Inverse of :meth:`patchify`."""
        p, c, h, w = self.patch_size, self.image_dim, self.height, self.width
        x = x.view(-1, h, w, p, p, c)
        return x.permute(0, 5, 1, 3, 2, 4).flatten(2, 3).flatten(3, 4).contiguous()

    def forward(self, x) -> torch.Tensor:
        flat_shape = None
        if x.dim() == 5:
            # Fold video frames into the batch axis for the 2D convolution.
            flat_shape = (x.size(0), x.size(2))
            x = x.transpose(1, 2).flatten(0, 1)
        if x.dim() == 4:
            self.width = x.size(-1) // self.patch_size
            self.height = x.size(-2) // self.patch_size
            x = self.proj(x).flatten(2).transpose(1, 2)
        return x.view(flat_shape + x.shape[1:]) if flat_shape else x
+
+
class TextEmbed(nn.Module):
    """Encode text tokens into embeddings.

    ``self.encoders`` is expected to be populated externally with a
    ``[tokenizer, encoder]`` pair before string prompts are passed in.
    """

    def __init__(self, token_dim, embed_dim, num_tokens=256, dropout=0.1):
        super(TextEmbed, self).__init__()
        self.token_dim, self.num_tokens, self.encoders = token_dim, num_tokens, []
        self.proj, self.norm = nn.Linear(token_dim, embed_dim), nn.LayerNorm(embed_dim)
        # Random non-trainable fallback rows used where no text embedding lands.
        self.register_buffer("weight", torch.zeros(512, token_dim))  # Maximum positions.
        _, self.dropout, self.mask = nn.init.normal_(self.weight, std=0.02), dropout, []

    @torch.no_grad()
    def encode_prompts(self, prompts, prompt_size=None) -> torch.Tensor:
        """Encode a list of prompts (strings or precomputed arrays).

        With string prompts and ``prompt_size``, prompts are handled in groups
        of ``prompt_size``; dropout then acts per group, and fully-dropped
        groups are recorded in ``self.mask`` for :meth:`apply_mask`.
        """
        device, dtype = self.weight.device, self.weight.dtype
        # Start from the random fallback rows; real embeddings overwrite below.
        x = self.weight[: self.num_tokens].expand(len(prompts), -1, -1).clone()
        # Precomputed-embedding path (arrays/tensors instead of strings).
        for i, p in enumerate(prompts if not isinstance(prompts[0], str) else []):
            if self.training and self.dropout > 0 and np.random.rand() < self.dropout:
                continue  # Prompt dropout: keep the random fallback rows.
            x[i, : p.shape[0]] = torch.as_tensor(p, device=device).to(dtype)
        if not isinstance(prompts[0], str):
            return x
        tokenizer, encoder = self.encoders
        trunc_args = {"max_length": self.num_tokens, "truncation": True}
        pad_args = {"padding": "max_length", **trunc_args}
        tokens = [tokenizer(p, **pad_args).input_ids for p in prompts]
        # Unpadded lengths: only these positions are overwritten per prompt.
        maxlens = [len(tokenizer(p, **trunc_args).input_ids) for p in prompts]
        tokens = torch.as_tensor(tokens, device=encoder.device)
        embeds, x = encoder(tokens).last_hidden_state.to(dtype), x.to(encoder.device)
        self.mask = [0] * (x.size(0) // prompt_size if prompt_size else 0)
        # Ungrouped path: independent per-prompt dropout (training only).
        for i, maxlen in enumerate([] if prompt_size else maxlens):
            if self.training and self.dropout and np.random.rand() < self.dropout:
                continue
            x[i, :maxlen] = embeds[i, :maxlen]
        # Grouped path: drop a whole group (recorded in mask) or individual
        # non-leading prompts within a group.
        for k in range(x.size(0) // prompt_size if prompt_size else 0):
            if np.random.rand() < self.dropout:
                self.mask[k] = 1
                continue
            for j in range(prompt_size):
                if j and np.random.rand() < self.dropout:
                    continue
                i, maxlen = k * prompt_size + j, maxlens[k * prompt_size + j]
                x[i, :maxlen] = embeds[i, :maxlen]
        return x

    def apply_mask(self, x, mask_token=0) -> torch.Tensor:
        """Apply the current mask to input."""
        if len(self.mask) == 0:
            return x
        mask = torch.as_tensor(self.mask, device=x.device, dtype=x.dtype)
        mask = mask.view([-1] + [1] * (x.dim() - 1))
        # Masked samples are replaced by ``mask_token`` (broadcast, in place).
        return x.mul(1 - mask).add_(mask_token * mask)

    def forward(self, x, prompt_size=None) -> torch.Tensor:
        # Lists/tuples are raw prompts; tensors are precomputed token features.
        if isinstance(x, (tuple, list)):
            return self.norm(self.proj(self.encode_prompts(x, prompt_size)))
        return self.norm(self.proj(x))
+
+
+class LabelEmbed(nn.Module):
+    """Encode class labels into embeddings."""
+
+    def __init__(self, embed_dim, num_classes=1000, dropout=0.1):
+        super(LabelEmbed, self).__init__()
+        self.dropout, self.num_classes = dropout, num_classes
+        # Extra row (index ``num_classes``) is the null class used for label dropout.
+        self.weight = nn.Parameter(torch.zeros(num_classes + (dropout > 0), embed_dim))
+        _, self.norm = nn.init.normal_(self.weight, std=0.02), nn.LayerNorm(embed_dim)
+
+    def forward(self, input_ids):
+        """Look up and normalize label embeddings, with train-time label dropout."""
+        input_ids = input_ids.unsqueeze(-1) if input_ids.dim() == 1 else input_ids
+        if self.training and self.dropout > 0:
+            # Randomly replace labels with the null class (classifier-free guidance).
+            keep = torch.rand(input_ids.size(), device=input_ids.device).gt(self.dropout)
+            input_ids = input_ids.where(keep, self.num_classes)
+        return self.norm(self.weight[input_ids])
+
+
+class MaskEmbed(nn.Module):
+    """Apply mask positions to input embeddings."""
+
+    def __init__(self, embed_dim, mask_ratios=(0.7, 1.0)):
+        super(MaskEmbed, self).__init__()
+        # mask_ratios = (low, high[, std]); std defaults to 0.25 (truncnorm scale).
+        self.mask_ratios = list(mask_ratios) + ([0.25] if len(mask_ratios) == 2 else [])
+        self.bos_token = nn.Parameter(torch.zeros(1, embed_dim))
+        self.mask_token = nn.Parameter(torch.zeros(1, embed_dim))
+        [nn.init.normal_(_, std=0.02) for _ in (self.bos_token, self.mask_token)]
+        self.mask, self.attn_mask = None, None
+        # Inference-time prediction-order state (see get_pred_mask).
+        self.pred_ids, self.pred_pos, self.generator = None, 0, None
+
+    def get_attn_lens(
+        self, x: Union[torch.Tensor, Tuple[torch.Tensor]], c: torch.Tensor = None
+    ) -> List[int]:
+        """Return the attention length according to inputs."""
+        # List input: one length per element; tensor input: one length per dim-1 slice.
+        # NOTE(review): assumes dims 1:3 of each list element span the token grid — confirm.
+        lens = [_.shape[1:3].numel() for _ in x] if isinstance(x, (tuple, list)) else []
+        lens += [x.size(2)] * x.size(1) if not isinstance(x, (tuple, list)) else []
+        # Condition tokens are prepended to the first segment.
+        lens[0] += c.size(1) if c is not None else 0
+        return lens
+
+    def get_attn_mask(
+        self, x: Union[torch.Tensor, Tuple[torch.Tensor]], c: torch.Tensor = None, persistent=True
+    ) -> torch.Tensor:
+        """Return the attention mask according to inputs."""
+        if self.attn_mask is not None and persistent:
+            return self.attn_mask
+        # ``d`` labels each token with its segment index.
+        if isinstance(x, (tuple, list)):
+            d = torch.cat([torch.full(_.shape[1:3], t) for t, _ in enumerate(x)]).flatten()
+        else:
+            d = torch.cat([torch.full([x.size(2)], i) for i in range(x.size(1))])
+        # Condition tokens share segment 0 (visible to all later segments).
+        d = torch.cat([torch.full([c.size(1)], 0), d]) if c is not None else d
+        # Block-causal: a query attends to keys from the same or earlier segments.
+        attn_mask = torch.where(d.unsqueeze(1).ge(d.unsqueeze(0)), 0, -float("inf"))
+        self.attn_mask = attn_mask.to(device=self.bos_token.device, dtype=self.bos_token.dtype)
+        return self.attn_mask
+
+    def get_pred_mask(self, num_preds) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Return the current mask for next prediction."""
+        if self.pred_ids is None:
+            # Fix a random prediction order once per generation pass.
+            u_dist = torch.empty_like(self.mask).uniform_(generator=self.generator)
+            self.pred_ids = u_dist.argsort(dim=1)
+        pred_ids = self.pred_ids[:, self.pred_pos : self.pred_pos + num_preds]
+        pred_mask = torch.zeros_like(self.mask).scatter_(1, pred_ids, 1)
+        # Advance the cursor and mark the predicted positions as revealed.
+        self.pred_pos, self.mask = self.pred_pos + num_preds, self.mask.mul_(1 - pred_mask)
+        return pred_mask, pred_ids
+
+    def apply_mask(self, x) -> torch.Tensor:
+        """Apply the current mask to input."""
+        return x.mul(1 - self.mask).add_(self.mask_token * self.mask)
+
+    def forward(self, x) -> torch.Tensor:
+        if self.training:
+            u_dist = torch.rand(x.shape[:-1] + (1,), device=x.device)
+            # Sample mask ratio ~ truncnorm(loc=1, scale=std) clipped to [low, high].
+            a, b = [(v - 1) / self.mask_ratios[2] for v in self.mask_ratios[:2]]
+            mask_ratio = stats.truncnorm(a, b, loc=1, scale=self.mask_ratios[2]).rvs(1)[0]
+            # Keep a random (1 - ratio) fraction of positions visible.
+            prev_ids = u_dist.argsort(1)[:, : int(np.round((1 - mask_ratio) * u_dist.size(1)))]
+            self.mask = x.new_ones(u_dist.shape).scatter_(1, prev_ids, 0)
+            return self.apply_mask(x), prev_ids
+        # Inference: start fully masked, reveal step-by-step via get_pred_mask.
+        if self.mask is None:
+            self.mask, self.pred_pos = x.new_ones(x.shape[:-1] + (1,)), 0
+        return self.apply_mask(x)
diff --git a/URSA/diffnext/models/flash_attention.py b/URSA/diffnext/models/flash_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..8bb2d268d36c1f25af7c79ec77f09bc7546d9dc8
--- /dev/null
+++ b/URSA/diffnext/models/flash_attention.py
@@ -0,0 +1,99 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Flash attention layers. Copied from https://github.com/Dao-AILab/flash-attention"""
+
+import torch
+
+# RoPE (Triton)
+# Prefer the fused Triton kernel from flash-attn; fall back to a pure-torch port.
+try:
+    from flash_attn.layers.rotary import apply_rotary_emb
+except ImportError:
+    from einops import rearrange, repeat
+
+    def rotate_half(x, interleaved=False) -> torch.Tensor:
+        """Rotate feature pairs by 90 degrees: (x1, x2) -> (-x2, x1)."""
+        if not interleaved:
+            x1, x2 = x.chunk(2, dim=-1)
+            return torch.cat((-x2, x1), dim=-1)
+        # Interleaved layout stores each pair adjacently along the last dim.
+        x1, x2 = x[..., ::2], x[..., 1::2]
+        return rearrange(torch.stack((-x2, x1), dim=-1), "... d two -> ... (d two)", two=2)
+
+    def apply_rotary_emb(x, cos, sin, interleaved=False, inplace=False) -> torch.Tensor:
+        """Apply rotary position embedding to the first ``2 * cos.shape[-1]`` features.
+
+        NOTE(review): ``inplace`` is accepted only for API parity with flash-attn;
+        this fallback always builds a new tensor via torch.cat.
+        """
+        ro_dim = cos.shape[-1] * 2
+        cos = repeat(cos, "... d -> ... 1 (2 d)" if not interleaved else "... d -> ... 1 (d 2)")
+        sin = repeat(sin, "... d -> ... 1 (2 d)" if not interleaved else "... d -> ... 1 (d 2)")
+        return torch.cat(
+            [
+                x[..., :ro_dim] * cos + rotate_half(x[..., :ro_dim], interleaved) * sin,
+                x[..., ro_dim:],
+            ],
+            -1,
+        )
+
+
+# SwiGLU (TorchJIT)
+# Fused elementwise SwiGLU compiled with torch.cuda.jiterator: out = y * x * sigmoid(x).
+swiglu_fwd_codestring = """
+template T swiglu_fwd(T x, T y) {
+    return float(x) * float(y) / (1.0f + ::exp(-float(x)));
+}
+"""
+swiglu_bwd_codestring = """
+template void swiglu_bwd(T x, T y, T g, T& dx, T& dy) {
+    float x_sigmoid = 1.0f / (1.0f + ::exp(-float(x)));
+    dx = x_sigmoid * (1 + float(x) * (1.0f - x_sigmoid)) * float(g) * float(y);
+    dy = float(x) * x_sigmoid * float(g);
+}
+"""
+swiglu_fwd = torch.cuda.jiterator._create_jit_fn(swiglu_fwd_codestring)
+# Backward emits (dx, dy) as two outputs in one kernel launch.
+swiglu_bwd = torch.cuda.jiterator._create_multi_output_jit_fn(swiglu_bwd_codestring, num_outputs=2)
+
+
+class SwiGLUFunction(torch.autograd.Function):
+    """Autograd wrapper around the jiterator SwiGLU kernels."""
+
+    @staticmethod
+    def forward(ctx, x, y):
+        # Save raw inputs; backward recomputes sigmoid(x) inside the kernel.
+        ctx.save_for_backward(x, y)
+        return swiglu_fwd(x, y)
+
+    @staticmethod
+    def backward(ctx, dout):
+        x, y = ctx.saved_tensors
+        # swiglu_bwd returns a (dx, dy) pair, matching forward's two inputs.
+        return swiglu_bwd(x, y, dout)
+
+
+swiglu = SwiGLUFunction.apply
+
+# RMSNorm (Triton)
+# Prefer the fused Triton kernel from flash-attn; fall back to a pure-torch port.
+try:
+    from flash_attn.ops.triton.layer_norm import RMSNorm
+except ImportError:
+
+    class RMSNorm(torch.nn.Module):
+        """Pure-torch RMSNorm fallback (statistics computed in float32)."""
+
+        def __init__(self, hidden_size, eps: float = 1e-6) -> None:
+            super().__init__()
+            self.weight = torch.nn.Parameter(torch.ones(hidden_size))
+            self.eps = eps
+
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
+            # Normalize by root-mean-square over the last dim, then apply learned scale.
+            x = x.mul(x.float().square().mean(-1, True).add_(self.eps).rsqrt().to(x.dtype))
+            return x * self.weight
+
+
+# CrossEntropy (Triton)
+try:
+    from flash_attn.ops.triton.cross_entropy import cross_entropy_loss
+except ImportError:
+    cross_entropy_loss = None  # Callers must fall back to torch's cross entropy.
diff --git a/URSA/diffnext/models/flex_attention.py b/URSA/diffnext/models/flex_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..936cf987805fe80d970c5bb5df3d45d455bea55f
--- /dev/null
+++ b/URSA/diffnext/models/flex_attention.py
@@ -0,0 +1,81 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Flex attention layers."""
+
+from itertools import accumulate
+from typing import List
+
+import torch
+from torch import nn
+
+try:
+ from torch.nn.attention.flex_attention import create_block_mask
+ from torch.nn.attention.flex_attention import flex_attention
+except ImportError:
+ flex_attention = create_block_mask = None
+
+
+class FlexAttentionCausal2D(nn.Module):
+    """Block-wise causal flex attention."""
+
+    def __init__(self):
+        super(FlexAttentionCausal2D, self).__init__()
+        # Compiled attention callable, block offsets/flags, and cached block mask.
+        self.attn_func = self.offsets = self.flags = None
+        self.cu_offsets = self.block_mask = None
+
+    def set_offsets(self, offsets: List[int]):
+        """Set block-wise mask offsets."""
+        # Normalize to a cumulative list starting at 0; changed offsets drop the cache.
+        offsets = list(type(offsets)([0]) + offsets if offsets[0] != 0 else offsets)
+        if offsets != self.offsets:
+            self.offsets, self.block_mask = offsets, None
+
+    def set_offsets_by_lens(self, lens, flags=None):
+        """Set block-wise mask offsets by lengths."""
+        self.set_offsets(list(accumulate(type(lens)([0]) + lens if lens[0] != 0 else lens)))
+        self.flags = flags # Bidirectional flags (-1: lower triangular, 1: full)
+
+    def get_mask_mod(self) -> callable:
+        """Return the mask modification."""
+        # Map every token position to its block id.
+        counts = self.cu_offsets[1:] - self.cu_offsets[:-1]
+        ids = torch.arange(len(counts), device=self.cu_offsets.device, dtype=torch.int32)
+        ids = ids.repeat_interleave(counts)
+        if self.flags is None:
+            # Causal across blocks, full attention within a block.
+            return lambda b, h, qi, ki: (qi >= ki) | (ids[qi] == ids[ki])
+        # Pad missing flags with -1 (causal-only): a negative product never matches ids[ki].
+        flags = list(self.flags) + [-1] * (len(counts) - len(self.flags))
+        flags = torch.as_tensor(flags, device=self.cu_offsets.device, dtype=torch.int32)
+        flags = flags.repeat_interleave(counts)
+        return lambda b, h, qi, ki: (qi >= ki) | ((ids[qi] * flags[qi]) == ids[ki])
+
+    def get_attn_func(self) -> callable:
+        """Return the attention function."""
+        if flex_attention is None:
+            raise NotImplementedError(f"FlexAttn requires torch>=2.5 but got {torch.__version__}")
+        if self.attn_func is None:
+            # Compile once and reuse across calls.
+            self.attn_func = torch.compile(flex_attention)
+        return self.attn_func
+
+    def get_block_mask(self, q: torch.Tensor) -> torch.Tensor:
+        """Return the attention block mask according to inputs."""
+        if self.block_mask is not None:
+            return self.block_mask
+        b, h, q_len = q.shape[:3]
+        args = {"B": b, "H": h, "Q_LEN": q_len, "KV_LEN": q_len, "_compile": True}
+        self.cu_offsets = torch.as_tensor(self.offsets, device=q.device, dtype=torch.int32)
+        self.block_mask = create_block_mask(self.get_mask_mod(), **args)
+        return self.block_mask
+
+    def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
+        """Run block-causal attention; GQA is enabled for fewer KV heads."""
+        return self.get_attn_func()(q, k, v, block_mask=self.get_block_mask(q), enable_gqa=True)
diff --git a/URSA/diffnext/models/guidance_scaler.py b/URSA/diffnext/models/guidance_scaler.py
new file mode 100644
index 0000000000000000000000000000000000000000..b684cc2db9b5f7c406de0be3da6e8e48377aff03
--- /dev/null
+++ b/URSA/diffnext/models/guidance_scaler.py
@@ -0,0 +1,87 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Classifier-free guidance scaler."""
+
+import torch
+
+
+class GuidanceScaler(object):
+    """Guidance scaler."""
+
+    def __init__(self, **kwargs):
+        self.guidance_scale = kwargs.get("guidance_scale", 1)
+        # Guidance is disabled below this timestep (see maybe_disable).
+        self.guidance_trunc = kwargs.get("guidance_trunc", 0)
+        # Renorm lower bound in (0, 1]; >= 1 disables renormalization.
+        self.guidance_renorm = kwargs.get("guidance_renorm", 1)
+        self.image_guidance_scale = kwargs.get("image_guidance_scale", 0)
+        self.spatiotemporal_guidance_scale = kwargs.get("spatiotemporal_guidance_scale", 0)
+        self.min_guidance_scale = kwargs.get("min_guidance_scale", None) or self.guidance_scale
+        self.inc_guidance_scale = self.guidance_scale - self.min_guidance_scale
+
+    @property
+    def extra_pass(self) -> bool:
+        """Return if an additional (third) guidance pass is required."""
+        return self.image_guidance_scale + self.spatiotemporal_guidance_scale > 0
+
+    def clone(self):
+        """Return a deepcopy of current guidance scaler."""
+        # Extra derived keys in __dict__ are harmlessly ignored by kwargs.get above.
+        return GuidanceScaler(**self.__dict__)
+
+    def decay_guidance_scale(self, decay=0):
+        """Scale guidance scale according to decay."""
+        # Linear interpolation between min (decay=0) and max (decay=1) scales.
+        self.guidance_scale = self.inc_guidance_scale * decay + self.min_guidance_scale
+
+    def expand(self, x: torch.Tensor, padding: torch.Tensor = None) -> torch.Tensor:
+        """Expand input tensor for guidance passes."""
+        # Stack 2 copies (cond/uncond) or 3 when an extra pass is enabled.
+        x = torch.stack([x] * (3 if self.extra_pass else 2)) if self.guidance_scale > 1 else x
+        # Image guidance replaces the second copy with the padding input.
+        x.__setitem__(1, padding) if self.image_guidance_scale and padding is not None else None
+        return x.flatten(0, 1) if self.guidance_scale > 1 else x
+
+    def expand_text(self, c: torch.Tensor) -> torch.Tensor:
+        """Expand text embedding tensor for guidance passes."""
+        # ``c`` packs [text, null] halves; append the third pass's conditioning.
+        c = list(c.chunk(2)) if self.extra_pass else c
+        c.append(c[1]) if self.image_guidance_scale else None # Null, Null
+        c.append(c[0]) if self.spatiotemporal_guidance_scale else None # Null, Text
+        return torch.cat(c) if self.extra_pass else c
+
+    def maybe_disable(self, timestep, *args):
+        """Disable all guidance passes if matching truncation threshold."""
+        if self.guidance_scale > 1 and self.guidance_trunc:
+            if float(timestep) < self.guidance_trunc:
+                self.guidance_scale = 1
+                # Keep only the conditional chunk of every tensor argument.
+                return [_.chunk(3 if self.extra_pass else 2)[0] for _ in args]
+        return args
+
+    def renorm(self, x, cond):
+        """Apply guidance renormalization to input logits."""
+        if self.guidance_renorm >= 1:
+            return x
+        # Per-sample rescale so the guided norm cannot overshoot the cond norm.
+        args = {"dim": tuple(range(1, len(x.shape))), "keepdim": True}
+        return x.mul_(cond.norm(**args).div_(x.norm(**args)).clamp(self.guidance_renorm, 1))
+
+    def scale(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply guidance passes to input logits."""
+        if self.guidance_scale <= 1:
+            return x
+        if self.image_guidance_scale:
+            # Chunks: [cond, uncond, image-cond].
+            cond, uncond, imgcond = x.chunk(3)
+            x = self.renorm(uncond.add(cond.sub(imgcond).mul_(self.guidance_scale)), cond)
+            return x.add_(imgcond.sub_(uncond).mul_(self.image_guidance_scale))
+        if self.spatiotemporal_guidance_scale:
+            # Chunks: [cond, uncond, perturbed-cond].
+            cond, uncond, perturb = x.chunk(3)
+            x = self.renorm(uncond.add_(cond.sub(uncond).mul_(self.guidance_scale)), cond)
+            return x.add_(cond.sub_(perturb).mul_(self.spatiotemporal_guidance_scale))
+        # Standard CFG: uncond + s * (cond - uncond).
+        cond, uncond = x.chunk(2)
+        return self.renorm(uncond.add_(cond.sub(uncond).mul_(self.guidance_scale)), cond)
diff --git a/URSA/diffnext/models/normalization.py b/URSA/diffnext/models/normalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9f41b67236a39440923dcb6b38f95b6216ace01
--- /dev/null
+++ b/URSA/diffnext/models/normalization.py
@@ -0,0 +1,62 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Normalization Layers."""
+
+from typing import Tuple
+
+import torch
+from torch import nn
+
+
+class AdaLayerNormZero(nn.Module):
+    """Adaptive LayerNorm with residual stats."""
+
+    def __init__(self, dim, rank=None, num_stats=2, eps=1e-6):
+        super(AdaLayerNormZero, self).__init__()
+        # Optional low-rank bottleneck on the conditioning projection.
+        self.lora = nn.Linear(dim, rank, bias=False) if rank else nn.Identity()
+        self.proj = nn.Linear(rank if rank else dim, num_stats * dim)
+        self.norm = nn.LayerNorm(dim, eps, elementwise_affine=False) if eps else nn.Identity()
+        self.activation, self.num_stats = nn.SiLU(), num_stats
+
+    def forward(self, x, z) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
+        """Modulate norm(x) with (scale, shift) derived from z; return extra stats."""
+        stats = self.proj(self.lora(self.activation(z))).chunk(self.num_stats, dim=-1)
+        # stats[0] = scale, stats[1] = shift; any extras (e.g. gates) go to the caller.
+        return self.norm(x).mul(1 + stats[0]).add_(stats[1]), stats[2:]
+
+
+class AdaLayerNorm(AdaLayerNormZero):
+    """Adaptive LayerNorm."""
+
+    def __init__(self, dim, rank=None, eps=1e-6):
+        super(AdaLayerNorm, self).__init__(dim, rank, num_stats=2, eps=eps)
+
+    def forward(self, x, z) -> torch.Tensor:
+        """Same as AdaLayerNormZero but returns only the modulated tensor."""
+        return super().forward(x, z)[0]
+
+
+class AdaLayerNormSingle(nn.Module):
+    """Adaptive LayerNorm with shared residual stats."""
+
+    def __init__(self, dim, num_stats=2, eps=1e-6):
+        super(AdaLayerNormSingle, self).__init__()
+        # Learned global stats added to the conditioning before chunking.
+        self.bias = nn.Parameter(torch.randn(num_stats, dim) / dim**0.5)
+        self.norm = nn.LayerNorm(dim, eps, elementwise_affine=False) if eps else nn.Identity()
+        self.num_stats = num_stats
+
+    def forward(self, x, z) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
+        """Modulate norm(x) with stats from z plus the shared learned bias."""
+        # If z's last dim equals ``dim``, stats are stacked on dim -2; otherwise z
+        # packs all stats flattened on the last dim (bias is flattened to match).
+        axis = -2 if z.size(-1) == self.bias.size(-1) else -1
+        bias = self.bias.flatten(-1 if z.size(-1) == self.bias.size(-1) else 0)
+        stats = z.add(bias).chunk(self.num_stats, dim=axis)
+        return self.norm(x).mul(1 + stats[0]).add_(stats[1]), stats[2:]
diff --git a/URSA/diffnext/models/text_encoders/__init__.py b/URSA/diffnext/models/text_encoders/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..585d9a793b7dccefe60646e551d2b16c70749e37
--- /dev/null
+++ b/URSA/diffnext/models/text_encoders/__init__.py
@@ -0,0 +1,16 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Text encoders."""
diff --git a/URSA/diffnext/models/text_encoders/phi.py b/URSA/diffnext/models/text_encoders/phi.py
new file mode 100644
index 0000000000000000000000000000000000000000..083e6db6d9bfe417bfb7d4ebf6c38205388704b7
--- /dev/null
+++ b/URSA/diffnext/models/text_encoders/phi.py
@@ -0,0 +1,271 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Simple implementation of Phi model."""
+
+import torch
+import torch.utils.checkpoint
+from torch import nn
+
+from transformers.activations import ACT2FN
+from transformers.generation import GenerationMixin
+from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
+from transformers.modeling_utils import PreTrainedModel
+from transformers.models.phi.configuration_phi import PhiConfig
+
+from diffnext.models.flash_attention import apply_rotary_emb
+
+
+def maybe_apply_ckpt(function, x, enable=False) -> torch.Tensor:
+    """Apply gradient checkpointing if possible."""
+    # Only checkpoint when enabled AND the (first) input actually carries grad.
+    if enable and (x[0] if isinstance(x, (tuple, list)) else x).requires_grad:
+        return torch.utils.checkpoint.checkpoint(function, x, use_reentrant=False)
+    return function(x)
+
+
+class PhiRotaryEmbedding(nn.Module):
+    """Rotary embedding layer."""
+
+    class PEFunc(object):
+        """Apply RoPE weight to Q/K tensor."""
+
+        def __init__(self, weight):
+            self.cos, self.sin = weight
+
+        def __call__(self, x: torch.Tensor) -> torch.Tensor:
+            # Lazily match device/dtype of the first tensor seen, then cache.
+            self.cos, self.sin = self.cos.to(x), self.sin.to(x)
+            return apply_rotary_emb(x, self.cos, self.sin, inplace=True)
+
+    @staticmethod
+    def from_config(config):
+        """Build the embedding from a PhiConfig (supports partial rotary dims)."""
+        head_dim = config.hidden_size // config.num_attention_heads
+        rotary_dim = int(config.partial_rotary_factor * head_dim)
+        return PhiRotaryEmbedding(rotary_dim, config.max_position_embeddings, config.rope_theta)
+
+    def __init__(self, dim, max_position_embeddings=2048, base=10000):
+        super().__init__()
+        self.dim, self.base = dim, base
+        self.max_position_embeddings = max_position_embeddings
+        # inv_freq[i] = 1 / base**(2i / dim), the standard RoPE frequency schedule.
+        freq = self.base ** (torch.arange(0, self.dim, 2, dtype=torch.int64).float() / self.dim)
+        self.register_buffer("inv_freq", freq.reciprocal_(), persistent=False)
+        self.set_cos_sin_cache(max_position_embeddings, dtype=torch.get_default_dtype())
+
+    def set_cos_sin_cache(self, seqlen, dtype):
+        """Precompute cos/sin tables for positions [0, seqlen)."""
+        self.max_seqlen_cached, device = seqlen, self.inv_freq.device
+        t = torch.arange(self.max_seqlen_cached, device=device, dtype=torch.int64)
+        freq = torch.outer(t.float(), self.inv_freq.float())
+        emb = torch.cat((freq, freq), dim=-1)
+        self.register_buffer("cos", emb.cos().to(dtype), persistent=False)
+        self.register_buffer("sin", emb.sin().to(dtype), persistent=False)
+
+    def get_func(self, pos=0, seqlen=1) -> PEFunc:
+        """Return a PEFunc covering absolute positions [pos, pos + seqlen)."""
+        # chunk(2, -1)[0] keeps the first half; apply_rotary_emb re-expands it.
+        return self.PEFunc(_[pos : pos + seqlen].chunk(2, -1)[0] for _ in (self.cos, self.sin))
+
+
+class PhiMLP(nn.Module):
+    """Phi MLP."""
+
+    def __init__(self, config: PhiConfig):
+        super().__init__()
+        # Toggled externally to checkpoint the MLP (see PhiDecoderLayer.forward).
+        self.gradient_checkpointing = False
+        self.activation = ACT2FN[config.hidden_act]
+        self.config, self.hidden_size = config, config.hidden_size
+        self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size)
+        self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size)
+
+    def forward(self, x) -> torch.Tensor:
+        """Two-layer feed-forward: fc2(act(fc1(x)))."""
+        return self.fc2(self.activation(self.fc1(x)))
+
+
+class PhiAttention(nn.Module):
+    """Phi attention."""
+
+    def __init__(self, config: PhiConfig, layer_idx=None):
+        super().__init__()
+        self.layer_idx, hidden_size = layer_idx, config.hidden_size
+        self.config, self.is_causal, self.gradient_checkpointing = config, True, False
+        # GQA: each KV head serves this many query heads.
+        self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
+        self.head_dim = config.hidden_size // config.num_attention_heads
+        self.q_proj = nn.Linear(hidden_size, config.num_attention_heads * self.head_dim)
+        self.k_proj = nn.Linear(hidden_size, config.num_key_value_heads * self.head_dim)
+        self.v_proj = nn.Linear(hidden_size, config.num_key_value_heads * self.head_dim)
+        self.dense = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size)
+        # Per-forward state injected by PhiModel.forward / callers.
+        self.attn_mask = self.past_key_value = self.pe_func = self.flex_attn = None
+
+    def forward_qkv(self, x) -> torch.Tensor:
+        """Project input to Q/K/V, each shaped (..., num_heads, head_dim)."""
+        x = x[1](x[0]) if isinstance(x, (tuple, list)) else x # PreNorm.
+        q, k, v = [m(x) for m in (self.q_proj, self.k_proj, self.v_proj)]
+        return [_.unflatten(-1, (-1, self.head_dim)) for _ in (q, k, v)]
+
+    def repeat_kv(self, x) -> torch.Tensor:
+        """Repeat KV heads to match the number of query heads (GQA)."""
+        return x.unsqueeze(2).expand(-1, -1, self.num_key_value_groups, -1, -1).flatten(1, 2)
+
+
+class PhiSdpaAttention(PhiAttention):
+    """Phi SDPA attention."""
+
+    def forward(self, x) -> torch.Tensor:
+        q, k, v = maybe_apply_ckpt(self.forward_qkv, x, self.gradient_checkpointing)
+        q, k = [self.pe_func(_) for _ in (q, k)]
+        q, k, v = [_.transpose(1, 2) for _ in (q, k, v)]
+        # "Frozen" caches are read-only prefixes concatenated before the new KV;
+        # legacy caches are updated in place via the HF Cache API.
+        if self.past_key_value is not None and getattr(self.past_key_value, "is_frozen", False):
+            k, v = [torch.cat(_, -2) for _ in zip(self.past_key_value[self.layer_idx], (k, v))]
+        elif self.past_key_value is not None: # Fallback to legacy NTP caching.
+            k, v = self.past_key_value.update(k, v, self.layer_idx)
+        self.past_key_value = None # Release cache reference.
+        if self.flex_attn and self.flex_attn.offsets:
+            return self.dense(self.flex_attn(q, k, v).transpose(1, 2).flatten(2))
+        # Fused causal path only when no explicit mask and seqlen > 1.
+        # NOTE(review): assumes ``x`` is a plain tensor here (not the PreNorm tuple).
+        is_causal = self.is_causal and self.attn_mask is None and x.size(1) > 1
+        sdpa_args = {"is_causal": is_causal, "enable_gqa": True}
+        o = nn.functional.scaled_dot_product_attention(q, k, v, self.attn_mask, **sdpa_args)
+        return self.dense(o.transpose(1, 2).flatten(2))
+
+
+class PhiDecoderLayer(nn.Module):
+    """Phi decoder layer (parallel attention + MLP over a shared LayerNorm)."""
+
+    def __init__(self, config: PhiConfig, layer_idx: int):
+        super().__init__()
+        self.self_attn = PhiSdpaAttention(config, layer_idx)
+        self.mlp, self.gradient_checkpointing = PhiMLP(config), False
+        self.input_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+        # NOTE(review): ``dropout`` and ``mlp_checkpointing`` are constructed but
+        # not referenced in forward — confirm whether they are used externally.
+        self.dropout = nn.Dropout(config.resid_pdrop, inplace=True)
+        self.mlp_checkpointing = False
+
+    def forward(self, x) -> torch.Tensor:
+        # Parallel residual: x + attn(norm(x)) + mlp(norm(x)).
+        shortcut, x = x, self.input_layernorm(x)
+        x = self.self_attn(x).add_(maybe_apply_ckpt(self.mlp, x, self.mlp.gradient_checkpointing))
+        return x.add_(shortcut)
+
+
+class PhiPreTrainedModel(PreTrainedModel):
+    """Phi pre-trained model."""
+
+    config_class = PhiConfig
+    base_model_prefix = "model"
+    supports_gradient_checkpointing = True
+    _no_split_modules = ["PhiDecoderLayer"]
+    _skip_keys_device_placement = "past_key_values"
+    _supports_flash_attn_2 = True
+    _supports_sdpa = True
+    _supports_cache_class = True
+
+    def _init_weights(self, module):
+        """Initialize Linear/Embedding weights with the configured normal std."""
+        std = self.config.initializer_range
+        if isinstance(module, nn.Linear):
+            module.weight.data.normal_(mean=0.0, std=std)
+            if module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.Embedding):
+            module.weight.data.normal_(mean=0.0, std=std)
+            if module.padding_idx is not None:
+                # Keep the padding embedding at zero.
+                module.weight.data[module.padding_idx].zero_()
+
+
+class PhiModel(PhiPreTrainedModel):
+    """Phi transformer model."""
+
+    def __init__(self, config: PhiConfig):
+        super().__init__(config)
+        self.padding_idx = config.pad_token_id
+        self.vocab_size = config.vocab_size
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
+        self.layers = [PhiDecoderLayer(config, i) for i in range(config.num_hidden_layers)]
+        self.layers = nn.ModuleList(self.layers)
+        self.final_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+        self.rotary_emb, _ = PhiRotaryEmbedding.from_config(config), self.post_init()
+
+    def forward(
+        self,
+        input_ids: torch.Tensor = None,
+        attention_mask: torch.Tensor = None,
+        inputs_embeds: torch.Tensor = None,
+        past_key_values: torch.Tensor = None,
+        **kwargs,
+    ) -> BaseModelOutputWithPast:
+        """Run the decoder stack; per-layer state is injected via attributes."""
+        x = inputs_embeds if input_ids is None else self.embed_tokens(input_ids)
+        # RoPE start position: explicit positions via kwargs or the cached prefix length.
+        pe_pos = kwargs.get("rope_pos", past_key_values.get_seq_length() if past_key_values else 0)
+        # NOTE(review): ``self.flex_rope`` is not defined in __init__ — it must be
+        # attached externally before passing tensor rope_pos; confirm callers.
+        pe_embedder = self.flex_rope if isinstance(pe_pos, torch.Tensor) else self.rotary_emb
+        pe_func = pe_embedder.get_func(pe_pos, x.size(1))
+        for layer in self.layers:
+            layer.self_attn.pe_func = pe_func
+            layer.self_attn.attn_mask = attention_mask
+            layer.self_attn.past_key_value = past_key_values
+            # ``__call__`` keeps module hooks; checkpointing is a per-layer opt-in.
+            x = maybe_apply_ckpt(layer.__call__, x, layer.gradient_checkpointing)
+        x = self.final_layernorm(x)
+        return BaseModelOutputWithPast(last_hidden_state=x, past_key_values=past_key_values)
+
+
+class PhiEncoderModel(PhiPreTrainedModel):
+    """Phi encoder model."""
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.model = PhiModel(config)
+        # NOTE(review): lm_head is created (possibly for checkpoint compatibility)
+        # but never used by forward — confirm intent.
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        self.vocab_size, _ = config.vocab_size, self.post_init()
+
+    def forward(self, input_ids, attention_mask=None, **kwargs) -> BaseModelOutputWithPast:
+        """Return hidden states only (no LM logits)."""
+        return self.model(input_ids, attention_mask, **kwargs)
+
+
+class PhiForCausalLM(PhiPreTrainedModel, GenerationMixin):
+    """Phi causal language model."""
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.model = PhiModel(config)
+        self.vocab_size = config.vocab_size
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
+        # ``lm_shift`` drops the first rows of lm_head.weight at decode time.
+        self.lm_shift, _ = 0, self.post_init()
+
+    def get_input_embeddings(self) -> nn.Embedding:
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        self.model.embed_tokens = value
+
+    def get_output_embeddings(self) -> nn.Linear:
+        return self.lm_head
+
+    def set_output_embeddings(self, new_embeddings):
+        self.lm_head = new_embeddings
+
+    def set_decoder(self, decoder):
+        self.model = decoder
+
+    def get_decoder(self) -> PhiModel:
+        return self.model
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        attention_mask: torch.Tensor = None,
+        inputs_embeds: torch.Tensor = None,
+        logits_to_keep=None,
+        **kwargs,
+    ) -> CausalLMOutputWithPast:
+        """Compute LM logits, optionally only for the last ``logits_to_keep`` positions."""
+        outputs = self.model(input_ids, attention_mask, inputs_embeds, **kwargs)
+        # int => keep the trailing positions; any other index object is used as-is.
+        keep = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
+        head_w = self.lm_head.weight[self.lm_shift :] if self.lm_shift else self.lm_head.weight
+        logits = nn.functional.linear(outputs[0] if keep is None else outputs[0][:, keep], head_w)
+        return CausalLMOutputWithPast(logits=logits, past_key_values=outputs.past_key_values)
+
+    def prepare_inputs_for_generation(self, input_ids, inputs_embeds=None, **kwargs):
+        """Trim already-cached positions; attention_mask is deliberately dropped."""
+        past_key_values, _ = kwargs.get("past_key_values", None), kwargs.pop("attention_mask", None)
+        past_pos = past_key_values.get_seq_length() if past_key_values else 0
+        inputs = {"input_ids": input_ids[:, past_pos:] if past_pos else input_ids, **kwargs}
+        # Embeddings are only forwarded on the first (uncached) step.
+        if inputs_embeds is not None and not past_pos:
+            inputs["inputs_embeds"] = inputs_embeds
+        return inputs
diff --git a/URSA/diffnext/models/text_encoders/qwen3.py b/URSA/diffnext/models/text_encoders/qwen3.py
new file mode 100644
index 0000000000000000000000000000000000000000..23a7b832730225880c4c71a88e0d5c224fcafa37
--- /dev/null
+++ b/URSA/diffnext/models/text_encoders/qwen3.py
@@ -0,0 +1,281 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Simple implementation of Qwen3 model."""
+
+import torch
+import torch.utils.checkpoint
+from torch import nn
+
+from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
+from transformers.modeling_utils import PreTrainedModel
+from transformers.generation import GenerationMixin
+from transformers.models.qwen3.configuration_qwen3 import Qwen3Config
+
+from diffnext.models.flash_attention import apply_rotary_emb, swiglu
+from diffnext.models.flash_attention import RMSNorm as Qwen3RMSNorm
+
+
+def maybe_apply_ckpt(function, x, enable=False) -> torch.Tensor:
+    """Run ``function(x)``, wrapped in gradient checkpointing when enabled.
+
+    Checkpointing only engages if ``enable`` is set and the (first) input
+    tensor requires gradients; otherwise the function is called directly.
+    """
+    probe = x[0] if isinstance(x, (tuple, list)) else x
+    if not (enable and probe.requires_grad):
+        return function(x)
+    return torch.utils.checkpoint.checkpoint(function, x, use_reentrant=False)
+
+
+class Qwen3RotaryEmbedding(nn.Module):
+    """Rotary embedding layer."""
+
+    class PEFunc(object):
+        """Apply RoPE weight to Q/K tensor."""
+
+        def __init__(self, weight):
+            self.cos, self.sin = weight
+
+        def __call__(self, x: torch.Tensor) -> torch.Tensor:
+            # Lazily migrate the cached tables to the input's device/dtype.
+            self.cos, self.sin = self.cos.to(x), self.sin.to(x)
+            return apply_rotary_emb(x, self.cos, self.sin, inplace=True)
+
+    @staticmethod
+    def from_config(config):
+        """Build a rotary embedding from a HF model config.
+
+        Some HF configs do not expose ``rope_theta`` as an attribute even
+        though it appears in ``to_dict()``, and older checkpoints use the
+        legacy ``rotary_emb_base`` field. Probe each source in priority
+        order (attribute, then dict) before defaulting to 10000.0.
+        """
+        head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
+        cfg_dict = config.to_dict() if hasattr(config, "to_dict") else {}
+        rope_theta = None
+        for key in ("rope_theta", "rotary_emb_base"):
+            rope_theta = getattr(config, key, None)
+            if rope_theta is None:
+                rope_theta = cfg_dict.get(key, None)
+            if rope_theta is not None:
+                break
+        if rope_theta is None:
+            rope_theta = 10000.0  # Final fallback.
+        return Qwen3RotaryEmbedding(head_dim, config.max_position_embeddings, float(rope_theta))
+
+    def __init__(self, dim, max_position_embeddings=2048, base=10000):
+        super().__init__()
+        self.dim, self.base = dim, base
+        self.max_position_embeddings = max_position_embeddings
+        # inv_freq[i] = 1 / base**(2i / dim): the standard RoPE frequency ladder.
+        freq = self.base ** (torch.arange(0, self.dim, 2, dtype=torch.int64).float() / self.dim)
+        self.register_buffer("inv_freq", freq.reciprocal_(), persistent=False)
+        self.set_cos_sin_cache(max_position_embeddings, dtype=torch.get_default_dtype())
+
+    def set_cos_sin_cache(self, seqlen, dtype):
+        """(Re)build the cached cos/sin tables covering ``seqlen`` positions."""
+        self.max_seqlen_cached, device = seqlen, self.inv_freq.device
+        t = torch.arange(self.max_seqlen_cached, device=device, dtype=torch.int64)
+        freq = torch.outer(t.float(), self.inv_freq.float())
+        emb = torch.cat((freq, freq), dim=-1)
+        self.register_buffer("cos", emb.cos().to(dtype), persistent=False)
+        self.register_buffer("sin", emb.sin().to(dtype), persistent=False)
+
+    def get_func(self, pos=0, seqlen=1) -> PEFunc:
+        """Return a PEFunc whose tables cover positions [pos, pos + seqlen)."""
+        return self.PEFunc(_[pos : pos + seqlen].chunk(2, -1)[0] for _ in (self.cos, self.sin))
+
+
+class Qwen3MLP(nn.Module):
+    """Gated MLP (SwiGLU feed-forward)."""
+
+    def __init__(self, config):
+        super().__init__()
+        self.gradient_checkpointing = False
+        self.config, self.hidden_size = config, config.hidden_size
+        width, inner = config.hidden_size, config.intermediate_size
+        self.gate_proj = nn.Linear(width, inner, bias=False)
+        self.up_proj = nn.Linear(width, inner, bias=False)
+        self.down_proj = nn.Linear(inner, width, bias=False)
+
+    def forward(self, x) -> torch.Tensor:
+        """Apply the gated up-projection, then project back to hidden size."""
+        gate, up = self.gate_proj(x), self.up_proj(x)
+        return self.down_proj(swiglu(gate, up))
+
+
+class Qwen3Attention(nn.Module):
+    """Multi-headed attention."""
+
+    def __init__(self, config: Qwen3Config, layer_idx=None):
+        super().__init__()
+        self.layer_idx, hidden_size = layer_idx, config.hidden_size
+        self.config, self.is_causal, self.gradient_checkpointing = config, True, False
+        # GQA: each KV head serves ``num_key_value_groups`` query heads.
+        self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
+        self.head_dim = getattr(config, "head_dim", hidden_size // config.num_attention_heads)
+        self.q_proj = nn.Linear(hidden_size, config.num_attention_heads * self.head_dim, bias=False)
+        self.k_proj = nn.Linear(hidden_size, config.num_key_value_heads * self.head_dim, bias=False)
+        self.v_proj = nn.Linear(hidden_size, config.num_key_value_heads * self.head_dim, bias=False)
+        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, hidden_size, bias=False)
+        # Qwen3-style per-head RMSNorm on Q/K (applied before RoPE in forward_qkv).
+        self.q_norm = Qwen3RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+        self.k_norm = Qwen3RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+        # Per-call state injected by the owning model before each forward pass.
+        self.attn_mask = self.past_key_value = self.pe_func = self.flex_attn = None
+
+    def forward_qkv(self, x) -> torch.Tensor:
+        """Project input to per-head [Q, K, V]; accepts (x, norm) for fused pre-norm.
+
+        Returns a 3-element list: normed Q, normed K, and V, each shaped
+        [..., seq, num_heads, head_dim] after ``unflatten``.
+        """
+        x = x[1](x[0]) if isinstance(x, (tuple, list)) else x  # PreNorm.
+        q, k, v = [m(x) for m in (self.q_proj, self.k_proj, self.v_proj)]
+        q, k, v = [_.unflatten(-1, (-1, self.head_dim)) for _ in (q, k, v)]
+        return [m(_) for m, _ in zip((self.q_norm, self.k_norm), (q, k))] + [v]
+
+    def repeat_kv(self, x) -> torch.Tensor:
+        """Tile KV heads across query-head groups (GQA expansion)."""
+        return x.unsqueeze(2).expand(-1, -1, self.num_key_value_groups, -1, -1).flatten(1, 2)
+
+
+class Qwen3SdpaAttention(Qwen3Attention):
+    """Qwen3 SDPA attention."""
+
+    def forward(self, x) -> torch.Tensor:
+        """Compute attention with ``scaled_dot_product_attention``.
+
+        ``x`` is either a hidden-state tensor or a ``(hidden, norm)`` tuple
+        for fused pre-norm (see ``forward_qkv``). KV caching supports both a
+        frozen cache (concat only) and the legacy ``update`` API.
+        """
+        q, k, v = maybe_apply_ckpt(self.forward_qkv, x, self.gradient_checkpointing)
+        q, k = [self.pe_func(_) for _ in (q, k)]
+        q, k, v = [_.transpose(1, 2) for _ in (q, k, v)]  # -> [B, H, S, D].
+        if self.past_key_value is not None and getattr(self.past_key_value, "is_frozen", False):
+            k, v = [torch.cat(_, -2) for _ in zip(self.past_key_value[self.layer_idx], (k, v))]
+        elif self.past_key_value is not None:  # Fallback to legacy NTP caching.
+            k, v = self.past_key_value.update(k, v, self.layer_idx)
+        self.past_key_value = None  # Release cache reference.
+        if self.flex_attn and self.flex_attn.offsets:
+            return self.o_proj(self.flex_attn(q, k, v).transpose(1, 2).flatten(2))
+        # BUGFIX: the original read ``x.size(1)`` here, but ``x`` is a
+        # (hidden, norm) tuple when called from Qwen3DecoderLayer, raising
+        # AttributeError whenever ``is_causal`` was left True with no mask.
+        # The query length q.size(-2) equals the input sequence length.
+        is_causal = self.is_causal and self.attn_mask is None and q.size(-2) > 1
+        sdpa_args = {"is_causal": is_causal, "enable_gqa": True}
+        o = nn.functional.scaled_dot_product_attention(q, k, v, self.attn_mask, **sdpa_args)
+        return self.o_proj(o.transpose(1, 2).flatten(2))
+
+
+class Qwen3DecoderLayer(nn.Module):
+    """Qwen3 decoder layer."""
+
+    def __init__(self, config: Qwen3Config, layer_idx: int):
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.self_attn = Qwen3SdpaAttention(config, layer_idx)
+        self.mlp, self.gradient_checkpointing = Qwen3MLP(config), False
+        self.input_layernorm = Qwen3RMSNorm(config.hidden_size, config.rms_norm_eps)
+        self.post_attention_layernorm = Qwen3RMSNorm(config.hidden_size, config.rms_norm_eps)
+
+    def forward_mlp(self, x) -> torch.Tensor:
+        """Pre-norm MLP branch (checkpointable as a unit)."""
+        return self.mlp(self.post_attention_layernorm(x))
+
+    def forward(self, x) -> torch.Tensor:
+        """Pre-norm transformer block with in-place residual adds.
+
+        The input norm is passed *into* the attention module as a
+        (hidden, norm) tuple so normalization can live inside its
+        checkpointed region; ``add_`` fuses each residual in place.
+        """
+        x = self.self_attn((x, self.input_layernorm)).add_(x)
+        return maybe_apply_ckpt(self.forward_mlp, x, self.mlp.gradient_checkpointing).add_(x)
+
+
+class Qwen3PreTrainedModel(PreTrainedModel):
+    """Qwen3 pre-trained model."""
+
+    config_class = Qwen3Config
+    base_model_prefix = "model"
+    supports_gradient_checkpointing = True
+    _no_split_modules = ["Qwen3DecoderLayer"]
+    _skip_keys_device_placement = "past_key_values"
+    _supports_flash_attn_2 = True
+    _supports_flex_attn = True
+    _supports_sdpa = True
+    _supports_cache_class = True
+
+    def _init_weights(self, module):
+        """Initialize Linear/Embedding weights from N(0, initializer_range)."""
+        std = self.config.initializer_range
+        if isinstance(module, nn.Embedding):
+            module.weight.data.normal_(mean=0.0, std=std)
+            # Keep the padding embedding at exactly zero.
+            if module.padding_idx is not None:
+                module.weight.data[module.padding_idx].zero_()
+        elif isinstance(module, nn.Linear):
+            module.weight.data.normal_(mean=0.0, std=std)
+            if module.bias is not None:
+                module.bias.data.zero_()
+
+
+class Qwen3Model(Qwen3PreTrainedModel):
+    """Transformer decoder."""
+
+    def __init__(self, config: Qwen3Config):
+        super().__init__(config)
+        self.vocab_size = config.vocab_size
+        self.padding_idx = config.pad_token_id
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
+        self.layers = [Qwen3DecoderLayer(config, i) for i in range(config.num_hidden_layers)]
+        self.layers = nn.ModuleList(self.layers)
+        self.norm = Qwen3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        # The tuple unpack is a one-liner trick to also run HF's post_init().
+        self.rotary_emb, _ = Qwen3RotaryEmbedding.from_config(config), self.post_init()
+
+    def forward(
+        self,
+        input_ids: torch.Tensor = None,
+        attention_mask: torch.Tensor = None,
+        inputs_embeds: torch.Tensor = None,
+        past_key_values: torch.Tensor = None,
+        **kwargs,
+    ) -> BaseModelOutputWithPast:
+        """Decode ``input_ids`` (or precomputed ``inputs_embeds``) to hidden states.
+
+        RoPE offset: an explicit ``rope_pos`` kwarg wins; otherwise positions
+        resume after the KV cache length (0 without a cache).
+        """
+        x = inputs_embeds if input_ids is None else self.embed_tokens(input_ids)
+        pe_pos = kwargs.get("rope_pos", past_key_values.get_seq_length() if past_key_values else 0)
+        # NOTE(review): ``self.flex_rope`` is not set in __init__; it is injected
+        # externally (e.g. URSATransformer3DModel assigns it) and is required
+        # whenever ``rope_pos`` is a tensor — confirm callers guarantee this.
+        pe_embedder = self.flex_rope if isinstance(pe_pos, torch.Tensor) else self.rotary_emb
+        pe_func = pe_embedder.get_func(pe_pos, x.size(1))
+        for layer in self.layers:
+            # Push per-call state into each attention module (stateful style;
+            # the attention clears its cache reference after use).
+            layer.self_attn.pe_func = pe_func
+            layer.self_attn.attn_mask = attention_mask
+            layer.self_attn.past_key_value = past_key_values
+            x = maybe_apply_ckpt(layer.__call__, x, layer.gradient_checkpointing)
+        x = self.norm(x)
+        return BaseModelOutputWithPast(last_hidden_state=x, past_key_values=past_key_values)
+
+
+class Qwen3ForCausalLM(Qwen3PreTrainedModel, GenerationMixin):
+    """CausalLM decoder."""
+
+    _tied_weights_keys = ["lm_head.weight"]
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.model = Qwen3Model(config)
+        self.vocab_size = config.vocab_size
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+        # lm_shift: optional row offset into lm_head.weight (see forward);
+        # the tuple unpack also runs HF's post_init().
+        self.lm_shift, _ = 0, self.post_init()
+
+    def get_input_embeddings(self):
+        """Return the token-embedding table of the decoder."""
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        """Replace the token-embedding table of the decoder."""
+        self.model.embed_tokens = value
+
+    def get_output_embeddings(self):
+        """Return the LM head projection."""
+        return self.lm_head
+
+    def set_output_embeddings(self, new_embeddings):
+        """Replace the LM head projection."""
+        self.lm_head = new_embeddings
+
+    def set_decoder(self, decoder):
+        """Swap in a different decoder backbone."""
+        self.model = decoder
+
+    def get_decoder(self):
+        """Return the decoder backbone."""
+        return self.model
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        attention_mask: torch.Tensor = None,
+        inputs_embeds: torch.Tensor = None,
+        logits_to_keep=None,
+        **kwargs,
+    ) -> CausalLMOutputWithPast:
+        """Run the decoder and project hidden states to vocabulary logits.
+
+        ``logits_to_keep``: an int keeps only the last N positions before the
+        LM head; other index objects are applied as-is; ``None`` keeps all.
+        No loss is computed here; callers apply their own loss on ``logits``.
+        """
+        outputs = self.model(input_ids, attention_mask, inputs_embeds, **kwargs)
+        # NOTE(review): logits_to_keep == 0 yields slice(0, None), i.e. keep all.
+        keep = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
+        head_w = self.lm_head.weight[self.lm_shift :] if self.lm_shift else self.lm_head.weight
+        logits = nn.functional.linear(outputs[0] if keep is None else outputs[0][:, keep], head_w)
+        return CausalLMOutputWithPast(logits=logits, past_key_values=outputs.past_key_values)
+
+    def prepare_inputs_for_generation(self, input_ids, inputs_embeds=None, **kwargs):
+        """Trim positions already covered by the KV cache before a decode step.
+
+        The incoming ``attention_mask`` is intentionally discarded; embeddings
+        are only forwarded on the first (cache-free) step.
+        """
+        past_key_values, _ = kwargs.get("past_key_values", None), kwargs.pop("attention_mask", None)
+        past_pos = past_key_values.get_seq_length() if past_key_values else 0
+        inputs = {"input_ids": input_ids[:, past_pos:] if past_pos else input_ids, **kwargs}
+        if inputs_embeds is not None and not past_pos:
+            inputs["inputs_embeds"] = inputs_embeds
+        return inputs
diff --git a/URSA/diffnext/models/transformers/__init__.py b/URSA/diffnext/models/transformers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..63fa229f17107099d2d962a017c5edb785a88c67
--- /dev/null
+++ b/URSA/diffnext/models/transformers/__init__.py
@@ -0,0 +1,16 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Transformer models."""
diff --git a/URSA/diffnext/models/transformers/transformer_nova.py b/URSA/diffnext/models/transformers/transformer_nova.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa5182932c435f9a78cbc6af9a000e1220eb3150
--- /dev/null
+++ b/URSA/diffnext/models/transformers/transformer_nova.py
@@ -0,0 +1,102 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""3D transformer model for NOVA."""
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_utils import ModelMixin
+
+from diffnext.models.diffusion_mlp import DiffusionMLP
+from diffnext.models.embeddings import PosEmbed, VideoPosEmbed, RotaryEmbed3D
+from diffnext.models.embeddings import MaskEmbed, MotionEmbed, TextEmbed, LabelEmbed
+from diffnext.models.normalization import AdaLayerNorm
+from diffnext.models.transformers.transformer_nova_base import Transformer3DModel
+from diffnext.models.vision_transformer import VisionTransformer
+from diffnext.utils.registry import Registry
+
+VIDEO_ENCODERS = Registry("video_encoders")
+IMAGE_ENCODERS = Registry("image_encoders")
+IMAGE_DECODERS = Registry("image_decoders")
+
+
+@VIDEO_ENCODERS.register("vit_d16w768", depth=16, embed_dim=768, num_heads=12)
+@VIDEO_ENCODERS.register("vit_d16w1024", depth=16, embed_dim=1024, num_heads=16)
+@VIDEO_ENCODERS.register("vit_d16w1536", depth=16, embed_dim=1536, num_heads=16)
+def video_encoder(depth, embed_dim, num_heads, patch_size, image_size, image_dim):
+    """Build a video ViT encoder from a registered architecture preset."""
+    return VisionTransformer(
+        depth=depth,
+        embed_dim=embed_dim,
+        num_heads=num_heads,
+        patch_size=patch_size,
+        image_size=image_size,
+        image_dim=image_dim,
+    )
+
+
+@IMAGE_ENCODERS.register("vit_d32w768", depth=32, embed_dim=768, num_heads=12)
+@IMAGE_ENCODERS.register("vit_d32w1024", depth=32, embed_dim=1024, num_heads=16)
+@IMAGE_ENCODERS.register("vit_d32w1536", depth=32, embed_dim=1536, num_heads=16)
+def image_encoder(depth, embed_dim, num_heads, patch_size, image_size, image_dim):
+    """Build an image ViT encoder from a registered architecture preset."""
+    return VisionTransformer(
+        depth=depth,
+        embed_dim=embed_dim,
+        num_heads=num_heads,
+        patch_size=patch_size,
+        image_size=image_size,
+        image_dim=image_dim,
+    )
+
+
+@IMAGE_DECODERS.register("mlp_d3w1280", depth=3, embed_dim=1280)
+@IMAGE_DECODERS.register("mlp_d6w768", depth=6, embed_dim=768)
+@IMAGE_DECODERS.register("mlp_d6w1024", depth=6, embed_dim=1024)
+@IMAGE_DECODERS.register("mlp_d6w1536", depth=6, embed_dim=1536)
+def image_decoder(depth, embed_dim, patch_size, image_dim, cond_dim):
+    """Build a diffusion MLP decoder from a registered architecture preset."""
+    return DiffusionMLP(
+        depth=depth,
+        embed_dim=embed_dim,
+        patch_size=patch_size,
+        image_dim=image_dim,
+        cond_dim=cond_dim,
+    )
+
+
+class NOVATransformer3DModel(Transformer3DModel, ModelMixin, ConfigMixin):
+    """3D transformer model for NOVA."""
+
+    @register_to_config
+    def __init__(
+        self,
+        image_dim=None,
+        image_size=None,
+        image_stride=None,
+        text_token_dim=None,
+        text_token_len=None,
+        image_base_size=None,
+        video_base_size=None,
+        video_mixer_rank=None,
+        rotary_pos_embed=False,
+        arch=("", "", ""),
+    ):
+        # Normalize image_size to (H, W) in latent units (divide by VAE stride).
+        image_size = (image_size,) * 2 if isinstance(image_size, int) else image_size
+        image_size = tuple(v // image_stride for v in image_size)
+        # Derive patch size from the stride: 15 // stride + 1 gives e.g.
+        # stride 16 -> 1, stride 8 -> 2, stride 4 -> 4. Video patches double it.
+        image_args = {"image_dim": image_dim, "patch_size": 15 // image_stride + 1}
+        video_args = {**image_args, "patch_size": image_args["patch_size"] * 2}
+        # arch = (video encoder key, image encoder key, image decoder key).
+        video_encoder = VIDEO_ENCODERS.get(arch[0])(image_size=image_size, **video_args)
+        image_encoder = IMAGE_ENCODERS.get(arch[1])(image_size=image_size, **image_args)
+        image_decoder = IMAGE_DECODERS.get(arch[2])(cond_dim=image_encoder.embed_dim, **image_args)
+        if rotary_pos_embed:
+            video_pos_embed = RotaryEmbed3D(video_encoder.rope.dim, video_base_size[1:])
+            image_pos_embed = RotaryEmbed3D(image_encoder.rope.dim, image_base_size)
+        else:
+            # Absolute PE path: the image encoder gets its own PosEmbed in place.
+            video_pos_embed = VideoPosEmbed(video_encoder.embed_dim, video_base_size)
+            image_encoder.pos_embed = PosEmbed(image_encoder.embed_dim, image_base_size)
+        # Only meaningful on the RoPE path; the conditional short-circuits, so
+        # the unbound name in the absolute-PE branch is never evaluated.
+        image_pos_embed = image_pos_embed if rotary_pos_embed else None
+        if video_mixer_rank:
+            video_mixer_rank = max(video_mixer_rank, 0)  # Use vanilla AdaLN if ``rank`` < 0.
+            video_encoder.mixer = AdaLayerNorm(video_encoder.embed_dim, video_mixer_rank, eps=None)
+        if text_token_dim:
+            text_embed = TextEmbed(text_token_dim, image_encoder.embed_dim, text_token_len)
+        # Text conditioning and class-label conditioning are mutually exclusive;
+        # motion embedding only exists for true video (temporal size > 1).
+        super(NOVATransformer3DModel, self).__init__(
+            video_encoder=video_encoder,
+            image_encoder=image_encoder,
+            image_decoder=image_decoder,
+            mask_embed=MaskEmbed(image_encoder.embed_dim),
+            text_embed=text_embed if text_token_dim else None,
+            label_embed=LabelEmbed(image_encoder.embed_dim) if not text_token_dim else None,
+            video_pos_embed=video_pos_embed,
+            image_pos_embed=image_pos_embed,
+            motion_embed=MotionEmbed(video_encoder.embed_dim) if video_base_size[0] > 1 else None,
+        )
diff --git a/URSA/diffnext/models/transformers/transformer_nova_base.py b/URSA/diffnext/models/transformers/transformer_nova_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..b842064737b4919ec3a4674f9f400b6173dfa26b
--- /dev/null
+++ b/URSA/diffnext/models/transformers/transformer_nova_base.py
@@ -0,0 +1,200 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Base 3D transformer model for NOVA."""
+
+from typing import Dict
+
+import torch
+from torch import nn
+from tqdm import tqdm
+
+from diffnext.models.guidance_scaler import GuidanceScaler
+
+
+class Transformer3DModel(nn.Module):
+    """Base 3D transformer model for NOVA.
+
+    Combines temporal autoregression over latent frames (video encoder),
+    masked autoregression within a frame (image encoder), and token-wise
+    diffusion (image decoder) for both training and generation.
+    """
+
+    def __init__(
+        self,
+        video_encoder=None,
+        image_encoder=None,
+        image_decoder=None,
+        mask_embed=None,
+        text_embed=None,
+        label_embed=None,
+        video_pos_embed=None,
+        image_pos_embed=None,
+        motion_embed=None,
+        noise_scheduler=None,
+        sample_scheduler=None,
+    ):
+        super(Transformer3DModel, self).__init__()
+        self.video_encoder = video_encoder
+        self.image_encoder = image_encoder
+        self.image_decoder = image_decoder
+        self.mask_embed = mask_embed
+        self.text_embed = text_embed
+        self.label_embed = label_embed
+        self.video_pos_embed = video_pos_embed
+        self.image_pos_embed = image_pos_embed
+        self.motion_embed = motion_embed
+        self.noise_scheduler = noise_scheduler
+        self.sample_scheduler = sample_scheduler
+        # Identity hook; pipelines may replace it to mutate inputs in place.
+        self.pipeline_preprocess = lambda inputs: inputs
+        # Number of noise draws per latent in get_losses (variance reduction).
+        self.loss_repeat = 4
+
+    def progress_bar(self, iterable, enable=True):
+        """Return a tqdm progress bar."""
+        return tqdm(iterable) if enable else iterable
+
+    def preprocess(self, inputs: Dict):
+        """Preprocess model inputs.
+
+        Builds the condition tensor ``inputs["c"]`` from prompt/motion
+        embeddings and allocates an empty latent ``inputs["x"]`` if absent.
+        NOTE(review): ``self.dtype``/``self.device`` come from the ModelMixin
+        subclass, not nn.Module itself — confirm for other subclasses.
+        """
+        # With classifier-free guidance, conditions are doubled (cond + uncond).
+        add_guidance = inputs.get("guidance_scale", 1) > 1
+        inputs["c"], dtype, device = inputs.get("c", []), self.dtype, self.device
+        if inputs.get("x", None) is None:
+            batch_size = inputs.get("batch_size", 1)
+            image_size = (self.image_encoder.image_dim,) + self.image_encoder.image_size
+            inputs["x"] = torch.empty(batch_size, *image_size, device=device, dtype=dtype)
+        if inputs.get("prompt", None) is not None and self.text_embed:
+            inputs["c"].append(self.text_embed(inputs.pop("prompt")))
+        if inputs.get("motion", None) is not None and self.motion_embed:
+            flow, fps = inputs.pop("motion", None), inputs.pop("fps", None)
+            # Duplicate flow/fps lists to match the doubled guidance batch.
+            flow, fps = [v + v if (add_guidance and v) else v for v in (flow, fps)]
+            inputs["c"].append(self.motion_embed(inputs["c"][-1], flow, fps))
+        inputs["c"] = torch.cat(inputs["c"], dim=1) if len(inputs["c"]) > 1 else inputs["c"][0]
+
+    def get_losses(self, z: torch.Tensor, x: torch.Tensor, video_shape=None) -> Dict:
+        """Return the training losses.
+
+        Repeats (z, x) ``loss_repeat`` times so each latent sees several
+        noise samples; the loss is masked by ``mask_embed.mask`` so only
+        predicted (masked) tokens contribute.
+        """
+        z = z.repeat(self.loss_repeat, *((1,) * (z.dim() - 1)))
+        x = x.repeat(self.loss_repeat, *((1,) * (x.dim() - 1)))
+        x = self.image_encoder.patch_embed.patchify(x)
+        noise = torch.randn(x.shape, dtype=x.dtype, device=x.device)
+        timestep = self.noise_scheduler.sample_timesteps(z.shape[:2], device=z.device)
+        x_t = self.noise_scheduler.add_noise(x, noise, timestep)
+        x_t = self.image_encoder.patch_embed.unpatchify(x_t)
+        # The scheduler may rewrite timesteps internally (e.g. shifting).
+        timestep = getattr(self.noise_scheduler, "timestep", timestep)
+        pred_type = getattr(self.noise_scheduler.config, "prediction_type", "flow")
+        model_pred = self.image_decoder(x_t, timestep, z)
+        # epsilon-prediction targets the noise; flow targets (noise - x).
+        model_target = noise.float() if pred_type == "epsilon" else noise.sub(x).float()
+        loss = nn.functional.mse_loss(model_pred.float(), model_target, reduction="none")
+        loss, weight = loss.mean(-1, True), self.mask_embed.mask.to(loss.dtype)
+        weight = weight.repeat(self.loss_repeat, *((1,) * (z.dim() - 1)))
+        loss = loss.mul_(weight).div_(weight.sum().add_(1e-5))
+        if video_shape is not None:
+            # Split the loss into first-frame (t2i) and subsequent-frame (i2i)
+            # parts, each rescaled to keep magnitudes comparable.
+            loss = loss.view((-1,) + video_shape).transpose(0, 1).sum((1, 2))
+            i2i = loss[1:].sum().mul_(video_shape[0] / (video_shape[0] - 1))
+            return {"loss_t2i": loss[0].mul(video_shape[0]), "loss_i2i": i2i}
+        return {"loss": loss.sum()}
+
+    @torch.no_grad()
+    def denoise(self, z, x, guidance_scaler, generator=None, pred_ids=None) -> torch.Tensor:
+        """Run diffusion denoising process."""
+        self.sample_scheduler._step_index = None  # Reset counter.
+        for t in self.sample_scheduler.timesteps:
+            # Guidance may be disabled past a timestep threshold.
+            z, pred_ids = guidance_scaler.maybe_disable(t, z, pred_ids)
+            timestep = torch.as_tensor(t, device=x.device).expand(z.shape[0])
+            model_pred = self.image_decoder(guidance_scaler.expand(x), timestep, z, pred_ids)
+            model_pred = guidance_scaler.scale(model_pred)
+            model_pred = self.image_encoder.patch_embed.unpatchify(model_pred)
+            x = self.sample_scheduler.step(model_pred, t, x, generator=generator).prev_sample
+        return self.image_encoder.patch_embed.patchify(x)
+
+    @torch.inference_mode()
+    def generate_frame(self, states: Dict, inputs: Dict):
+        """Generate a batch of frames.
+
+        Masked-autoregressive loop: each step reveals ``num_preds`` tokens,
+        denoises them, and accumulates into the frame latent ``x``.
+        """
+        guidance_scaler = GuidanceScaler(**inputs)
+        generator = self.mask_embed.generator = inputs.get("generator", None)
+        all_num_preds = [_ for _ in inputs["num_preds"] if _ > 0]
+        c, x, self.mask_embed.mask = states["c"], states["x"].zero_(), None
+        pos = self.image_pos_embed.get_pos(1, c.size(0)) if self.image_pos_embed else None
+        for i, num_preds in enumerate(self.progress_bar(all_num_preds, inputs.get("tqdm2", False))):
+            guidance_scaler.decay_guidance_scale((i + 1) / len(all_num_preds))
+            z = self.mask_embed(self.image_encoder.patch_embed(x))
+            pred_mask, pred_ids = self.mask_embed.get_pred_mask(num_preds)
+            pred_ids = guidance_scaler.expand(pred_ids)
+            # First iteration starts from an empty set of revealed token ids.
+            prev_ids = prev_ids if i else pred_ids.new_empty((pred_ids.size(0), 0, 1))
+            z = self.image_encoder(guidance_scaler.expand(z), c, prev_ids, pos=pos)
+            prev_ids = torch.cat([prev_ids, pred_ids], dim=1)
+            states["noise"].normal_(generator=generator)
+            sample = self.denoise(z, states["noise"], guidance_scaler.clone(), generator, pred_ids)
+            # Only the newly predicted tokens (pred_mask) are written into x.
+            x.add_(self.image_encoder.patch_embed.unpatchify(sample.mul_(pred_mask)))
+
+    @torch.inference_mode()
+    def generate_video(self, inputs: Dict):
+        """Generate a batch of videos.
+
+        Temporal autoregression over latent frames; each frame is produced by
+        ``generate_frame`` conditioned on the video encoder's running context.
+        """
+        guidance_scaler = GuidanceScaler(**inputs)
+        max_latent_length = inputs.get("max_latent_length", 1)
+        self.sample_scheduler.set_timesteps(inputs.get("num_diffusion_steps", 25))
+        states = {"x": inputs["x"], "noise": inputs["x"].clone()}
+        latents, self.mask_embed.pred_ids, time_pos = inputs.get("latents", []), None, []
+        if self.image_pos_embed:  # RoPE.
+            time_pos = self.video_pos_embed.get_pos(max_latent_length).chunk(max_latent_length, 1)
+        else:  # Absolute PE, which will be deprecated in the future.
+            time_embed = self.video_pos_embed.get_time_embed(max_latent_length)
+        inputs["c"] = guidance_scaler.expand_text(inputs["c"])
+        self.video_encoder.enable_kvcache(max_latent_length > 1)
+        for states["t"] in self.progress_bar(range(max_latent_length), inputs.get("tqdm1", True)):
+            pos = time_pos[states["t"]] if time_pos else None
+            c = self.video_encoder.patch_embed(states["x"])
+            # Frame 0 is seeded entirely with the BOS token embedding.
+            c.__setitem__(slice(None), self.mask_embed.bos_token) if states["t"] == 0 else c
+            # Absolute-PE path only (time_embed exists only in that branch).
+            c = self.video_pos_embed(c.add_(time_embed[states["t"]])) if not time_pos else c
+            c = guidance_scaler.expand(c, padding=self.mask_embed.bos_token)
+            # Text conditions are only injected at the first frame.
+            c = states["c"] = self.video_encoder(c, None if states["t"] else inputs["c"], pos=pos)
+            if not isinstance(self.video_encoder.mixer, torch.nn.Identity):
+                # states["*"] caches the first frame's context for the mixer.
+                states["c"] = self.video_encoder.mixer(states["*"], c) if states["t"] else c
+                states["*"] = states["*"] if states["t"] else states["c"]
+            if states["t"] == 0 and latents:
+                # Image-to-video: reuse the provided first-frame latent.
+                states["x"].copy_(latents[-1])
+            else:
+                self.generate_frame(states, inputs)
+            latents.append(states["x"].clone())
+        self.video_encoder.enable_kvcache(False)
+
+    def train_video(self, inputs):
+        """Train a batch of videos."""
+        # 3D temporal autoregressive modeling (TAM).
+        inputs["x"].unsqueeze_(2) if inputs["x"].dim() == 4 else None
+        bs, latent_length = inputs["x"].size(0), inputs["x"].size(2)
+        # Teacher forcing: frames [0, T-1) condition the prediction of [0, T).
+        c = self.video_encoder.patch_embed(inputs["x"][:, :, : latent_length - 1])
+        bov = self.mask_embed.bos_token.expand(bs, 1, c.size(-2), -1)
+        c, pos = self.video_pos_embed(torch.cat([bov, c], dim=1)), None
+        if self.image_pos_embed:
+            pos = self.video_pos_embed.get_pos(c.size(1), bs, self.video_encoder.patch_embed.hw)
+        attn_mask = self.mask_embed.get_attn_mask(c, inputs["c"]) if latent_length > 1 else None
+        [setattr(blk.attn, "attn_mask", attn_mask) for blk in self.video_encoder.blocks]
+        c = self.video_encoder(c.flatten(1, 2), inputs["c"], pos=pos)
+        if not isinstance(self.video_encoder.mixer, torch.nn.Identity) and latent_length > 1:
+            # Mix frames 1..T with the first frame's context; frame 0 passes through.
+            c = c.view(bs, latent_length, -1, c.size(-1)).split([1, latent_length - 1], 1)
+            c = torch.cat([c[0], self.video_encoder.mixer(*c)], 1)
+        # 2D masked autoregressive modeling (MAM).
+        x = inputs["x"][:, :, :latent_length].transpose(1, 2).flatten(0, 1)
+        z, bs = self.image_encoder.patch_embed(x), bs * latent_length
+        if self.image_pos_embed:
+            pos = self.image_pos_embed.get_pos(1, bs, self.image_encoder.patch_embed.hw)
+        z = self.image_encoder(self.mask_embed(z), c.reshape(bs, -1, c.size(-1)), pos=pos)
+        # 1D token-wise diffusion modeling (MLP).
+        video_shape = (latent_length, z.size(1)) if latent_length > 1 else None
+        return self.get_losses(z, x, video_shape=video_shape)
+
+    def forward(self, inputs):
+        """Define the computation performed at every call."""
+        self.pipeline_preprocess(inputs)
+        self.preprocess(inputs)
+        if self.training:
+            return self.train_video(inputs)
+        inputs["latents"] = inputs.pop("latents", [])
+        self.generate_video(inputs)
+        # Stack per-frame latents back into a [B, C, T, H, W] tensor.
+        return {"x": torch.stack(inputs["latents"], dim=2)}
diff --git a/URSA/diffnext/models/transformers/transformer_ursa.py b/URSA/diffnext/models/transformers/transformer_ursa.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ebf29c86837bdf415b332a5e76e7308935e9fbc
--- /dev/null
+++ b/URSA/diffnext/models/transformers/transformer_ursa.py
@@ -0,0 +1,102 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""3D transformer model for URSA."""
+
+import torch
+from torch import nn
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_utils import ModelMixin
+from diffusers.models.modeling_outputs import Transformer2DModelOutput
+
+from diffnext.models.embeddings import FlexRotaryEmbedding
+from diffnext.models.flash_attention import cross_entropy_loss
+from diffnext.models.flex_attention import FlexAttentionCausal2D
+from diffnext.models.text_encoders.qwen3 import Qwen3Config, Qwen3Model
+
+
+class URSATransformer3DModel(ModelMixin, ConfigMixin):
+    """3D transformer model for URSA.
+
+    Wraps a Qwen3 decoder with flex attention and flexible 3D RoPE, plus a
+    separate LM head sized ``lm_head_size`` (a sub-vocabulary of the full
+    ``vocab_size``).
+    """
+
+    @register_to_config
+    def __init__(
+        self,
+        hidden_size=2048,
+        intermediate_size=6144,
+        num_attention_heads=16,
+        num_key_value_heads=8,
+        num_hidden_layers=28,
+        max_window_layers=28,
+        rope_theta=1000000,
+        vocab_size=215669,
+        lm_vocab_size=151669,
+        lm_head_size=64000,
+        bov_token_id=151652,
+        attn_implementation=None,
+        **kwargs,
+    ):
+        super().__init__()
+        # self.model = Qwen3Model(Qwen3Config.from_dict(self._internal_dict))
+        qcfg = Qwen3Config.from_dict(self._internal_dict)
+        # inject rope_theta from the raw dict (ckpt has it, HF Qwen3Config drops it)
+        if not hasattr(qcfg, "rope_theta"):
+            if "rope_theta" in self._internal_dict:
+                qcfg.rope_theta = float(self._internal_dict["rope_theta"])
+            else:
+                qcfg.rope_theta = 10000.0
+        self.model = Qwen3Model(qcfg)
+
+        # Share flex-attention and flexible RoPE across all decoder layers;
+        # attention is made bidirectional (flex attention supplies causality).
+        self.model.flex_attn = self.flex_attn = FlexAttentionCausal2D()
+        self.model.flex_rope = self.flex_rope = FlexRotaryEmbedding.from_config(self.model.config)
+        [setattr(layer.self_attn, "is_causal", False) for layer in self.model.layers]
+        [setattr(layer.self_attn, "flex_attn", self.flex_attn) for layer in self.model.layers]
+        self.lm_head = nn.Linear(hidden_size, lm_head_size, bias=False)
+        self.pipeline_preprocess = lambda inputs: inputs  # Preprocess hook.
+        self.pipeline_postprocess = lambda *args, **kwargs: {}  # Postprocess hook.
+
+    def forward(
+        self,
+        input_ids,
+        inputs_embeds=None,
+        labels=None,
+        logits_to_keep=None,
+        lm_head_shift=0,
+        **kwargs,
+    ) -> Transformer2DModelOutput:
+        """Decode and project to LM logits; compute NTP loss when training.
+
+        In training, ``input_ids`` may be a dict of inputs that the
+        preprocess hook mutates in place; it must then contain "input_ids"
+        and "labels".
+        """
+        if self.training and isinstance(input_ids, dict):  # Prepare training args.
+            inputs, _ = input_ids, self.pipeline_preprocess(input_ids)
+            input_ids, labels, kwargs = inputs.pop("input_ids"), inputs["labels"], inputs
+
+        outputs = self.model(input_ids, inputs_embeds=inputs_embeds, **kwargs)
+        # Optionally keep only the last N positions and/or shift the head rows.
+        keep = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
+        head_w = self.lm_head.weight[lm_head_shift:] if lm_head_shift else self.lm_head.weight
+        logits = nn.functional.linear(outputs[0] if keep is None else outputs[0][:, keep], head_w)
+
+        def flash_loss(logits, labels):
+            # Prefer the fused flash-attn CE kernel when available.
+            if cross_entropy_loss:
+                return cross_entropy_loss(logits.flatten(0, 1), labels, inplace_backward=True)[0]
+            return nn.functional.cross_entropy(logits.flatten(0, 1), labels, reduction="none")
+
+        if labels is not None and isinstance(labels, torch.Tensor):  # NTP loss.
+            # Shift labels left by one (next-token prediction); pad tail with -100.
+            lbls = torch.nn.functional.pad(labels[:, 1:], (0, 1), value=-100)
+            loss = flash_loss(logits.float(), lbls.flatten()).view(lbls.shape)
+            acc1, mask = logits.data.argmax(-1).eq(lbls), lbls.ne(-100)
+            loss, acc1 = loss.sum().div(mask.sum()), acc1[mask].float().mean()
+            return self.pipeline_postprocess(loss, acc1)
+        elif labels is not None and isinstance(labels, dict):  # Custom losses.
+            # NOTE(review): ``inputs`` is only bound in the training-dict branch
+            # above; this path raises NameError if reached otherwise — confirm
+            # callers always pass dict inputs together with dict labels.
+            return self.pipeline_postprocess(inputs, logits)
+
+        return Transformer2DModelOutput(sample=logits)
diff --git a/URSA/diffnext/models/vision_transformer.py b/URSA/diffnext/models/vision_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..2aff695100b87181fe59b49b6f118aaf68a199a6
--- /dev/null
+++ b/URSA/diffnext/models/vision_transformer.py
@@ -0,0 +1,146 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Vision Transformer."""
+
+from typing import Tuple
+
+import torch
+from torch import nn
+from torch.utils.checkpoint import checkpoint as apply_ckpt
+
+from diffnext.models.embeddings import PatchEmbed, RotaryEmbed3D
+from diffnext.models.flex_attention import FlexAttentionCausal2D
+
+
+class MLP(nn.Module):
+ """Two layers MLP."""
+
+ def __init__(self, dim, mlp_ratio=4):
+ super(MLP, self).__init__()
+ self.fc1 = nn.Linear(dim, int(dim * mlp_ratio))
+ self.fc2 = nn.Linear(int(dim * mlp_ratio), dim)
+ self.activation = nn.GELU()
+
+ def forward(self, x) -> torch.Tensor:
+ return self.fc2(self.activation(self.fc1(x)))
+
+
+class Attention(nn.Module):
+ """Multihead attention."""
+
+ def __init__(self, dim, num_heads, qkv_bias=True):
+ super(Attention, self).__init__()
+ self.num_heads, self.head_dim = num_heads, dim // num_heads
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+ self.proj = nn.Linear(dim, dim)
+ self.attn_mask, self.cache_kv, self.pe_func, self.flex_attn = None, None, None, None
+
+ def forward(self, x) -> torch.Tensor:
+ qkv_shape = [-1, x.size(1), 3, self.num_heads, self.head_dim]
+ q, k, v = self.qkv(x).view(qkv_shape).permute(2, 0, 3, 1, 4).unbind(dim=0)
+ q, k = (self.pe_func(q), self.pe_func(k)) if self.pe_func else (q, k)
+ if self.cache_kv is not None and self.cache_kv:
+ if isinstance(self.cache_kv, list):
+ k = self.cache_kv[0] = torch.cat([self.cache_kv[0], k], dim=2)
+ v = self.cache_kv[1] = torch.cat([self.cache_kv[1], v], dim=2)
+ else:
+ self.cache_kv = [k, v]
+ if self.flex_attn and self.flex_attn.offsets:
+ return self.proj(self.flex_attn(q, k, v).transpose(1, 2).flatten(2))
+ o = nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=self.attn_mask)
+ return self.proj(o.transpose(1, 2).flatten(2))
+
+
+class Block(nn.Module):
+ """Transformer block."""
+
+ def __init__(self, dim, num_heads, mlp_ratio=4, qkv_bias=True):
+ super(Block, self).__init__()
+ self.norm1 = nn.LayerNorm(dim)
+ self.attn = Attention(dim, num_heads, qkv_bias=qkv_bias)
+ self.norm2 = nn.LayerNorm(dim)
+ self.mlp = MLP(dim, mlp_ratio=mlp_ratio)
+ self.attn_checkpointing, self.mlp_checkpointing = False, False
+
+ def forward_attn(self, x) -> torch.Tensor:
+ return self.norm1(self.attn(x))
+
+ def forward_mlp(self, x) -> torch.Tensor:
+ return self.norm2(self.mlp(x))
+
+ def forward_ckpt(self, x, name) -> torch.Tensor:
+ if getattr(self, f"{name}_checkpointing", False) and x.requires_grad:
+ return apply_ckpt(getattr(self, f"forward_{name}"), x, use_reentrant=False)
+ return getattr(self, f"forward_{name}")(x)
+
+ def forward(self, x, pe_func: callable = None) -> torch.Tensor:
+ self.attn.pe_func = pe_func
+ x = self.forward_ckpt(x, "attn").add_(x)
+ return self.forward_ckpt(x, "mlp").add_(x)
+
+
+class VisionTransformer(nn.Module):
+ """Vision transformer."""
+
+ def __init__(
+ self,
+ depth,
+ embed_dim,
+ num_heads,
+ mlp_ratio=4,
+ patch_size=2,
+ image_size=32,
+ image_dim=4,
+ encoder_depth=None,
+ ):
+ super(VisionTransformer, self).__init__()
+ self.embed_dim, self.image_size, self.image_dim = embed_dim, image_size, image_dim
+ self.patch_embed = PatchEmbed(image_dim, embed_dim, patch_size)
+ self.pos_embed, self.rope = nn.Identity(), RotaryEmbed3D(embed_dim // num_heads)
+ self.blocks = nn.ModuleList(Block(embed_dim, num_heads, mlp_ratio) for _ in range(depth))
+ self.norm, self.mixer = nn.LayerNorm(embed_dim), nn.Identity()
+ self.encoder_depth = len(self.blocks) // 2 if encoder_depth is None else encoder_depth
+ self.flex_attn = FlexAttentionCausal2D()
+ [setattr(blk.attn, "flex_attn", self.flex_attn) for blk in self.blocks]
+
+ def prepare_pe(self, c=None, ids=None, pos=None) -> Tuple[callable, callable]:
+ pad = 0 if c is None else c.size(1)
+ pe1 = pe2 = self.rope.get_func(pos, pad)
+ pe1 = self.rope.get_func(pos, pad, ids.expand(-1, -1, 3)) if ids is not None else pe1
+ return pe1, pe2
+
+ def enable_kvcache(self, mode=True):
+ [setattr(blk.attn, "cache_kv", mode) for blk in self.blocks]
+
+ def forward(self, x, c=None, prev_ids=None, pos=None) -> torch.Tensor:
+ x, prev_ids = x if isinstance(x, (tuple, list)) else (x, prev_ids)
+ prev_ids = prev_ids if self.encoder_depth else None
+ x = x_masked = self.pos_embed(self.patch_embed(x))
+ pe1, pe2 = self.prepare_pe(c, prev_ids, pos) if pos is not None else [None] * 2
+ if prev_ids is not None: # Split mask from x.
+ prev_ids = prev_ids.expand(-1, -1, x.size(-1))
+ x = x.gather(1, prev_ids)
+ x = x if c is None else torch.cat([c, x], dim=1)
+ for blk in self.blocks[: self.encoder_depth]:
+ x = blk(x, pe1)
+ if prev_ids is not None and c is not None: # Split c from x.
+ c, x = x.split((c.size(1), x.size(1) - c.size(1)), dim=1)
+ if prev_ids is not None: # Merge mask with x.
+ x = x_masked.to(dtype=x.dtype).scatter(1, prev_ids, x)
+ x = x if c is None else torch.cat([c, x], dim=1)
+ for blk in self.blocks[self.encoder_depth :]:
+ x = blk(x, pe2)
+ return self.norm(x if c is None else x[:, c.size(1) :])
diff --git a/URSA/diffnext/pipelines/__init__.py b/URSA/diffnext/pipelines/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6f2890ba6e6ffe81ea3d569415f63678c02be29
--- /dev/null
+++ b/URSA/diffnext/pipelines/__init__.py
@@ -0,0 +1,19 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Pipelines."""
+
+from diffnext.pipelines.nova import NOVAPipeline
+from diffnext.pipelines.ursa import URSAPipeline
diff --git a/URSA/diffnext/pipelines/builder.py b/URSA/diffnext/pipelines/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..93e0ab06687c07c5b296621ecc5e6e5517430097
--- /dev/null
+++ b/URSA/diffnext/pipelines/builder.py
@@ -0,0 +1,128 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Pipeline builders."""
+
+from typing import Dict
+
+import json
+import os
+import tempfile
+
+import torch
+
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
+from diffnext.utils.omegaconf_utils import OmegaConfEncoder
+
+
+def get_pipeline_path(
+ pretrained_path,
+ module_dict: dict = None,
+ module_config: Dict[str, dict] = None,
+ target_path: str = None,
+) -> str:
+ """Return the pipeling loading path.
+
+ Args:
+ pretrained_path (str)
+ The pretrained path to load pipeline.
+ module_dict (dict, *optional*)
+ The path dict to load custom modules.
+ module_config (Dict[str, dict], *optional*)
+ The custom configurations to dump into ``config.json``.
+ target_path (str, *optional*)
+ The path to store custom modules and configs.
+
+ Returns:
+ str: The pipeline loading path.
+ """
+ if module_dict is None and module_config is None:
+ return pretrained_path
+ target_path = target_path or tempfile.mkdtemp()
+ for k in os.listdir(pretrained_path):
+ if not os.path.isdir(os.path.join(pretrained_path, k)):
+ continue
+ os.makedirs(os.path.join(target_path, k), exist_ok=True)
+ for _ in os.listdir(os.path.join(pretrained_path, k)):
+ try:
+ os.symlink(os.path.join(pretrained_path, k, _), os.path.join(target_path, k, _))
+ except FileExistsError: # Some components may be provided.
+ pass
+ module_dict = module_dict.copy() if module_dict is not None else {}
+ model_index = module_dict.pop("model_index", os.path.join(pretrained_path, "model_index.json"))
+ model_index = json.load(open(model_index))
+ for k, v in module_dict.items():
+ model_index.pop(k) if not v else None
+ try:
+ os.symlink(v, os.path.join(target_path, k)) if v else None
+ except FileExistsError: # Some components may be provided.
+ pass
+ for k, v in (module_config or {}).items():
+ config_file = os.path.join(target_path, k, "config.json")
+ os.remove(config_file) if v and os.path.exists(config_file) else None
+ json.dump(v, open(config_file, "w"), cls=OmegaConfEncoder) if v else None
+ json.dump(model_index, open(os.path.join(target_path, "model_index.json"), "w"))
+ return target_path
+
+
+def build_diffusion_scheduler(scheduler_path, sample=False, **kwargs) -> SchedulerMixin:
+ """Create a diffusion scheduler instance.
+
+ Args:
+ scheduler_path (str or scheduler instance)
+ The path to load a diffusion scheduler.
+ sample (bool, *optional*, default to False)
+ Whether to create the sampling-specific scheduler.
+
+ Returns:
+ SchedulerMixin: The diffusion scheduler.
+ """
+ from diffnext.schedulers.scheduling_cfm import FlowMatchEulerDiscreteScheduler # noqa
+ from diffnext.schedulers.scheduling_ddpm import DDPMScheduler
+
+ if isinstance(scheduler_path, str):
+ class_key = "_{}_class_name".format("sample" if sample else "noise")
+ class_type = locals()[DDPMScheduler.load_config(**locals())[class_key]]
+ return class_type.from_pretrained(**locals())
+ elif hasattr(scheduler_path, "config"):
+ class_type = locals()[type(scheduler_path).__name__]
+ return class_type.from_config(scheduler_path.config)
+ return None
+
+
+def build_pipeline(pretrained_path, pipe_cls, dtype=torch.float16, **kwargs) -> DiffusionPipeline:
+ """Create a diffnext pipeline instance.
+
+ Examples:
+ ```py
+ >>> from diffnext.pipelines import NOVAPipeline
+ >>> from diffnext.pipelines.builder import build_pipeline
+ >>> pipe = build_pipeline("BAAI/nova-d48w768-sdxl1024", NOVAPipeline)
+ ```
+ Args:
+ pretrained_path (str):
+ The model path that includes ``model_index.json`` to create pipeline.
+ pipe_cls (object)
+ The pipeline class object that defines the ``from_pretrained`` method.
+ dtype (torch.dtype, *optional*, default to ``torch.float16``)
+ The compute dtype used for all pipeline components.
+
+ Returns:
+ DiffusionPipeline: The diffusion pipeline.
+ """
+ kwargs.setdefault("trust_remote_code", True)
+ kwargs.setdefault("torch_dtype", dtype)
+ return pipe_cls.from_pretrained(pretrained_path, **kwargs)
diff --git a/URSA/diffnext/pipelines/pipeline_utils.py b/URSA/diffnext/pipelines/pipeline_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d63b45ccfecd0e1256c56c8fa5d6c6855a5683c4
--- /dev/null
+++ b/URSA/diffnext/pipelines/pipeline_utils.py
@@ -0,0 +1,78 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Pipeline utilities."""
+
+from typing import List, Union
+
+from diffusers.utils import BaseOutput
+import numpy as np
+import PIL.Image
+import torch
+
+
+class NOVAPipelineOutput(BaseOutput):
+ """Output class for NOVA pipelines.
+
+ Args:
+ images (List[PIL.Image.Image] or np.ndarray)
+ List of PIL images or numpy array of shape `(batch_size, height, width, num_channels)`.
+ frames (np.ndarray)
+ List of video frames. The array shape is `(batch_size, num_frames, height, width, num_channels)`
+ """ # noqa
+
+ images: Union[List[PIL.Image.Image], np.ndarray]
+ frames: np.ndarray
+
+
+class URSAPipelineOutput(BaseOutput):
+ """Output class for URSA pipelines.
+
+ Args:
+ images (List[PIL.Image.Image] or np.ndarray)
+ List of PIL images or numpy array of shape `(batch_size, height, width, num_channels)`.
+ frames (np.ndarray)
+ List of video frames. The array shape is `(batch_size, num_frames, height, width, num_channels)`
+ """ # noqa
+
+ images: Union[List[PIL.Image.Image], np.ndarray]
+ frames: np.ndarray
+
+
+class PipelineMixin(object):
+ """Base class for diffusion pipeline."""
+
+ def register_module(self, model_or_path, name) -> torch.nn.Module:
+ """Register pipeline component.
+
+ Args:
+ model_or_path (str or torch.nn.Module):
+ The model or path to model.
+ name (str):
+ The module name.
+
+ Returns:
+ torch.nn.Module: The registered module.
+
+ """
+ model = model_or_path
+ if isinstance(model_or_path, str):
+ cls = self.__init__.__annotations__[name]
+ if hasattr(cls, "from_pretrained") and model_or_path:
+ model = cls.from_pretrained(model_or_path, torch_dtype=self.dtype)
+ model = model.to(self.device) if isinstance(model, torch.nn.Module) else model
+ model = cls() if isinstance(model, str) else model  # Default-construct only if nothing was loaded.
+ self.register_to_config(**{name: model.__class__.__name__})
+ return model
diff --git a/URSA/diffnext/schedulers/__init__.py b/URSA/diffnext/schedulers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7baa8d75fd88dcdfb923dea7e87df477739d27cf
--- /dev/null
+++ b/URSA/diffnext/schedulers/__init__.py
@@ -0,0 +1,16 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Schedulers."""
diff --git a/URSA/diffnext/schedulers/__pycache__/__init__.cpython-312.pyc b/URSA/diffnext/schedulers/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ff664902f4b657b2691cace7607766aa48b80c0a
Binary files /dev/null and b/URSA/diffnext/schedulers/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/diffnext/schedulers/__pycache__/scheduling_dfm.cpython-312.pyc b/URSA/diffnext/schedulers/__pycache__/scheduling_dfm.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b1091d235b16d16b33b2a7219af2a24f7478443b
Binary files /dev/null and b/URSA/diffnext/schedulers/__pycache__/scheduling_dfm.cpython-312.pyc differ
diff --git a/URSA/diffnext/schedulers/scheduling_cfm.py b/URSA/diffnext/schedulers/scheduling_cfm.py
new file mode 100644
index 0000000000000000000000000000000000000000..47349737ae02a8798417a6acc6443205c8e46e3d
--- /dev/null
+++ b/URSA/diffnext/schedulers/scheduling_cfm.py
@@ -0,0 +1,140 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+"""Simple implementation of continuous flow matching schedulers."""
+
+import dataclasses
+import math
+
+import numpy as np
+import torch
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import BaseOutput
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
+
+
+@dataclasses.dataclass
+class FlowMatchEulerDiscreteSchedulerOutput(BaseOutput):
+ """Output for scheduler's `step` function output."""
+
+ prev_sample: torch.FloatTensor
+
+
+class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
+
+ order = 1
+
+ @register_to_config
+ def __init__(self, num_train_timesteps=1000, shift=1.0, use_dynamic_shifting=False):
+ timesteps = np.arange(1, num_train_timesteps + 1, dtype="float32")[::-1]
+ sigmas, self._shift = timesteps / num_train_timesteps, shift
+ if not use_dynamic_shifting:
+ sigmas = shift * sigmas / (1 + (shift - 1) * sigmas)
+ self.timesteps = torch.as_tensor(sigmas * num_train_timesteps)
+ self.sigmas = torch.as_tensor(sigmas)
+ self.sigma_min, self.sigma_max = float(sigmas[-1]), float(sigmas[0])
+ self.timestep = self.sigma = None # Training states.
+ self._begin_index = self._step_index = None # Inference counters.
+
+ @property
+ def shift(self):
+ """The value used for shifting."""
+ return self._shift
+
+ @property
+ def step_index(self):
+ """The index counter for current timestep."""
+ return self._step_index
+
+ @property
+ def begin_index(self):
+ """The index for the first timestep."""
+ return self._begin_index
+
+ def _sigma_to_t(self, sigma):
+ return sigma * self.config.num_train_timesteps
+
+ def _init_step_index(self, timestep):
+ if self.begin_index is None:
+ self._step_index = self.index_for_timestep(timestep)
+ else:
+ self._step_index = self._begin_index
+
+ def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
+ return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
+
+ def set_shift(self, shift: float):
+ self._shift = shift
+
+ def index_for_timestep(self, timestep, schedule_timesteps=None):
+ if schedule_timesteps is None:
+ schedule_timesteps = self.timesteps
+ indices = (schedule_timesteps == timestep).nonzero()
+ return indices[1 if len(indices) > 1 else 0].item()
+
+ def sample_timesteps(self, size, device=None):
+ """Sample the discrete timesteps used for training."""
+ dist = torch.normal(0, 1, size, device=device).sigmoid_()
+ return dist.mul_(self.config.num_train_timesteps).to(dtype=torch.int64)
+
+ def set_timesteps(self, num_inference_steps, mu=None):
+ """Sets the discrete timesteps used for the diffusion chain."""
+ self.num_inference_steps = num_inference_steps
+ t_max, t_min = self._sigma_to_t(self.sigma_max), self._sigma_to_t(self.sigma_min)
+ timesteps = np.linspace(t_max, t_min, num_inference_steps, dtype="float32")
+ sigmas = timesteps / self.config.num_train_timesteps
+ if self.config.use_dynamic_shifting:
+ sigmas = self.time_shift(mu, 1.0, sigmas)
+ else:
+ sigmas = self.shift * sigmas / (1 + (self.shift - 1) * sigmas)
+ self.sigmas = sigmas.tolist() + [0]
+ self.timesteps = sigmas * self.config.num_train_timesteps
+ self._begin_index = self._step_index = None
+
+ def add_noise(
+ self,
+ original_samples: torch.Tensor,
+ noise: torch.Tensor,
+ timesteps: torch.Tensor,
+ ):
+ """Add forward noise to samples for training."""
+ dtype, device = original_samples.dtype, original_samples.device
+ self.timestep = self.timesteps.to(device=device)[timesteps]
+ self.sigma = self.sigmas.to(device=device, dtype=dtype)[timesteps]
+ self.sigma = self.sigma.view(timesteps.shape + (1,) * (noise.dim() - timesteps.dim()))
+ return self.sigma * noise + (1.0 - self.sigma) * original_samples
+
+ def scale_noise(self, sample: torch.Tensor, timestep: float, noise: torch.Tensor):
+ """Add forward noise to samples for inference."""
+ self._init_step_index(timestep) if self.step_index is None else None
+ sigma = self.sigmas[self.step_index]
+ return sigma * noise + (1.0 - sigma) * sample
+
+ def step(
+ self,
+ model_output: torch.Tensor,
+ timestep: float,
+ sample: torch.FloatTensor,
+ generator: torch.Generator = None,
+ return_dict=True,
+ ):
+ """Predict the sample from the previous timestep."""
+ self._init_step_index(timestep) if self.step_index is None else None
+ dt = self.sigmas[self.step_index + 1] - self.sigmas[self.step_index]
+ prev_sample = model_output.mul(dt).add_(sample)
+ self._step_index += 1
+ if not return_dict:
+ return (prev_sample,)
+ return FlowMatchEulerDiscreteSchedulerOutput(prev_sample=prev_sample)
diff --git a/URSA/diffnext/schedulers/scheduling_ddpm.py b/URSA/diffnext/schedulers/scheduling_ddpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..aff3f5dc18c05502fded03708c89120167c21b08
--- /dev/null
+++ b/URSA/diffnext/schedulers/scheduling_ddpm.py
@@ -0,0 +1,354 @@
+# Copyright 2024 UC Berkeley Team and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim
+
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import BaseOutput
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+
+
+@dataclass
+class DDPMSchedulerOutput(BaseOutput):
+ """Output class for the scheduler's `step` function output."""
+
+ prev_sample: torch.Tensor
+ pred_original_sample: Optional[torch.Tensor] = None
+
+
+def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999, alpha_transform_type="cosine"):
+ """Create a beta schedule that discretizes the given alpha_t_bar function."""
+ if alpha_transform_type == "cosine":
+ alpha_bar_fn = lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 # noqa
+ elif alpha_transform_type == "exp":
+ alpha_bar_fn = lambda t: math.exp(t * -12.0) # noqa
+ else:
+ raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
+ betas = []
+ for i in range(num_diffusion_timesteps):
+ t1 = i / num_diffusion_timesteps
+ t2 = (i + 1) / num_diffusion_timesteps
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
+ return torch.tensor(betas, dtype=torch.float32)
+
+
+def rescale_zero_terminal_snr(betas):
+ """Rescales betas to have zero terminal SNR."""
+ # Convert betas to alphas_bar_sqrt
+ alphas = 1.0 - betas
+ alphas_cumprod = torch.cumprod(alphas, dim=0)
+ alphas_bar_sqrt = alphas_cumprod.sqrt()
+ # Store old values.
+ alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+ alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+ # Shift so the last timestep is zero.
+ alphas_bar_sqrt -= alphas_bar_sqrt_T
+ # Scale so the first timestep is back to the old value.
+ alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+ # Convert alphas_bar_sqrt to betas
+ alphas_bar = alphas_bar_sqrt**2 # Revert sqrt
+ alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod
+ alphas = torch.cat([alphas_bar[0:1], alphas])
+ betas = 1 - alphas
+ return betas
+
+
+class DDPMScheduler(SchedulerMixin, ConfigMixin):
+ """
+ `DDPMScheduler` explores the connections between denoising score matching and Langevin dynamics sampling.
+
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+ methods the library implements for all schedulers such as loading and saving.
+
+ Args:
+ num_train_timesteps (`int`, defaults to 1000):
+ The number of diffusion steps to train the model.
+ beta_start (`float`, defaults to 0.0001):
+ The starting `beta` value of inference.
+ beta_end (`float`, defaults to 0.02):
+ The final `beta` value.
+ beta_schedule (`str`, defaults to `"linear"`):
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+ `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
+ trained_betas (`np.ndarray`, *optional*):
+ An array of betas to pass directly to the constructor without using `beta_start` and `beta_end`.
+ variance_type (`str`, defaults to `"fixed_small"`):
+ Clip the variance when adding noise to the denoised sample. Choose from `fixed_small`, `fixed_small_log`,
+ `fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
+ clip_sample (`bool`, defaults to `True`):
+ Clip the predicted sample for numerical stability.
+ clip_sample_range (`float`, defaults to 1.0):
+ The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
+ `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
+ Video](https://imagen.research.google/video/paper.pdf) paper).
+ thresholding (`bool`, defaults to `False`):
+ Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
+ as Stable Diffusion.
+ dynamic_thresholding_ratio (`float`, defaults to 0.995):
+ The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
+ sample_max_value (`float`, defaults to 1.0):
+ The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
+ timestep_spacing (`str`, defaults to `"leading"`):
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+ steps_offset (`int`, defaults to 0):
+ An offset added to the inference steps, as required by some model families.
+ rescale_betas_zero_snr (`bool`, defaults to `False`):
+ Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+ dark samples instead of limiting it to samples with medium brightness. Loosely related to
+ [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
+ """ # noqa
+
+ _compatibles = [e.name for e in KarrasDiffusionSchedulers]
+ order = 1
+
+ @register_to_config
+ def __init__(
+ self,
+ num_train_timesteps: int = 1000,
+ beta_start: float = 0.0001,
+ beta_end: float = 0.02,
+ beta_schedule: str = "linear",
+ trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
+ variance_type: str = "fixed_small",
+ clip_sample: bool = True,
+ prediction_type: str = "epsilon",
+ thresholding: bool = False,
+ dynamic_thresholding_ratio: float = 0.995,
+ clip_sample_range: float = 1.0,
+ sample_max_value: float = 1.0,
+ timestep_spacing: str = "leading",
+ steps_offset: int = 0,
+ rescale_betas_zero_snr: int = False,
+ ):
+ if trained_betas is not None:
+ self.betas = torch.tensor(trained_betas, dtype=torch.float32)
+ elif beta_schedule == "linear":
+ self.betas = torch.linspace(
+ beta_start, beta_end, num_train_timesteps, dtype=torch.float32
+ )
+ elif beta_schedule == "scaled_linear":
+ a, b = beta_start**0.5, beta_end**0.5
+ self.betas = torch.linspace(a, b, num_train_timesteps, dtype=torch.float32) ** 2
+ elif beta_schedule == "squaredcos_cap_v2": # Glide cosine schedule
+ self.betas = betas_for_alpha_bar(num_train_timesteps)
+ elif beta_schedule == "sigmoid": # GeoDiff sigmoid schedule
+ betas = torch.linspace(-6, 6, num_train_timesteps)
+ self.betas = torch.sigmoid(betas) * (beta_end - beta_start) + beta_start
+ else:
+ raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
+ # Rescale for zero SNR
+ if rescale_betas_zero_snr:
+ self.betas = rescale_zero_terminal_snr(self.betas)
+ self.alphas = 1.0 - self.betas
+ self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+ self.one = torch.tensor(1.0)
+ self.init_noise_sigma = 1.0
+ self.custom_timesteps = False
+ self.num_inference_steps = None
+ self.timesteps = torch.from_numpy(np.arange(num_train_timesteps)[::-1].copy())
+ self.variance_type = variance_type
+
+ def scale_model_input(
+ self, sample: torch.Tensor, timestep: Optional[int] = None
+ ) -> torch.Tensor:
+ """Scale the denoising model input depending on the current timestep."""
+ return sample
+
+ def sample_timesteps(self, size, device=None):
+ return torch.randint(0, self.config.num_train_timesteps, size, device=device)
+
+ def set_timesteps(
+ self,
+ num_inference_steps: Optional[int] = None,
+ device: Union[str, torch.device] = None,
+ timesteps: Optional[List[int]] = None,
+ ):
+ """Sets the discrete timesteps used for the diffusion chain (to be run before inference)."""
+ if num_inference_steps is not None and timesteps is not None:
+ raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.")
+ self.custom_timesteps = timesteps is not None
+ self.num_inference_steps = num_inference_steps
+ if timesteps is not None:
+ timesteps = np.array(timesteps, dtype=np.int64)
+ # See Table 2. of https://arxiv.org/abs/2305.08891
+ elif self.config.timestep_spacing == "linspace":
+ timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
+ timesteps = timesteps.round()[::-1].copy().astype(np.int64)
+ elif self.config.timestep_spacing == "leading":
+ step_ratio = self.config.num_train_timesteps // self.num_inference_steps
+ timesteps = np.arange(0, num_inference_steps) * step_ratio
+ timesteps = timesteps.round()[::-1].copy().astype(np.int64) + self.config.steps_offset
+ elif self.config.timestep_spacing == "trailing":
+ step_ratio = self.config.num_train_timesteps / self.num_inference_steps
+ timesteps = np.arange(self.config.num_train_timesteps, 0, -step_ratio)
+ timesteps = timesteps.round().astype(np.int64) - 1
+ else:
+ raise ValueError(f"{self.config.timestep_spacing} is not supported.")
+ self.timesteps = torch.as_tensor(timesteps, device=device)
+
+ def _get_variance(self, t, predicted_variance=None):
+ prev_t = self.previous_timestep(t)
+ alpha_prod_t = self.alphas_cumprod[t]
+ alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
+ current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev
+ # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf) # noqa
+ # and sample from it to get previous sample
+ # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
+ variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * current_beta_t
+ # we always take the log of variance, so clamp it to ensure it's not 0
+ variance = torch.clamp(variance, min=1e-20)
+ if self.config.variance_type == "fixed_small_log": # for rl-diffuser
+ return torch.exp(0.5 * variance.log())
+ elif self.config.variance_type == "fixed_large":
+ return current_beta_t
+ elif self.config.variance_type == "fixed_large_log": # Glide max_log
+ return torch.log(current_beta_t)
+ elif self.config.variance_type == "learned":
+ return predicted_variance
+ elif self.config.variance_type == "learned_range":
+ frac = (predicted_variance + 1) / 2
+ min_log, max_log = variance.log(), torch.log(current_beta_t)
+ return frac * max_log + (1 - frac) * min_log
+ return variance
+
+ def step(
+ self,
+ model_output: torch.Tensor,
+ timestep: int,
+ sample: torch.Tensor,
+ generator=None,
+ return_dict: bool = True,
+ ) -> Union[DDPMSchedulerOutput, Tuple]:
+ """
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+ process from the learned model outputs (most often the predicted noise).
+
+ Args:
+ model_output (`torch.Tensor`):
+ The direct output from learned diffusion model.
+ timestep (`float`):
+ The current discrete timestep in the diffusion chain.
+ sample (`torch.Tensor`):
+ A current instance of a sample created by the diffusion process.
+ generator (`torch.Generator`, *optional*):
+ A random number generator.
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`.
+
+ Returns:
+ [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`:
+ If return_dict is `True`, [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] is returned, otherwise a
+ tuple is returned where the first element is the sample tensor.
+
+ """ # noqa
+ t = timestep
+ prev_t = self.previous_timestep(t)
+
+ predicted_variance = None
+ if self.variance_type in ("learned", "learned_range"):
+ if model_output.shape[1] == sample.shape[1] * 2:
+ model_output, predicted_variance = model_output.chunk(2, dim=1)
+
+ # 1. compute alphas, betas
+ alpha_prod_t = self.alphas_cumprod[t]
+ alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
+ beta_prod_t = 1 - alpha_prod_t
+ beta_prod_t_prev = 1 - alpha_prod_t_prev
+ current_alpha_t = alpha_prod_t / alpha_prod_t_prev
+ current_beta_t = 1 - current_alpha_t
+
+ # 2. compute predicted original sample from predicted noise also called
+ # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf
+ if self.config.prediction_type == "epsilon":
+ pred_sample = (sample - beta_prod_t**0.5 * model_output) / alpha_prod_t**0.5
+ elif self.config.prediction_type == "sample":
+ pred_sample = model_output
+ elif self.config.prediction_type == "v_prediction":
+ pred_sample = alpha_prod_t**0.5 * sample - beta_prod_t**0.5 * model_output
+ else:
+ raise ValueError(f"Unsupported prediction type given as {self.config.prediction_type}.")
+
+ # 4. Compute coefficients for pred_sample x_0 and current sample x_t
+ # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
+ pred_sample_coeff = alpha_prod_t_prev**0.5 * current_beta_t / beta_prod_t
+ current_sample_coeff = current_alpha_t**0.5 * beta_prod_t_prev / beta_prod_t
+
+ # 5. Compute predicted previous sample µ_t
+ # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
+ prev_sample = pred_sample_coeff * pred_sample + current_sample_coeff * sample
+
+ # 6. Add noise
+ if t > 0:
+ device, dtype = model_output.device, model_output.dtype
+ noise = randn_tensor(sample.shape, generator=generator, device=device, dtype=dtype)
+ if self.variance_type == "fixed_small_log":
+ variance = self._get_variance(t, predicted_variance)
+ elif self.variance_type == "learned_range":
+ variance = self._get_variance(t, predicted_variance).mul(0.5).exp()
+ else:
+ variance = self._get_variance(t, predicted_variance) ** 0.5
+ prev_sample.add_(noise.mul_(variance))
+
+ if not return_dict:
+ return (prev_sample,)
+ return DDPMSchedulerOutput(prev_sample=prev_sample)
+
+ def previous_timestep(self, timestep):
+ if self.custom_timesteps:
+ index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
+ if index == self.timesteps.shape[0] - 1:
+ return torch.tensor(-1)
+ return self.timesteps[index + 1]
+ num_inference_steps = self.num_inference_steps or self.config.num_train_timesteps
+ return timestep - self.config.num_train_timesteps // num_inference_steps
+
+ def add_noise(
+ self, original_samples: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor
+ ) -> torch.Tensor:
+ timesteps = timesteps.to(device=original_samples.device)
+ self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
+ alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
+ sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+ sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+ expand_shape = timesteps.shape + (1,) * (noise.dim() - timesteps.dim())
+ sqrt_alpha_prod = sqrt_alpha_prod.view(expand_shape)
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.view(expand_shape)
+ return sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+
+ def get_velocity(
+ self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor
+ ) -> torch.Tensor:
+ timesteps = timesteps.to(sample.device)
+ self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
+ alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
+ sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+ sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+ expand_shape = timesteps.shape + (1,) * (noise.dim() - timesteps.dim())
+ sqrt_alpha_prod = sqrt_alpha_prod.view(expand_shape)
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.view(expand_shape)
+ return sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
+
+ def __len__(self):
+ return self.config.num_train_timesteps
diff --git a/URSA/diffnext/schedulers/scheduling_dfm.py b/URSA/diffnext/schedulers/scheduling_dfm.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bb49b96c938e10a4f72c7b677fc131a655f69ff
--- /dev/null
+++ b/URSA/diffnext/schedulers/scheduling_dfm.py
@@ -0,0 +1,347 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Simple implementation of discrete flow matching schedulers."""
+
+import dataclasses
+import os
+from typing import Union
+from typing_extensions import Self
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_outputs import BaseOutput
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
+import torch
+
+
+@dataclasses.dataclass
+class KineticOptimalSchedulerOutput(BaseOutput):
+ """Output for scheduler's `step` function output."""
+
+ prev_sample: torch.LongTensor
+
+
+class DiscreteProbPath(object):
+ """Define a general discrete probability path."""
+
+ def __init__(self, emb):
+ """Create a ``DiscreteProbPath``.
+
+ Args:
+ emb (Union[torch.Tensor, torch.nn.Embedding])
+ The codebook embeddings.
+ """
+ self.generator = None
+ self.emb = emb.weight if isinstance(emb, torch.nn.Embedding) else emb
+
+ def categorical(self, prob) -> torch.Tensor:
+ """Categorical sampling according to weights in the last dimension.
+
+ Args:
+ prob (torch.Tensor)
+ The sample token probability, shape (bsz, ..., codebook_size).
+
+ Returns:
+ torch.Tensor: The sample token index, shape (bsz, ...).
+ """
+ return prob.flatten(0, -2).multinomial(1, generator=self.generator).view(*prob.shape[:-1])
+
+
+class MixtureDiscreteProbPath(DiscreteProbPath):
+ """Define a mixture discrete probability path."""
+
+ def sample(self, x_1, t: Union[float, torch.Tensor]) -> torch.Tensor:
+ """Sample from the affine probability path.
+
+ Args:
+ x_1 (torch.Tensor)
+ The target token index, shape (bsz, ...).
+ t (float or torch.Tensor)
+ The timestep ``t``, shape (bsz,).
+
+ Returns:
+ torch.Tensor: The sample token index at time t, shape (bsz, ...).
+ """
+ t = t.to(self.emb).view([-1] + [1] * (x_1.dim() - 1)) if hasattr(t, "cpu") else t
+ x_0 = x_1.new_empty(x_1.shape).random_(to=self.emb.shape[0], generator=self.generator)
+ return x_0.where(t.new_empty(x_1.shape).uniform_(generator=self.generator).lt(1 - t), x_1)
+
+ def get_velocity(self, logits, x_t, t: float, x_1=None) -> torch.Tensor:
+ """Return the velocity by converting the factorized posterior.
+
+ Args:
+ logits (torch.Tensor)
+ The sample token logits at time t+1, shape (bsz, ..., codebook_size).
+ x_t (torch.Tensor)
+ The sample token index at time t, shape (bsz, ...).
+ t (float)
+ The timestep ``t``.
+ x_1 (torch.Tensor, optional):
+ The sample token index at time t+1, shape (bsz, ...).
+
+ Returns:
+ torch.Tensor: The velocity ``v``.
+ """
+ x_1 = self.categorical(logits.softmax(-1)) if x_1 is None else x_1
+ return logits.zero_().scatter_(-1, x_1.unsqueeze(-1), 1 / (1 - t))
+
+
+class MetricDiscreteProbPath(DiscreteProbPath):
+ """Define a metric-induced discrete probability path."""
+
+ def __init__(self, emb, alpha=0.9, c=3, eps=1e-5):
+ """Create a ``MetricDiscreteProbPath``.
+
+ Args:
+ emb (Union[torch.Tensor, torch.nn.Embedding])
+ The codebook embeddings.
+ alpha (float)
+ The value to ``alpha``.
+ c (float)
+ The value to ``c``.
+ eps (float, *optional*, defaults to 1e-5):
+ A small value to clip the L2 normalization denominator.
+ """
+ self.alpha, self.c, self.eps, self.generator = alpha, c, eps, None
+ emb = emb.weight if isinstance(emb, torch.nn.Embedding) else emb
+ self.emb = torch.nn.functional.normalize(emb, dim=-1, eps=eps)
+ self.emb_sumsq = self.emb.square().sum(-1)
+ self.emb_mul2t = self.emb.mul(2).T.contiguous()
+
+ def get_dist(self, emb_1: torch.Tensor, emb_2: torch.Tensor = None) -> torch.Tensor:
+ """Return the distance between two input embeddings.
+
+ Args:
+ emb_1 (torch.Tensor)
+ The input1 embeddings, shape (bsz, ..., dim).
+ emb_2 (torch.Tensor, optional)
+ The input2 embeddings, shape (bsz, ..., dim) or (bsz, ..., codebook_size).
+
+ Returns:
+ torch.Tensor: The distance, shape (bsz, ..., 1) or (bsz, ..., codebook_size).
+ """
+ emb_1 = torch.nn.functional.normalize(emb_1, dim=-1, eps=self.eps)
+ if emb_2 is None or emb_1.size() != emb_2.size(): # Distance between input and codebook.
+ emb_1_sumsq, emb_2_sumsq = emb_1.square().sum(-1, True), self.emb_sumsq
+ return torch.add(emb_1_sumsq, emb_2_sumsq, out=emb_2).sub_(emb_1 @ self.emb_mul2t)
+ emb_2 = torch.nn.functional.normalize(emb_2, dim=-1, eps=self.eps)
+ return emb_1.sub(emb_2).abs_().square_().sum(-1, keepdim=True)
+
+ def get_prob(self, emb: torch.Tensor, t: Union[float, torch.Tensor]) -> torch.Tensor:
+ """Return the metric-induced probability.
+
+ Args:
+ emb (torch.Tensor)
+ The input embeddings, shape (bsz, ..., dim).
+ t (float or torch.Tensor)
+ The timestep ``t``, shape (bsz,).
+
+ Returns:
+ torch.Tensor: The probability at timestep ``t``, shape (bsz, ..., codebook_size).
+ """
+ beta = self.c * (t / (1 - t)) ** self.alpha
+ beta = beta.to(emb).view([-1] + [1] * (emb.dim() - 1)) if hasattr(t, "cpu") else beta
+ return self.get_dist(emb).mul_(-beta).softmax(-1)
+
+ def get_prob_by_dist(self, dist: torch.Tensor, t: Union[float, torch.Tensor]) -> torch.Tensor:
+ """Return the metric-induced probability by distance.
+
+ Args:
+ dist (torch.Tensor)
+ The distance, shape (bsz, ..., codebook_size).
+ t (float or torch.Tensor)
+ The timestep ``t``, shape (bsz,).
+
+ Returns:
+ torch.Tensor: The probability at timestep ``t``, shape (bsz, ..., codebook_size).
+ """
+ beta = self.c * (t / (1 - t)) ** self.alpha
+ beta = beta.to(dist).view([-1] + [1] * (dist.dim() - 1)) if hasattr(t, "cpu") else beta
+ return dist.mul(-beta).softmax(-1)
+
+ def sample(self, x_1, t: Union[float, torch.Tensor]) -> torch.Tensor:
+ """Sample from the affine probability path.
+
+ Args:
+ x_1 (torch.Tensor)
+ The target token index, shape (bsz, ...).
+ t (float or torch.Tensor)
+ The timestep ``t``, shape (bsz,).
+
+ Returns:
+ torch.Tensor: The sample token index at time t, shape (bsz, ...).
+ """
+ return self.categorical(self.get_prob(self.emb[x_1], t))
+
+ def get_velocity(self, logits, x_t, t: float, x_1=None) -> torch.Tensor:
+ """Return the velocity by converting the factorized posterior.
+
+ Args:
+ logits (torch.Tensor)
+ The sample token logits at time t+1, shape (bsz, ..., codebook_size).
+ x_t (torch.Tensor)
+ The sample token index at time t, shape (bsz, ...).
+ t (float)
+ The timestep ``t``.
+ x_1 (torch.Tensor, optional):
+ The sample token index at time t+1, shape (bsz, ...).
+
+ Returns:
+ torch.Tensor: The velocity ``v``, shape (bsz, ..., codebook_size).
+ """
+ numerator = self.c * self.alpha * (t ** (self.alpha - 1)) if t > 0 else 0
+ d_beta_t = numerator / (1 - t) ** (self.alpha + 1)
+ emb_x_1 = self.emb[self.categorical(logits.softmax(-1)) if x_1 is None else x_1]
+ dist_x_1_x = self.get_dist(emb_x_1, logits) # (bsz, ..., codebook_size)
+ prob_x_1_x = self.get_prob_by_dist(dist_x_1_x, t) # (bsz, ..., codebook_size)
+ dist_x_t_x_1 = self.get_dist(self.emb[x_t], emb_x_1) # (bsz, ..., 1)
+ dist = torch.nn.functional.relu(dist_x_1_x.sub_(dist_x_t_x_1).neg_(), inplace=True)
+ return prob_x_1_x.mul_(d_beta_t).mul_(dist) # (bsz, ..., codebook_size)
+
+
+class KineticOptimalScheduler(SchedulerMixin, ConfigMixin):
+ """Kinetic optimal scheduler with general discrete paths."""
+
+ @register_to_config
+ def __init__(self, alpha=None, c=None, shift=1.0, eps=1e-5, **kwargs):
+ self.alpha, self.c, self.shift, self.eps = alpha, c, shift, eps
+ self.init_args, self.path, self.codebook_size = kwargs or {}, None, 0
+ self.init_args.setdefault("shift", shift) if shift != 1 else None
+
+ def __repr__(self) -> str:
+ """Return the extra representation of this scheduler."""
+ s = f"{self.__class__.__name__}"
+ if self.alpha is None: # Fallback to ``MixtureDiscreteProbPath``.
+ return s + "(shift={shift})".format(**self.__dict__)
+ return s + "(alpha={alpha}, c={c}, shift={shift})".format(**self.__dict__)
+
+ @classmethod
+ def from_pretrained(cls, pretrained_path, device=None, dtype=None, **kwargs) -> Self:
+ """Instantiate the scheduler from a pretrained model vocabulary."""
+ return KineticOptimalScheduler().load_pretrained(pretrained_path, device, dtype, **kwargs)
+
+ def load_pretrained(self, pretrained_path=None, device=None, dtype=None, **kwargs) -> Self:
+ """Load the scheduler from a pretrained model vocabulary."""
+ pretrained_path = self.init_args.get("pretrained_path", None) or pretrained_path
+ pretrained_args = super().from_pretrained(pretrained_path, **kwargs).__dict__
+ pretrained_args.update({"init_args": self.init_args, **self.init_args})
+ self.__dict__.update(pretrained_args)
+ model_file = os.path.join(pretrained_path, "scheduler_model.pth")
+ emb = torch.load(model_file, weights_only=False)["path.emb"]
+ emb = emb.to(device).to(dtype=dtype or torch.float16)
+ self.path = MetricDiscreteProbPath(emb=emb, alpha=self.alpha, c=self.c, eps=self.eps)
+ self.path = MixtureDiscreteProbPath(emb=emb) if self.alpha is None else self.path
+ self.codebook_size = self.path.emb.size(0)
+ return self
+
+ def to(self, device=None, dtype=None) -> Self:
+ """Convert to given device and dtype."""
+ for k, v in self.path.__dict__.items():
+ self.path.__dict__[k] = v.to(device, dtype) if isinstance(v, torch.Tensor) else v
+ return self
+
+ def sample_timesteps(self, size, device=None, generator=None) -> torch.Tensor:
+ """Sample a batch of timesteps for training.
+
+ Args:
+ size (Tuple[int])
+ The sample size of timesteps.
+ device (torch.device, optional)
+ The output device.
+ generator (torch.Generator, optional):
+ The random generator.
+ """
+ sigma = 1 - torch.rand(size, device=device, generator=generator).mul_(0.999)
+ return 1 - self.shift * sigma / (1 + (self.shift - 1) * sigma)
+
+ def set_timesteps(self, num_inference_steps, *args, **kwargs):
+ """Set the inference timesteps for sampling.
+
+ Args:
+ num_inference_steps (int)
+ The number of inference steps.
+ """
+ self.num_inference_steps = num_inference_steps
+ self.timesteps = torch.arange(num_inference_steps).tolist()
+
+ def add_noise(self, original_samples, timesteps, generator=None) -> torch.Tensor:
+ """Add forward noise to samples.
+
+ Args:
+ original_samples (torch.Tensor)
+ The sample token index, shape (bsz, ...).
+            timesteps (float or torch.Tensor)
+ The timestep ``t``, shape (bsz,).
+ generator (torch.Generator, optional):
+ The random generator.
+
+ Returns:
+ torch.Tensor: The sample token index at time t, shape (bsz, ...).
+ """
+ self.path.generator = generator if generator else self.path.generator
+ return self.path.sample(original_samples, timesteps)
+
+ def timestep_to_t(self, timestep) -> float:
+ """Return the ``t`` for given timestep.
+
+ Args:
+ timestep (int)
+ The discrete timestep index.
+
+ Returns:
+ float: The continuous timestep in [0, 1).
+ """
+ sigma = 1 - self.timesteps[timestep] / self.num_inference_steps
+ return 1 - self.shift * sigma / (1 + (self.shift - 1) * sigma)
+
+ def step(
+ self,
+ model_output,
+ timestep,
+ sample,
+ generator=None,
+ return_dict=True,
+ ) -> KineticOptimalSchedulerOutput:
+ """Predict the sample from the previous timestep.
+
+ Args:
+ model_output (torch.Tensor)
+ The sample token logits at time t+1, shape (bsz, ..., codebook_size).
+ timestep (int)
+ The discrete timestep index.
+ sample (torch.Tensor)
+ The sample token index at time t, shape (bsz, ...).
+ generator (torch.Generator, optional):
+ The random generator.
+ return_dict (bool, optional)
+ Whether return the output in a dict.
+
+ Returns:
+ torch.Tensor: The sample token index at time t+1, shape (bsz, ...).
+ """
+ self.path.generator = generator if generator else self.path.generator
+ if timestep == self.num_inference_steps - 1:
+ prev_sample = self.path.categorical(model_output.softmax(-1))
+ else:
+ t = self.timestep_to_t(timestep)
+ dt = self.timestep_to_t(timestep + 1) - t
+ v = self.path.get_velocity(model_output, sample, t)
+ u_dist = torch.empty_like(sample, dtype=v.dtype).uniform_(generator=generator)
+ jump_thresh = 1 - v.scatter_(-1, sample[..., None], 0).sum(-1).mul_(-dt).exp_()
+ prev_sample, jump_index = sample.clone(), u_dist < jump_thresh
+ prev_sample[jump_index] = self.path.categorical(v[jump_index])
+ if not return_dict:
+ return (prev_sample,)
+ return KineticOptimalSchedulerOutput(prev_sample=prev_sample)
diff --git a/URSA/diffnext/utils/__init__.py b/URSA/diffnext/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6464320eaa9e83e6b71182dcdc99477a9f1bbb45
--- /dev/null
+++ b/URSA/diffnext/utils/__init__.py
@@ -0,0 +1,19 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Utilities."""
+
+from diffnext.utils.export_utils import export_to_image
+from diffnext.utils.export_utils import export_to_video
diff --git a/URSA/diffnext/utils/__pycache__/__init__.cpython-312.pyc b/URSA/diffnext/utils/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..86418f20eedee3ff8ae97f8a53cb51012d718149
Binary files /dev/null and b/URSA/diffnext/utils/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/diffnext/utils/__pycache__/accelerate_utils.cpython-312.pyc b/URSA/diffnext/utils/__pycache__/accelerate_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c53aa34f956b8bd135efc768a48778e3c9e114f2
Binary files /dev/null and b/URSA/diffnext/utils/__pycache__/accelerate_utils.cpython-312.pyc differ
diff --git a/URSA/diffnext/utils/__pycache__/export_utils.cpython-312.pyc b/URSA/diffnext/utils/__pycache__/export_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..19851dab974e33745d8a8248e9a2430c1bc30e96
Binary files /dev/null and b/URSA/diffnext/utils/__pycache__/export_utils.cpython-312.pyc differ
diff --git a/URSA/diffnext/utils/__pycache__/omegaconf_utils.cpython-312.pyc b/URSA/diffnext/utils/__pycache__/omegaconf_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0835b465fb323d720e7b31688677c5ca8e550f5a
Binary files /dev/null and b/URSA/diffnext/utils/__pycache__/omegaconf_utils.cpython-312.pyc differ
diff --git a/URSA/diffnext/utils/__pycache__/profiler.cpython-312.pyc b/URSA/diffnext/utils/__pycache__/profiler.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5b964f12ba9465e08788bf35c9c8678c2ab0a53f
Binary files /dev/null and b/URSA/diffnext/utils/__pycache__/profiler.cpython-312.pyc differ
diff --git a/URSA/diffnext/utils/accelerate_utils.py b/URSA/diffnext/utils/accelerate_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee154d16a1ded91cf53a06e0f7a46e53ae07e4f1
--- /dev/null
+++ b/URSA/diffnext/utils/accelerate_utils.py
@@ -0,0 +1,105 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Accelerate utilities."""
+
+import atexit
+import functools
+import logging
+import os
+import sys
+import time
+
+import accelerate
+import torch
+import wandb
+
+from diffnext.utils.omegaconf_utils import flatten_omega_conf
+
+from accelerate import Accelerator
+from accelerate.utils import DistributedDataParallelKwargs
+
+
+def build_accelerator(config, **kwargs) -> accelerate.Accelerator:
+ """Build accelerator."""
+
+ kwargs_handlers = []
+
+ # 对普通 DDP 开启 unused param 检测
+ ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
+ kwargs_handlers.append(ddp_kwargs)
+
+ accelerator = accelerate.Accelerator(
+ log_with=kwargs.get("log_with", None),
+ mixed_precision=config.training.mixed_precision,
+ gradient_accumulation_steps=config.training.gradient_accumulation_steps,
+ kwargs_handlers=kwargs_handlers,
+ )
+ if hasattr(accelerator.state.deepspeed_plugin, "deepspeed_config"):
+ import deepspeed
+
+ deepspeed.logger.setLevel(kwargs.get("deepspeed_log_lvl", "WARNING"))
+ # Dummy size to avoid the raised errors.
+ accelerator.state.deepspeed_plugin.deepspeed_config["train_micro_batch_size_per_gpu"] = 1
+ return accelerator
+
+
+def build_wandb(config, accelerator):
+ """Build wandb for accelerator."""
+ if "wandb" not in config or not accelerator.is_main_process:
+ return
+ config.wandb = config.wandb or type(config)({})
+ old_run_id = config.wandb.get("run_id", None)
+ config.wandb.run_id = run_id = old_run_id or wandb.util.generate_id()
+ init_kwargs = dict(id=run_id, name=config.experiment.name, resume=old_run_id is not None)
+ init_kwargs["config"] = {k: v for k, v in flatten_omega_conf(config, True)}
+ accelerator.init_trackers(config.experiment.project, init_kwargs={"wandb": init_kwargs})
+
+
+def get_ddp_shards(accelerator) -> dict:
+ """Return the shard arguments for simple DDP."""
+ return {"shard_id": accelerator.process_index, "num_shards": accelerator.num_processes}
+
+
+def precision_to_dtype(precision="bf16") -> torch.dtype:
+ """Convert precision string to torch dtype."""
+ str_dict = {"fp16": "float16", "bf16": "bfloat16", "fp32": "float32"}
+ return getattr(torch, str_dict.get(precision.lower(), "float32"))
+
+
+@functools.lru_cache()
+def set_logger(output_dir=None, name="diffnext", level="INFO", accelerator=None):
+ """Set logger."""
+
+ @functools.lru_cache(maxsize=None)
+ def cached_log_stream(filename):
+ """Register a cached filename."""
+ f = open(filename, "a")
+ atexit.register(f.close)
+ return f
+
+ logger = logging.getLogger(name)
+ logger.propagate, _ = False, logger.setLevel(level)
+ fmt = "%(asctime)s %(levelname)s %(filename)s:%(lineno)d] %(message)s"
+ formatter = logging.Formatter(fmt, datefmt="%m/%d %H:%M:%S")
+ ch = logging.StreamHandler(sys.stdout)
+ ch.setLevel(level), ch.setFormatter(formatter), logger.addHandler(ch)
+ output_dir = "" if (accelerator and not accelerator.is_main_process) else output_dir
+ if output_dir:
+ os.makedirs(os.path.join(output_dir, "logs"), exist_ok=True)
+ log_file = time.strftime("%Y%m%d_%H%M%S", time.localtime(time.time())) + ".log"
+ fh = logging.StreamHandler(cached_log_stream(os.path.join(output_dir, "logs", log_file)))
+ fh.setLevel(level), fh.setFormatter(formatter), logger.addHandler(fh)
+ return accelerate.logging.MultiProcessAdapter(logger, {}) if accelerator else logger
diff --git a/URSA/diffnext/utils/export_utils.py b/URSA/diffnext/utils/export_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..72dd8d023617fa52e3797b475e35766cabe3639a
--- /dev/null
+++ b/URSA/diffnext/utils/export_utils.py
@@ -0,0 +1,47 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Export utilities."""
+
+import tempfile
+
+try:
+ import imageio
+except ImportError:
+ imageio = None
+import PIL.Image
+
+
+def export_to_image(image, output_image_path=None, suffix=".webp", quality=100):
+ """Export to image."""
+ if output_image_path is None:
+ output_image_path = tempfile.NamedTemporaryFile(suffix=suffix).name
+ if isinstance(image, PIL.Image.Image):
+ image.save(output_image_path, quality=quality)
+ else:
+ PIL.Image.fromarray(image).save(output_image_path, quality=quality)
+ return output_image_path
+
+
+def export_to_video(video_frames, output_video_path=None, fps=12):
+ """Export to video."""
+ if output_video_path is None:
+ output_video_path = tempfile.NamedTemporaryFile(suffix=".mp4").name
+ if imageio is None:
+ raise ImportError("Failed to import library.")
+ with imageio.get_writer(output_video_path, fps=fps) as writer:
+ for frame in video_frames:
+ writer.append_data(frame)
+ return output_video_path
diff --git a/URSA/diffnext/utils/omegaconf_utils.py b/URSA/diffnext/utils/omegaconf_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb6c0792596961763c4e7e386cf63262ba9dca79
--- /dev/null
+++ b/URSA/diffnext/utils/omegaconf_utils.py
@@ -0,0 +1,102 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Omegaconf utilities."""
+
+import importlib
+import json
+from typing import List
+
+import omegaconf
+
+
+class OmegaConfEncoder(json.JSONEncoder):
+ """Custom JSON encoder for omegaconf objects."""
+
+ def default(self, obj):
+ if isinstance(obj, (omegaconf.ListConfig, omegaconf.DictConfig)):
+ return omegaconf.OmegaConf.to_container(obj, resolve=True)
+ return super().default(obj)
+
+
+def get_config() -> omegaconf.DictConfig:
+ """Return omega configurations from CLI."""
+ cli_conf = omegaconf.OmegaConf.from_cli()
+ omegaconf.OmegaConf.register_new_resolver("eval", eval) # Register ``eval`` func.
+ return omegaconf.OmegaConf.merge(omegaconf.OmegaConf.load(cli_conf.config), cli_conf)
+
+
+def save_config(config: omegaconf.DictConfig, f):
+ """Save config to YAML format string."""
+ omegaconf.OmegaConf.save(config, f)
+
+
+def config_to_yaml(config: omegaconf.DictConfig) -> str:
+ """Dump config to YAML format string."""
+ return omegaconf.OmegaConf.to_yaml(config)
+
+
+def config_to_class(config: omegaconf.DictConfig) -> object:
+ """Return the class object from config."""
+
+ def get_obj_from_str(string, reload=False):
+ module, cls = string.rsplit(".", 1)
+ if reload:
+ module_imp = importlib.import_module(module)
+ importlib.reload(module_imp)
+ return getattr(importlib.import_module(module, package=None), cls)
+
+ if not config:
+ return None
+ if "target" not in config:
+ raise KeyError("Expected key `target` to instantiate.")
+ return get_obj_from_str(config["target"])
+
+
+def config_to_object(config: omegaconf.DictConfig, **kwargs) -> object:
+ """Instantiate an object from config."""
+ if not config:
+ return None
+ kwargs.update(config.get("params", dict()))
+ return config_to_class(config)(**kwargs)
+
+
+def flatten_omega_conf(cfg, resolve=True) -> List:
+ """Flatten omega configurations."""
+ ret = []
+
+ def handle_dict(key, value, resolve):
+ return [(f"{key}.{k1}", v1) for k1, v1 in flatten_omega_conf(value, resolve=resolve)]
+
+ def handle_list(key, value, resolve):
+ return [(f"{key}.{idx}", v1) for idx, v1 in flatten_omega_conf(value, resolve=resolve)]
+
+ if isinstance(cfg, omegaconf.DictConfig):
+ for k, v in cfg.items_ex(resolve=resolve):
+ if isinstance(v, omegaconf.DictConfig):
+ ret.extend(handle_dict(k, v, resolve=resolve))
+ elif isinstance(v, omegaconf.ListConfig):
+ ret.extend(handle_list(k, v, resolve=resolve))
+ else:
+ ret.append((str(k), v))
+ elif isinstance(cfg, omegaconf.ListConfig):
+ for idx, v in enumerate(cfg._iter_ex(resolve=resolve)):
+ if isinstance(v, omegaconf.DictConfig):
+ ret.extend(handle_dict(idx, v, resolve=resolve))
+ elif isinstance(v, omegaconf.ListConfig):
+ ret.extend(handle_list(idx, v, resolve=resolve))
+ else:
+ ret.append((str(idx), v))
+ return ret
diff --git a/URSA/diffnext/utils/profiler.py b/URSA/diffnext/utils/profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..343e4904632dee3ed47383adea9c152fa0e14170
--- /dev/null
+++ b/URSA/diffnext/utils/profiler.py
@@ -0,0 +1,90 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Profiler utilities."""
+
+import collections
+import contextlib
+import datetime
+import time
+import numpy as np
+
+
+class SmoothedValue(object):
+ """Track values and provide smoothed report."""
+
+ def __init__(self, window_size=None, fmt=None):
+ self.fmt = fmt or "{median:.4f} ({mean:.4f})"
+ self.deque = collections.deque(maxlen=window_size)
+
+ def __str__(self):
+ return self.fmt.format(value=self.value, mean=self.mean, median=self.median)
+
+ @property
+ def value(self):
+ return self.deque[-1]
+
+ @property
+ def mean(self):
+ return np.mean(self.deque)
+
+ @property
+ def median(self):
+ return np.median(self.deque)
+
+ def update(self, value):
+ self.deque.append(value)
+
+
+class Timer(object):
+ """Simple timer."""
+
+ def __init__(self):
+ self.total_time = 0.0
+ self.calls = 0
+ self.start_time = 0.0
+ self.diff = 0.0
+ self.average_time = 0.0
+
+ def add_diff(self, diff, n=1, average=False):
+ self.total_time += diff
+ self.calls += n
+ self.average_time = self.total_time / self.calls
+ return self.average_time if average else self.diff
+
+ @contextlib.contextmanager
+ def tic_and_toc(self, n=1):
+ try:
+ yield self.tic()
+ finally:
+ self.toc(n)
+
+ def tic(self):
+ self.start_time = time.time()
+ return self
+
+ def toc(self, n=1, average=False):
+ self.diff = time.time() - self.start_time
+ return self.add_diff(self.diff, n, average)
+
+
+def get_progress(timer, step, max_steps):
+ """Return the progress information."""
+ eta_seconds = timer.average_time * (max_steps - step)
+ eta = str(datetime.timedelta(seconds=int(eta_seconds)))
+ progress = (step + 1.0) / max_steps
+ return "< PROGRESS: {:.2%} | SPEED: {:.3f}s / step | ETA: {} >".format(
+ progress, timer.average_time, eta
+ )
diff --git a/URSA/diffnext/utils/registry.py b/URSA/diffnext/utils/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..b32ccfe31b835d1c306cd153a55c668b527f881b
--- /dev/null
+++ b/URSA/diffnext/utils/registry.py
@@ -0,0 +1,54 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Registry utilities."""
+
+import collections
+import functools
+
+
+class Registry(object):
+ """Registry class."""
+
+ def __init__(self, name):
+ self.name = name
+ self.registry = collections.OrderedDict()
+
+ def has(self, key) -> bool:
+ return key in self.registry
+
+ def register(self, name, func=None, **kwargs):
+ def decorated(inner_function):
+ for key in name if isinstance(name, (tuple, list)) else [name]:
+ self.registry[key] = functools.partial(inner_function, **kwargs)
+ return inner_function
+
+ if func is not None:
+ return decorated(func)
+ return decorated
+
+ def get(self, name, default=None):
+ if name is None:
+ return None
+ if not self.has(name):
+ if default is not None:
+ return default
+ raise KeyError("`%s` is not registered in <%s>." % (name, self.name))
+ return self.registry[name]
+
+ def try_get(self, name):
+ if self.has(name):
+ return self.get(name)
+ return None
diff --git a/URSA/diffnext/version.py b/URSA/diffnext/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..a15a8dc6128103bf9f7e6896519ee17284c41a52
--- /dev/null
+++ b/URSA/diffnext/version.py
@@ -0,0 +1,3 @@
+version = "0.3.0a0"
+git_version = "27f3f7577bcc71c2f08a8a069e4ef4ab70cf8bd7"
+__version__ = version
diff --git a/URSA/docs/evaluation.md b/URSA/docs/evaluation.md
new file mode 100644
index 0000000000000000000000000000000000000000..aa8e716bc0a85239a16e73bab3e487ff94a35721
--- /dev/null
+++ b/URSA/docs/evaluation.md
@@ -0,0 +1,49 @@
+# Evaluations
+
+## GenEval
+
+### 1. Sample prompt images
+```bash
+python ./evaluations/geneval/sample.py \
+--height 1024 --width 1024 \
+--guidance_scale 7 --num_inference_steps 25 \
+--ckpt /path/to/URSA-1.7B-IBQ1024 \
+--prompt_size 4 --outdir ./samples/geneval/URSA-1.7B-IBQ1024
+```
+
+### 2. Evaluation
+Sampled images are saved to `./samples/geneval/URSA-1.7B-IBQ1024`.
+
+Please refer [GenEval](https://github.com/djghosh13/geneval?tab=readme-ov-file#evaluation) evaluation guide.
+
+## DPG-Bench
+
+### 1. Sample prompt images
+```bash
+python evaluations/dpgbench/sample.py \
+--height 1024 --width 1024 \
+--guidance_scale 7 --num_inference_steps 25 \
+--ckpt ./checkpoints/URSA-1.7B-IBQ1024 \
+--prompt_size 4 --outdir samples/dpgbench/URSA-1.7B-IBQ1024
+```
+
+### 2. Evaluation
+Sampled images are saved to `./samples/dpgbench/URSA-1.7B-IBQ1024`.
+
+Please refer [DPG-Bench](https://github.com/TencentQQGYLab/ELLA?tab=readme-ov-file#-dpg-bench) evaluation guide.
+
+## VBench
+
+### 1. Sample prompt videos
+```bash
+python evaluations/vbench/sample.py \
+--num_frames 49 --height 320 --width 512 \
+--guidance_scale 7 --num_inference_steps 50 --motion_score 9 \
+--ckpt ./checkpoints/URSA-1.7B-FSQ320 \
+--prompt_size 1 --outdir ./samples/vbench/URSA-1.7B-FSQ320
+```
+
+### 2. Evaluation
+Sampled videos are saved to `./samples/vbench/URSA-1.7B-FSQ320`.
+
+Please refer [VBench](https://github.com/Vchitect/VBench?tab=readme-ov-file#evaluation-on-the-standard-prompt-suite-of-vbench) evaluation guide.
diff --git a/URSA/docs/training.md b/URSA/docs/training.md
new file mode 100644
index 0000000000000000000000000000000000000000..22e2c5427da865daa0c6584d4e76366ab1461b3e
--- /dev/null
+++ b/URSA/docs/training.md
@@ -0,0 +1,104 @@
+# Training Guide
+This guide provides simple snippets to train diffnext models.
+
+# 1. Build VQVAE cache
+To optimize training workflow, we preprocess images or videos into VQVAE latents.
+
+## Requirements:
+```bash
+pip install protobuf==3.20.3 codewithgpu decord
+```
+
+## Build T2I cache
+Following snippet can be used to cache image latents:
+
+```python
+import os, codewithgpu, torch, PIL.Image, numpy as np
+from diffnext.models.autoencoders.autoencoder_vq import AutoencoderVQ
+
+device, dtype = torch.device("cuda"), torch.float16
+vae = AutoencoderVQ.from_pretrained("/path/to/BAAI/URSA-1.7B-IBQ1024/vae")
+vae = vae.to(device=device, dtype=dtype).eval()
+
+features = {"codes": "bytes", "caption": "string", "text": "string", "shape": ["int64"]}
+os.makedirs("./datasets/ibq1024_dataset", exist_ok=True)
+writer = codewithgpu.RecordWriter("./datasets/ibq1024_dataset", features)
+
+img = PIL.Image.open("./assets/sample_image.jpg")
+x = torch.as_tensor(np.array(img)[None, ...].transpose(0, 3, 1, 2)).to(device).to(dtype)
+with torch.no_grad():
+ x = vae.encode(x.sub(127.5).div(127.5)).latent_dist.parameters.unsqueeze(1).cpu().numpy()[0]
+example = {"caption": "long caption", "text": "short text"}
+# Ensure enough examples for the codewithgpu distributed dataset.
+[writer.write({"shape": x.shape, "codes": x.tobytes(), **example}) for _ in range(16)]
+writer.close()
+```
+
+## Build T2V cache
+Following snippet can be used to cache video latents:
+
+```python
+import os, codewithgpu, torch, decord, numpy as np
+from diffnext.models.autoencoders.autoencoder_vq_cosmos3d import AutoencoderVQCosmos3D
+
+device, dtype = torch.device("cuda"), torch.float16
+vae = AutoencoderVQCosmos3D.from_pretrained("/path/to/URSA-1.7B-FSQ320/vae")
+vae = vae.to(device=device, dtype=dtype).eval()
+
+features = {"codes": "bytes", "caption": "string", "text": "string", "shape": ["int64"], "flow": "float64"}
+os.makedirs("./datasets/fsq320_dataset", exist_ok=True)
+writer = codewithgpu.RecordWriter("./datasets/fsq320_dataset", features)
+
+resize, crop_size, frame_ids = 320, (320, 512), list(range(0, 97, 2))
+vid = decord.VideoReader("./assets/sample_video.mp4")
+h, w = vid[0].shape[:2]
+scale = float(resize) / float(min(h, w))
+size = int(h * scale + 0.5), int(w * scale + 0.5)
+y, x = (size[0] - crop_size[0]) // 2, (size[1] - crop_size[1]) // 2
+vid = decord.VideoReader("./assets/sample_video.mp4", height=size[0], width=size[1])
+vid = vid.get_batch(frame_ids).asnumpy()
+vid = vid[:, y : y + crop_size[0], x : x + crop_size[1]]
+x = torch.as_tensor(vid[None, ...].transpose((0, 4, 1, 2, 3))).to(device).to(dtype)
+with torch.no_grad():
+ x = vae.encode(x.sub(127.5).div(127.5)).latent_dist.parameters.cpu().numpy()[0]
+example = {"caption": "long caption", "text": "short text", "flow": 9}
+# Ensure enough examples for the codewithgpu distributed dataset.
+[writer.write({"shape": x.shape, "codes": x.tobytes(), **example}) for _ in range(16)]
+writer.close()
+```
+
+# 2. Train models
+
+## Train T2I model
+Following snippet provides simple T2I training arguments:
+
+```bash
+accelerate launch --config_file accelerate_configs/deepspeed_zero2.yaml \
+--machine_rank 0 --num_machines 1 --num_processes 8 \
+scripts/train.py \
+config="./configs/ursa_1.7b_ibq1024.yaml" \
+experiment.name="ursa_1.7b_ibq1024" \
+experiment.output_dir="./experiments/ursa_1.7b_ibq1024" \
+pipeline.paths.pretrained_path="/path/to/URSA-1.7B-IBQ1024" \
+train_dataloader.params.dataset="./datasets/ibq1024_dataset" \
+model.gradient_checkpointing=3 \
+training.batch_size=4 \
+training.gradient_accumulation_steps=16
+```
+
+## Train T2V model
+Following snippet provides simple T2V training arguments:
+
+```bash
+accelerate launch --config_file accelerate_configs/deepspeed_zero2.yaml \
+--machine_rank 0 --num_machines 1 --num_processes 8 \
+scripts/train.py \
+config="./configs/ursa_1.7b_fsq320.yaml" \
+experiment.name="ursa_1.7b_fsq320" \
+experiment.output_dir="./experiments/ursa_1.7b_fsq320" \
+pipeline.paths.pretrained_path="/path/to/URSA-1.7B-FSQ320" \
+train_dataloader.params.dataset="./datasets/fsq320_dataset" \
+model.gradient_checkpointing=3 \
+training.batch_size=1 \
+training.gradient_accumulation_steps=32
+```
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1200/aux.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1200/aux.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..d5f8fa82a6c851ffd34448798f085b212760d9d6
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1200/aux.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9293694
+3964422045
+0
+36eeed32a8389018e8d5a54591f3047be9092cb6eceae6495632f88117876cdf
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1200/train_state.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1200/train_state.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/aux.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/aux.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/aux.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/aux.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..26d19bcf35896781e9a8030902c1f20f6e9c9432
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/aux.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9389641
+3964422045
+0
+a59e96b1649554869107941192e26550e747bf53f174f5a6b03f51aef311c4fc
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/student.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/student.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/student.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/student.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..da9b2bcf400199d26e2e7c200ac254038505677b
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/student.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9396026
+3964442641
+0
+63ccf5f5bbf0f6baa90dd5e6be2afcf0490714f901a4616cf2a29aee70f218ef
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/train_state.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/train_state.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/train_state.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/train_state.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..b0a30794b759e809ec9b74e8d68a887bd407d369
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1400/train_state.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9320042
+1349
+0
+6ed2bc0f12f0568f035d4458a2195a6377ad4908fa99d3791d15c81666e100b3
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/aux.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/aux.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/aux.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/aux.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..2b69d3d45352ada858372f7162d196fa410ae16e
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/aux.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.933277
+3964422045
+0
+b956f1de8131b3fc249b660a139a1b8fbbd2fc8f7a772d8061de3ceaa1c0b42e
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/student.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/student.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/student.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/student.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..1166feb8adaaeefbae948aa65d5fb096da6f9759
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/student.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9344034
+3964442641
+0
+4f53308b9e1f677fab0c9da11ea0d2c20860e2049edee17529457a4d5e24ef95
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/train_state.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/train_state.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/train_state.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/train_state.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..4ede471d854523712c4e9407f683ec42052fedc4
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1500/train_state.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9257114
+1349
+0
+2a7cc98de1f6a7d1a8c601c684c502668bcddcf07619c70067fceca6cf79eb0b
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/aux.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/aux.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/aux.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/aux.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..56bd74a0557e068a2ca86902a60e988811d003e2
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/aux.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.931345
+3964422045
+0
+368cd8f863d04f161ab6d61c01d21a6bf81f25fcc920f6f9a16005a2db5ff063
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/student.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/student.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/student.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/student.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..83baf2b50125e4bbd7c09b894e6e11aa3a1096bb
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/student.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.932644
+3964442641
+0
+f85a3ef82ebc7f8e4b5d48441e221f34f9092bca2adbe8e4522fbb19e55cdd04
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/train_state.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/train_state.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/train_state.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/train_state.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..1a5ae32f911f4f133ca99a7017df74acac19d2a8
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-1600/train_state.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9235532
+1349
+0
+6a34e5f00f80d681425cd89b54770d20efa8417d0fca2f3b823b9a4b38ee3553
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/aux.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/aux.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/aux.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/aux.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..848085abc1f16515ff329d4bb59ad964cda947a2
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/aux.pt.metadata
@@ -0,0 +1,8 @@
+1774258123.6864238
+3964422045
+0
+b4a0b60b5a1a284199af880978f12f9569baa83bdcaa15839dc08f37b77f2c0e
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/student.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/student.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/student.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/student.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..c71fbd54789a8c483efc43194cfdff71f10899eb
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/student.pt.metadata
@@ -0,0 +1,8 @@
+1774258123.685267
+3964442641
+0
+67657996ccc7320db642d883c49e6ea017f0cbc9cb3c9db3329d6a6c93dcf9fc
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/train_state.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/train_state.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/train_state.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/train_state.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..8d02af631c8ebf887deb27beedb72ad7efba9228
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-200/train_state.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9357553
+1349
+0
+8132acfb73c78c589dcc7b116397fa7ab5bcb11013e092952df1988e389cd61d
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/aux.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/aux.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/aux.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/aux.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..718144c42b6bcbc5ef66dc7a9e2aaf9af78a0bc1
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/aux.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.93767
+3964422045
+0
+dc94d0b1d3a56d724673bc814755538f8d7fa55818725b0e91623c517f12e2e4
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/student.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/student.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/student.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/student.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..f1e80d754f306d57d599ce48a7039588c625fe80
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/student.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9383287
+3964442641
+0
+e783b6b0a782f69a8cd25f53e6a507017b64afe10670251e966c964da781896c
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/train_state.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/train_state.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/train_state.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/train_state.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..acd3030724b8cf3cec51990e0b44405c77604488
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-400/train_state.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9307017
+1349
+0
+e6c5ab63997ff822a62537bb7cd0bf52c0f201a97aacbd281b577dff9839b88f
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/aux.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/aux.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/aux.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/aux.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..e7ebb33ff7d95a6ee4f72686bb43d2d0a6c71bb6
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/aux.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9364064
+3964422045
+0
+3ec9670bd2c20f6a91526be23b68074479cfea1793d7016b48c008c4ee327e23
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/student.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/student.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/student.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/student.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..95999518931ddddf311a547f71a534b56c98ec00
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/student.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9370458
+3964442641
+0
+16c3d52e06715ffbcc46340770d81fed75952287b9b5e60d30611181c0861260
+lfs
+
+1
+1
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/train_state.pt.lock b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/train_state.pt.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/train_state.pt.metadata b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/train_state.pt.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..deb0286bd3e5c4b4e668295e2cd104f132312cca
--- /dev/null
+++ b/URSA/experiments/distill_dimo_v3/checkpoints/checkpoint-900/train_state.pt.metadata
@@ -0,0 +1,8 @@
+1774261125.9287274
+1349
+0
+0339825bd8a84fd0a48ecbdb236b586782be24655b87b0ac892690b2f4145652
+lfs
+
+1
+1
diff --git a/URSA/her/ursa.jpg b/URSA/her/ursa.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d548214a59b1515136cc6b5b9d782de0f5622134
Binary files /dev/null and b/URSA/her/ursa.jpg differ
diff --git a/URSA/inference.py b/URSA/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b22df1a458a95194822b65ae1114ee836258d75
--- /dev/null
+++ b/URSA/inference.py
@@ -0,0 +1,71 @@
+import os, torch, numpy
+from diffnext.pipelines import URSAPipeline
+from diffnext.utils import export_to_video
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+
+
+model_id, height, width = "BAAI/URSA-1.7B-FSQ320", 320, 512
+model_args = {"torch_dtype": torch.bfloat16, "trust_remote_code": True}
+pipe = URSAPipeline.from_pretrained(model_id, **model_args)
+pipe = pipe.to(torch.device("cuda"))
+
+text_prompt = "tom and jerry"#"a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur."
+negative_prompt = "worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly"
+
+import time
+
+t1 = time.time()
+
+# Text-to-Image
+prompt = text_prompt
+num_frames, num_inference_steps = 1, 25
+image = pipe(**locals()).frames[0]
+image.save("tom/ursa.jpg")
+
+t2 = time.time()
+
+# Image-to-Video
+prompt = f"motion=9.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+video = pipe(**locals()).frames[0]
+export_to_video(video, "tom/ursa_1+48f.mp4", fps=12)
+
+t3 = time.time()
+
+# Text-to-Video
+image, video = None, None
+prompt = f"motion=9.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+video = pipe(**locals()).frames[0]
+export_to_video(video, "tom/ursa_49f.mp4", fps=12)
+
+t4 = time.time()
+
+# Video-to-Video
+prompt = f"motion=5.0, {text_prompt}"
+num_frames, num_inference_steps = 49, 50
+num_cond_frames, cond_noise_scale = 13, 0.1
+for i in range(12):
+ video, start_video = video[-num_cond_frames:], video
+ video = pipe(**locals()).frames[0]
+ video = numpy.concatenate([start_video, video[num_cond_frames:]])
+ export_to_video(video, "tom/ursa_{}f.mp4".format(video.shape[0]), fps=12)
+
+t5 = time.time()
+
+print(f"Text-to-Image time: {t2-t1:.2f} seconds")
+print(f"Image-to-Video time: {t3-t2:.2f} seconds")
+print(f"Text-to-Video time: {t4-t3:.2f} seconds")
+print(f"Video-to-Video time: {t5-t4:.2f} seconds")
+# Single H800 GPU, batch_size=1, the inference time is:
+# Text-to-Image time: 5.05 seconds
+# Image-to-Video time: 101.92 seconds
+# Text-to-Video time: 101.52 seconds
+# Video-to-Video time: 1226.25 seconds
+
+
+# cd URSA/
+# source .venv_ursa/bin/activate
+
+# accelerate launch --config_file accelerate_configs/deepspeed_zero2.yaml --machine_rank 0 --num_machines 1 --num_processes 8 scripts/train_distill_dimo.py config="./configs/distill_dimo.yaml" experiment.output_dir="./experiments/distill_dimo_v3" distill.teacher_ckpt="/gfs/space/private/fengzl/World_Model/URSA-1.7B" distill.prompt_source="/gfs/space/private/fengzl/World_Model/Koala-36M-v1"
\ No newline at end of file
diff --git a/URSA/outputs/dimo_test/final/aux.pt b/URSA/outputs/dimo_test/final/aux.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70276ab9b01bc7874f156f8d753b24e1dad79a27
--- /dev/null
+++ b/URSA/outputs/dimo_test/final/aux.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6e69d590c2cd35d175d7ed2b1c8f9422dc98026375d7a55e34275793de91eac
+size 3964449117
diff --git a/URSA/outputs/dimo_test/final/student.pt b/URSA/outputs/dimo_test/final/student.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1bc8155da7079d1296fa6545026b080765f71e01
--- /dev/null
+++ b/URSA/outputs/dimo_test/final/student.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb041db11c780b72faf29f203473db12cc8ceb3ff5f6e7756b539a12376a6893
+size 3964469713
diff --git a/URSA/outputs/dimo_test/step_000500/aux.pt b/URSA/outputs/dimo_test/step_000500/aux.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70276ab9b01bc7874f156f8d753b24e1dad79a27
--- /dev/null
+++ b/URSA/outputs/dimo_test/step_000500/aux.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6e69d590c2cd35d175d7ed2b1c8f9422dc98026375d7a55e34275793de91eac
+size 3964449117
diff --git a/URSA/outputs/dimo_test/step_000500/student.pt b/URSA/outputs/dimo_test/step_000500/student.pt
new file mode 100644
index 0000000000000000000000000000000000000000..01c0750403521eee2ce751bf9f9484572439df58
--- /dev/null
+++ b/URSA/outputs/dimo_test/step_000500/student.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ae0c4ea5b14175453b8cbfdae2ab79cd0ce9fa0cad09c8a4ac876ce9a36045a
+size 3964469713
diff --git a/URSA/outputs/dimo_test/step_001000/aux.pt b/URSA/outputs/dimo_test/step_001000/aux.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70276ab9b01bc7874f156f8d753b24e1dad79a27
--- /dev/null
+++ b/URSA/outputs/dimo_test/step_001000/aux.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6e69d590c2cd35d175d7ed2b1c8f9422dc98026375d7a55e34275793de91eac
+size 3964449117
diff --git a/URSA/outputs/dimo_test/step_001000/student.pt b/URSA/outputs/dimo_test/step_001000/student.pt
new file mode 100644
index 0000000000000000000000000000000000000000..183c6a7464bb372fea3b42e70d47a68c5eea2888
--- /dev/null
+++ b/URSA/outputs/dimo_test/step_001000/student.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a4ee4ed87fcdc290fa38799391e5801829937ad41a4ece8025f9af4b5886602
+size 3964469713
diff --git a/URSA/outputs/dimo_test/step_001500/aux.pt b/URSA/outputs/dimo_test/step_001500/aux.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70276ab9b01bc7874f156f8d753b24e1dad79a27
--- /dev/null
+++ b/URSA/outputs/dimo_test/step_001500/aux.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6e69d590c2cd35d175d7ed2b1c8f9422dc98026375d7a55e34275793de91eac
+size 3964449117
diff --git a/URSA/outputs/dimo_test/step_001500/student.pt b/URSA/outputs/dimo_test/step_001500/student.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ffed6498f624ccb5f44eb16f07d527aa1cf51f7
--- /dev/null
+++ b/URSA/outputs/dimo_test/step_001500/student.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9a318f30349c3e7382efb38f88f73f00dffcc0b014b52114837b58960625dae
+size 3964469713
diff --git a/URSA/outputs/dimo_test/step_002000/aux.pt b/URSA/outputs/dimo_test/step_002000/aux.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70276ab9b01bc7874f156f8d753b24e1dad79a27
--- /dev/null
+++ b/URSA/outputs/dimo_test/step_002000/aux.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6e69d590c2cd35d175d7ed2b1c8f9422dc98026375d7a55e34275793de91eac
+size 3964449117
diff --git a/URSA/outputs/dimo_test/step_002000/student.pt b/URSA/outputs/dimo_test/step_002000/student.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1bc8155da7079d1296fa6545026b080765f71e01
--- /dev/null
+++ b/URSA/outputs/dimo_test/step_002000/student.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb041db11c780b72faf29f203473db12cc8ceb3ff5f6e7756b539a12376a6893
+size 3964469713
diff --git a/URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 b/URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..e918fa2a1d9960b17004548fa4e409b7fb2b4c3d
--- /dev/null
+++ b/URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15767e598a5eee485910d993cff5af9c60f46884d9c8c67cea87eca1b311a53c
+size 169687
diff --git a/URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 b/URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..9694b57580a96e1c6c85a9405c73cacc49ba4adb
--- /dev/null
+++ b/URSA/outputs/eval_distill_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cc91910bf29fdead486a6718f46743fd844724afbc897071963e0c7c6a425c0
+size 231282
diff --git a/URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 b/URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..a55cb8f953b8f87e0aae4e4500977956cca2d2b7
--- /dev/null
+++ b/URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9cf7f31092e90a51932a449953a268dd568d7f9c1f47b55e82888dc960adf2f
+size 110969
diff --git a/URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4 b/URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..5dad417aae315089aab38937d170d867725fc5fe
--- /dev/null
+++ b/URSA/outputs/eval_distill_49frames/00_s1_a_lone_grizzly_bear_walks_through_a_mist_teacher_50step_cfg.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:483e08b0fd6521c702fa9a7e10a50867ee64cb9c1368391a1e6ad888e7abcb4b
+size 210380
diff --git a/URSA/outputs/eval_distill_49frames/00_s2_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4 b/URSA/outputs/eval_distill_49frames/00_s2_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..d9af9515f05359d514ba43e187b22ad7bc2526b8
--- /dev/null
+++ b/URSA/outputs/eval_distill_49frames/00_s2_a_lone_grizzly_bear_walks_through_a_mist_student_1step_cfg.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99e6152edf1e8a47d14bceb8f5cbdee5cddf8aa37e487a99f9eb6711c9ddd411
+size 151748
diff --git a/URSA/outputs/eval_distill_v3_100steps_49frames/01_s2_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4 b/URSA/outputs/eval_distill_v3_100steps_49frames/01_s2_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..d4cad01712d3640e24c2f4bf3ffa4b2857050b3b
--- /dev/null
+++ b/URSA/outputs/eval_distill_v3_100steps_49frames/01_s2_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9562794c68e06f05bbb7ee63745f854784177635d3f0379f059adee88f159537
+size 358381
diff --git a/URSA/outputs/eval_distill_v3_100steps_49frames/01_s3_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4 b/URSA/outputs/eval_distill_v3_100steps_49frames/01_s3_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..3d7a5e0c85c1c3521fa3217b5ca0e6e90e78587e
--- /dev/null
+++ b/URSA/outputs/eval_distill_v3_100steps_49frames/01_s3_beautiful_fireworks_in_the_sky_with_red__teacher_50step_cfg.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c662c5cf1f9063d54feb4acc2be51102520d436434e22810cc3d23fab15d3f9
+size 399397
diff --git a/URSA/outputs/eval_distill_v3_100steps_49frames/03_s3_a_hummingbird_hovers_in_front_of_a_red_f_student_1step_cfg.mp4 b/URSA/outputs/eval_distill_v3_100steps_49frames/03_s3_a_hummingbird_hovers_in_front_of_a_red_f_student_1step_cfg.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..5daa4054a03ec7e347bd6334f839fb1414eb1985
--- /dev/null
+++ b/URSA/outputs/eval_distill_v3_100steps_49frames/03_s3_a_hummingbird_hovers_in_front_of_a_red_f_student_1step_cfg.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5eb5fb73828820c999669fa5f1e565790d3142a11e54355af8fa5ff208ca4ab
+size 135665
diff --git a/URSA/outputs/eval_distill_v3_200steps_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_baked.mp4 b/URSA/outputs/eval_distill_v3_200steps_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_baked.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..ad0d585bc5683264426677bca92f09d06e87d48c
--- /dev/null
+++ b/URSA/outputs/eval_distill_v3_200steps_49frames/00_s0_a_lone_grizzly_bear_walks_through_a_mist_student_1step_baked.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb961227806b60f9ea2a6103169a91448822a5ad0c6e727c674d8a41fac7e7d6
+size 241025
diff --git a/URSA/pyproject.toml b/URSA/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..148cba98687334be5200bb39b9962ea43635787a
--- /dev/null
+++ b/URSA/pyproject.toml
@@ -0,0 +1,3 @@
+[tool.black]
+line-length = 100
+target-version = ['py310']
diff --git a/URSA/requirements.txt b/URSA/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..aacccdd001153a183eb26343121e4731be86a34c
--- /dev/null
+++ b/URSA/requirements.txt
@@ -0,0 +1,10 @@
+torch
+diffusers
+transformers>=4.57.1
+accelerate
+imageio
+imageio-ffmpeg
+omegaconf
+wandb
+scipy
+codewithgpu
diff --git a/URSA/scripts/ab_test_inference.py b/URSA/scripts/ab_test_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9c392e118c8525ad00fcdee0fc0985ddfc9e8aa
--- /dev/null
+++ b/URSA/scripts/ab_test_inference.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python3
+"""A/B test: official URSA inference vs eval_distill_dimo inference.
+
+This script runs the EXACT same pipeline call in two ways:
+ A) "official" — follows README Quick Start verbatim
+ B) "eval" — follows eval_distill_dimo.py logic
+
+Both use the same pipeline instance, same prompt, same seed.
+Saves side-by-side outputs + prints every intermediate diagnostic.
+
+Usage:
+ python scripts/ab_test_inference.py \
+ --model /gfs/space/private/fengzl/World_Model/URSA-1.7B \
+ --device 0
+
+This will generate:
+ outputs/ab_test/official_t2i.jpg
+ outputs/ab_test/official_t2v.mp4
+ outputs/ab_test/eval_teacher_cfg.mp4
+ outputs/ab_test/eval_teacher_nocfg.mp4
+ outputs/ab_test/eval_student_*.mp4 (if --student_ckpt given)
+"""
+
+import argparse
+import os
+import sys
+
+import numpy as np
+import torch
+
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+if _REPO_ROOT not in sys.path:
+ sys.path.insert(0, _REPO_ROOT)
+
+from diffnext.pipelines import URSAPipeline
+from diffnext.utils import export_to_image, export_to_video
+
+
+def parse_args():
+ p = argparse.ArgumentParser(description="A/B test URSA inference")
+ p.add_argument("--model", required=True, help="URSA model path")
+ p.add_argument("--student_ckpt", default=None, help="Optional student.pt")
+ p.add_argument("--device", type=int, default=0)
+ p.add_argument("--precision", default="float16", choices=["float16", "bfloat16"])
+ p.add_argument("--out_dir", default="./outputs/ab_test")
+ # Test different resolutions — FSQ320 native is 320x512
+ p.add_argument("--test_resolutions", nargs="+", default=["320x512"],
+ help="Resolutions to test as HxW strings (FSQ320 native: 320x512)")
+ p.add_argument("--test_steps", nargs="+", type=int, default=[25, 50],
+ help="Inference steps to test")
+ p.add_argument("--num_frames", type=int, default=49)
+ return p.parse_args()
+
+
+def diag(label, obj):
+ """Print diagnostic."""
+ print(f" [{label}] {obj}")
+
+
+def diag_tensor(label, t):
+ """Print tensor diagnostics."""
+ if isinstance(t, torch.Tensor):
+ print(f" [{label}] shape={t.shape} dtype={t.dtype} device={t.device} "
+ f"min={t.min().item():.4f} max={t.max().item():.4f} mean={t.mean().item():.4f}")
+ elif isinstance(t, np.ndarray):
+ print(f" [{label}] shape={t.shape} dtype={t.dtype} "
+ f"min={t.min()} max={t.max()} mean={t.mean():.2f}")
+
+
+def diag_pipeline(pipe):
+ """Full pipeline diagnostic."""
+ print("\n" + "=" * 70)
+ print(" PIPELINE DIAGNOSTICS")
+ print("=" * 70)
+ print(f" pipeline class : {type(pipe).__name__}")
+ print(f" transformer class : {type(pipe.transformer).__name__}")
+ print(f" transformer device : {next(pipe.transformer.parameters()).device}")
+ print(f" transformer dtype : {next(pipe.transformer.parameters()).dtype}")
+ print(f" vae class : {type(pipe.vae).__name__}")
+ print(f" vae device : {next(pipe.vae.parameters()).device}")
+ print(f" scheduler class : {type(pipe.scheduler).__name__}")
+ print(f" scheduler repr : {repr(pipe.scheduler)}")
+
+ sched = pipe.scheduler
+ if hasattr(sched, 'path') and sched.path is not None:
+ print(f" scheduler.path class: {type(sched.path).__name__}")
+ if hasattr(sched.path, 'emb'):
+ emb = sched.path.emb
+ print(f" path.emb shape : {emb.shape}")
+ print(f" path.emb device : {emb.device}")
+ print(f" path.emb dtype : {emb.dtype}")
+ print(f" path.emb[0,:5] : {emb[0,:5].tolist()}")
+ if hasattr(sched.path, 'alpha'):
+ print(f" path.alpha : {getattr(sched.path, 'alpha', 'N/A')}")
+ if hasattr(sched.path, 'c'):
+ print(f" path.c : {getattr(sched.path, 'c', 'N/A')}")
+ else:
+ print(f" scheduler.path : MISSING or None!")
+
+ print(f" codebook_size : {getattr(sched, 'codebook_size', 'N/A')}")
+ print(f" shift : {getattr(sched, 'shift', 'N/A')}")
+
+ if hasattr(sched, 'config'):
+ print(f" scheduler.config : {dict(sched.config)}")
+
+ print(f" vae_temporal_stride : {getattr(pipe, 'vae_temporal_stride', 'N/A')}")
+ print(f" vae_spatial_stride : {getattr(pipe, 'vae_spatial_stride', 'N/A')}")
+ print(f" tokenizer class : {type(pipe.tokenizer).__name__}")
+ print("=" * 70 + "\n")
+
+
+def diag_output(frames_output, label):
+ """Diagnose pipeline output."""
+ print(f"\n --- Output diagnostics: {label} ---")
+ if isinstance(frames_output, np.ndarray):
+ diag_tensor(f"{label} raw", frames_output)
+ elif isinstance(frames_output, list):
+ print(f" [{label}] list of {len(frames_output)} items")
+ if len(frames_output) > 0:
+ f0 = frames_output[0]
+ if isinstance(f0, np.ndarray):
+ diag_tensor(f"{label}[0]", f0)
+ else:
+ print(f" [{label}[0]] type={type(f0)}")
+ else:
+ print(f" [{label}] type={type(frames_output)}")
+
+
+def save_frames(frames, path, fps=12):
+ """Save frames as video or image."""
+ os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+ if path.endswith(".mp4"):
+ if isinstance(frames, np.ndarray) and frames.ndim == 4:
+ export_to_video(list(frames), output_video_path=path, fps=fps)
+ elif isinstance(frames, list):
+ export_to_video(frames, output_video_path=path, fps=fps)
+ else:
+ export_to_video(frames, output_video_path=path, fps=fps)
+ elif path.endswith((".jpg", ".png")):
+ from PIL import Image
+ if isinstance(frames, np.ndarray):
+ Image.fromarray(frames).save(path)
+ elif hasattr(frames, 'save'):
+ frames.save(path)
+
+
+def main():
+ args = parse_args()
+ os.makedirs(args.out_dir, exist_ok=True)
+
+ dtype = getattr(torch, args.precision)
+ device = torch.device("cuda", args.device) if torch.cuda.is_available() else torch.device("cpu")
+
+ prompt = "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur."
+ negative_prompt = "worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly"
+ seed = 42
+
+ # =====================================================================
+ # Load pipeline
+ # =====================================================================
+ print(f"\n[1] Loading pipeline from {args.model} ...")
+ pipe = URSAPipeline.from_pretrained(
+ args.model, torch_dtype=dtype, trust_remote_code=True
+ ).to(device)
+
+ diag_pipeline(pipe)
+
+ # =====================================================================
+ # Test A: Official README T2V (exact copy from README for FSQ320)
+ # FSQ320: height=320, width=512, num_frames=49, steps=50
+ # =====================================================================
+ print("\n" + "#" * 70)
+ print("# TEST A: Official README T2V (FSQ320 native resolution)")
+ print("#" * 70)
+
+ gen = torch.Generator(device=device).manual_seed(seed)
+ out = pipe(
+ prompt=f"motion=9.0, {prompt}",
+ negative_prompt=negative_prompt,
+ height=320,
+ width=512,
+ num_frames=49,
+ num_inference_steps=50,
+ guidance_scale=7,
+ generator=gen,
+ output_type="np",
+ )
+ frames = out.frames
+ diag_output(frames, "A_official_t2v")
+ if isinstance(frames, np.ndarray):
+ video_frames = frames[0] if frames.ndim == 5 else frames
+ else:
+ video_frames = frames
+ path_a = os.path.join(args.out_dir, "A_official_t2v_320x512_49f_50step.mp4")
+ try:
+ if isinstance(video_frames, np.ndarray):
+ export_to_video(list(video_frames), output_video_path=path_a, fps=12)
+ else:
+ export_to_video(video_frames, output_video_path=path_a, fps=12)
+ print(f" Saved: {path_a}")
+ except Exception as e:
+ print(f" Failed: {e}")
+
+ # Also test T2I at native resolution (1 frame)
+ print("\n# TEST A2: T2I at 320x512 (1 frame)")
+ gen = torch.Generator(device=device).manual_seed(seed)
+ out = pipe(
+ prompt=prompt,
+ negative_prompt=negative_prompt,
+ height=320,
+ width=512,
+ num_frames=1,
+ num_inference_steps=25,
+ guidance_scale=7,
+ generator=gen,
+ )
+ image = out.frames[0]
+ path_a2 = os.path.join(args.out_dir, "A_official_t2i_320x512.jpg")
+ if hasattr(image, 'save'):
+ image.save(path_a2)
+ print(f" Saved: {path_a2} (PIL Image)")
+ else:
+ diag_output(out.frames, "A2_t2i")
+
+ # =====================================================================
+ # Test B: Different resolutions and step counts for video
+ # =====================================================================
+ for res_str in args.test_resolutions:
+ h, w = map(int, res_str.split("x"))
+ for steps in args.test_steps:
+ for gs_label, gs_val in [("nocfg", 1.0), ("cfg7", 7.0)]:
+ label = f"B_{h}x{w}_{steps}step_{gs_label}"
+ print(f"\n{'#' * 70}")
+ print(f"# TEST {label}")
+ print(f"# height={h} width={w} num_frames={args.num_frames}")
+ print(f"# steps={steps} guidance_scale={gs_val}")
+ print(f"{'#' * 70}")
+
+ gen = torch.Generator(device=device).manual_seed(seed)
+ neg = negative_prompt if gs_val > 1 else None
+
+ # Print scheduler state before call
+ print(f" scheduler.codebook_size = {pipe.scheduler.codebook_size}")
+ print(f" scheduler.path type = {type(pipe.scheduler.path).__name__}")
+
+ out = pipe(
+ prompt=prompt,
+ negative_prompt=neg,
+ height=h,
+ width=w,
+ num_frames=args.num_frames,
+ num_inference_steps=steps,
+ guidance_scale=gs_val,
+ guidance_trunc=0.9,
+ max_prompt_length=320,
+ vae_batch_size=1,
+ output_type="np",
+ generator=gen,
+ )
+
+ frames = out.frames
+ diag_output(frames, label)
+
+ # For video output (num_frames > 1), frames is [batch, T, H, W, 3]
+ if isinstance(frames, np.ndarray):
+ if frames.ndim == 5:
+ video_frames = frames[0] # [T, H, W, 3]
+ elif frames.ndim == 4:
+ video_frames = frames # [T, H, W, 3]
+ else:
+ video_frames = frames
+ elif isinstance(frames, list):
+ video_frames = frames
+ else:
+ video_frames = frames
+
+ path = os.path.join(args.out_dir, f"{label}.mp4")
+ try:
+ if isinstance(video_frames, np.ndarray):
+ export_to_video(list(video_frames), output_video_path=path, fps=12)
+ else:
+ export_to_video(video_frames, output_video_path=path, fps=12)
+ print(f" Saved: {path}")
+ except Exception as e:
+ print(f" Failed to save {path}: {e}")
+
+ # =====================================================================
+ # Test C: Student (if provided)
+ # =====================================================================
+ if args.student_ckpt:
+ print(f"\n{'#' * 70}")
+ print(f"# TEST C: Student 1-step")
+ print(f"{'#' * 70}")
+
+ teacher_state = {k: v.clone() for k, v in pipe.transformer.state_dict().items()}
+ student_state = torch.load(args.student_ckpt, map_location=device, weights_only=True)
+
+ print(f" student keys: {len(student_state)}")
+ print(f" teacher keys: {len(teacher_state)}")
+
+ # Check key compatibility
+ missing = set(teacher_state.keys()) - set(student_state.keys())
+ extra = set(student_state.keys()) - set(teacher_state.keys())
+ if missing:
+ print(f" WARNING: {len(missing)} keys in teacher but not student: {list(missing)[:5]}")
+ if extra:
+ print(f" WARNING: {len(extra)} keys in student but not teacher: {list(extra)[:5]}")
+
+ pipe.transformer.load_state_dict(student_state, strict=True)
+ pipe.transformer.eval()
+
+ for res_str in args.test_resolutions[:1]: # Just first resolution
+ h, w = map(int, res_str.split("x"))
+ for gs_label, gs_val in [("nocfg", 1.0), ("cfg7", 7.0)]:
+ label = f"C_student_{h}x{w}_1step_{gs_label}"
+ gen = torch.Generator(device=device).manual_seed(seed)
+ neg = negative_prompt if gs_val > 1 else None
+
+ out = pipe(
+ prompt=prompt,
+ negative_prompt=neg,
+ height=h,
+ width=w,
+ num_frames=args.num_frames,
+ num_inference_steps=1,
+ guidance_scale=gs_val,
+ guidance_trunc=0.9,
+ max_prompt_length=320,
+ vae_batch_size=1,
+ output_type="np",
+ generator=gen,
+ )
+
+ frames = out.frames
+ diag_output(frames, label)
+
+ if isinstance(frames, np.ndarray):
+ video_frames = frames[0] if frames.ndim == 5 else frames
+ else:
+ video_frames = frames
+
+ path = os.path.join(args.out_dir, f"{label}.mp4")
+ try:
+ if isinstance(video_frames, np.ndarray):
+ export_to_video(list(video_frames), output_video_path=path, fps=12)
+ else:
+ export_to_video(video_frames, output_video_path=path, fps=12)
+ print(f" Saved: {path}")
+ except Exception as e:
+ print(f" Failed to save {path}: {e}")
+
+ # Restore teacher
+ pipe.transformer.load_state_dict(teacher_state, strict=True)
+
+ print(f"\n[DONE] All outputs in {args.out_dir}")
+ print("\nCheck these files to diagnose blurriness:")
+ print(" - A_official_t2i_1024x1024.jpg → should be sharp (official T2I)")
+ print(" - B_*_cfg7.mp4 → teacher video with CFG")
+ print(" - B_*_nocfg.mp4 → teacher video without CFG")
+ print(" - Compare different resolutions and step counts")
+ print(" - If ALL are blurry, the issue is in pipeline/scheduler/VAE loading")
+ print(" - If only low-res are blurry, it's a resolution issue")
+ print(" - If only low-step are blurry, need more steps")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/URSA/scripts/app_ursa_t2i.py b/URSA/scripts/app_ursa_t2i.py
new file mode 100644
index 0000000000000000000000000000000000000000..03b67657c823a3765dd61d52f248d964f8a63c5f
--- /dev/null
+++ b/URSA/scripts/app_ursa_t2i.py
@@ -0,0 +1,147 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""URSA T2I application."""
+
+import argparse
+import os
+
+import gradio as gr
+import numpy as np
+import torch
+
+from diffnext.pipelines import URSAPipeline
+from diffnext.utils import export_to_image
+
+# Switch to the allocator optimized for dynamic shape.
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+
+def parse_args():
+ """Parse arguments."""
+ parser = argparse.ArgumentParser(description="Serve URSA T2I application")
+ parser.add_argument("--model", default="", help="model path")
+ parser.add_argument("--device", type=int, default=0, help="device index")
+ parser.add_argument("--precision", default="float16", help="compute precision")
+ return parser.parse_args()
+
+
+def generate_image(
+ prompt,
+ negative_prompt,
+ seed,
+ randomize_seed,
+ width,
+ height,
+ guidance_scale,
+ num_inference_steps,
+):
+ """Generate an image."""
+ args = locals()
+ seed = np.random.randint(2147483647) if randomize_seed else seed
+ device = getattr(pipe, "_offload_device", pipe.device)
+ generator = torch.Generator(device=device).manual_seed(seed)
+ images = pipe(generator=generator, **args).frames
+ return [export_to_image(image, quality=95) for image in images] + [seed]
+
+
+css = """#col-container {margin: 0 auto; max-width: 1366px}"""
+title = "Uniform Discrete Diffusion with Metric Path for Video Generation"
+header = (
+ ""
+ "
Uniform Discrete Diffusion with Metric Path for Video Generation
"
+ "
"
+ "
"
+)
+
+examples = [
+ "a selfie of an old man with a white beard.",
+ "a woman with long hair next to a luminescent bird.",
+ "a digital artwork of a cat styled in a whimsical fashion. The overall vibe is quirky and artistic.", # noqa
+ "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur.",
+ "a beautiful afghan women by red hair and green eyes.",
+ "beautiful fireworks in the sky with red, white and blue.",
+ "A dragon perched majestically on a craggy, smoke-wreathed mountain.",
+ "A photo of llama wearing sunglasses standing on the deck of a spaceship with the Earth in the background.", # noqa
+ "Two pandas in fluffy slippers and bathrobes, lazily munching on bamboo.",
+]
+
+
+if __name__ == "__main__":
+ args = parse_args()
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu", args.device)
+ model_args = {"torch_dtype": getattr(torch, args.precision.lower()), "trust_remote_code": True}
+ pipe = URSAPipeline.from_pretrained(args.model, **model_args).to(device)
+
+ # Main Application.
+ app = gr.Blocks(css=css, theme="origin").__enter__()
+ container = gr.Column(elem_id="col-container").__enter__()
+ _, main_row = gr.Markdown(header), gr.Row().__enter__()
+
+ # Input.
+ input_col = gr.Column().__enter__()
+ prompt = gr.Text(
+ label="Prompt",
+ placeholder="Describe the video you want to generate",
+ value="A lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur.", # noqa
+ lines=5,
+ )
+ negative_prompt = gr.Text(
+ label="Negative Prompt",
+ placeholder="Describe what you don't want in the image",
+ value="worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly", # noqa
+ lines=5,
+ )
+ # fmt: off
+ options = gr.Accordion("Options", open=False).__enter__()
+ seed = gr.Slider(label="Seed", maximum=2147483647, step=1, value=0)
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+ guidance_scale = gr.Slider(label="Guidance scale", minimum=1, maximum=10, step=0.1, value=7)
+ with gr.Row():
+ width = gr.Slider(label="Width", minimum=256, maximum=1024, step=32, value=1024)
+ height = gr.Slider(label="Height", minimum=256, maximum=1024, step=32, value=1024)
+ num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=50, step=1, value=25) # noqa
+ options.__exit__()
+ generate_btn = gr.Button("Generate Image", variant="primary", size="lg")
+ input_col.__exit__()
+ # fmt: on
+
+ # Results.
+ result = gr.Image(label="Result", height=720, show_label=False)
+ main_row.__exit__()
+
+ # Examples.
+ with gr.Row():
+ gr.Examples(examples=examples, inputs=[prompt])
+
+ # Events.
+ container.__exit__()
+ gr.on(
+ triggers=[generate_btn.click, prompt.submit, negative_prompt.submit],
+ fn=generate_image,
+ inputs=[
+ prompt,
+ negative_prompt,
+ seed,
+ randomize_seed,
+ width,
+ height,
+ guidance_scale,
+ num_inference_steps,
+ ],
+ outputs=[result, seed],
+ )
+ app.__exit__(), app.launch(share=False)
diff --git a/URSA/scripts/app_ursa_ti2v.py b/URSA/scripts/app_ursa_ti2v.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b22c26ba76890795934454b6466352db31c82c1
--- /dev/null
+++ b/URSA/scripts/app_ursa_ti2v.py
@@ -0,0 +1,204 @@
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""URSA TI2V application."""
+
+import argparse
+import os
+
+import gradio as gr
+import numpy as np
+import PIL.Image
+import torch
+
+from diffnext.pipelines import URSAPipeline
+from diffnext.utils import export_to_image, export_to_video
+
+# Fix tokenizer fork issue.
+os.environ["TOKENIZERS_PARALLELISM"] = "true"
+# Switch to the allocator optimized for dynamic shape.
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+
+def parse_args():
+ """Parse arguments."""
+ parser = argparse.ArgumentParser(description="Serve URSA TI2V application")
+ parser.add_argument("--model", default="", help="model path")
+ parser.add_argument("--device", type=int, default=0, help="device index")
+ parser.add_argument("--precision", default="float16", help="compute precision")
+ return parser.parse_args()
+
+
+def crop_image(image, target_h, target_w):
+ """Center crop image to target size."""
+ h, w = image.height, image.width
+ aspect_ratio_target, aspect_ratio = target_w / target_h, w / h
+ if aspect_ratio > aspect_ratio_target:
+ new_w = int(h * aspect_ratio_target)
+ x_start = (w - new_w) // 2
+ image = image.crop((x_start, 0, x_start + new_w, h))
+ else:
+ new_h = int(w / aspect_ratio_target)
+ y_start = (h - new_h) // 2
+ image = image.crop((0, y_start, w, y_start + new_h))
+ return np.array(image.resize((target_w, target_h), PIL.Image.Resampling.BILINEAR))
+
+
+def generate_image(
+ prompt,
+ negative_prompt,
+ seed,
+ randomize_seed,
+ guidance_scale,
+ num_inference_steps=25,
+):
+ """Generate a video."""
+ args = {**locals(), **video_presets["t2i"]}
+ seed = np.random.randint(2147483647) if randomize_seed else seed
+ device = getattr(pipe, "_offload_device", pipe.device)
+ generator = torch.Generator(device=device).manual_seed(seed)
+ images = pipe(generator=generator, **args).frames
+ return [export_to_image(image, quality=95) for image in images] + [seed]
+
+
+def generate_video(
+ prompt,
+ negative_prompt,
+ image,
+ motion_score,
+ seed,
+ randomize_seed,
+ guidance_scale,
+ num_inference_steps,
+ output_type="np",
+):
+ """Generate a video."""
+ args = {**locals(), **video_presets["ti2v"]}
+ args["prompt"] = f"motion={motion_score:.1f}, {prompt}"
+ args["image"] = crop_image(image, args["height"], args["width"]) if image else None
+ seed = np.random.randint(2147483647) if randomize_seed else seed
+ device = getattr(pipe, "_offload_device", pipe.device)
+ generator = torch.Generator(device=device).manual_seed(seed)
+ frames = pipe(generator=generator, **args).frames[0]
+ return export_to_video(frames, fps=12), seed
+
+
+css = """#col-container {margin: 0 auto; max-width: 1366px}"""
+title = "Uniform Discrete Diffusion with Metric Path for Video Generation"
+header = (
+ ""
+ "
Uniform Discrete Diffusion with Metric Path for Video Generation
"
+ "
"
+ "
"
+)
+
+video_presets = {
+ "t2i": {"width": 512, "height": 320, "num_frames": 1},
+ "ti2v": {"width": 512, "height": 320, "num_frames": 49},
+}
+
+prompts = [
+ "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur.",
+ "Many spotted jellyfish pulsating under water. Their bodies are transparent and glowing in deep ocean.", # noqa
+ "An intense close-up of a soldier’s face, covered in dirt and sweat, his eyes filled with determination as he surveys the battlefield.", # noqa
+ "a close-up shot of a woman standing in a dimly lit room. she is wearing a traditional chinese outfit, which includes a red and gold dress with intricate designs and a matching headpiece. the woman has her hair styled in an updo, adorned with a gold accessory. her makeup is done in a way that accentuates her features, with red lipstick and dark eyeshadow. she is looking directly at the camera with a neutral expression. the room has a rustic feel, with wooden beams and a stone wall visible in the background. the lighting in the room is soft and warm, creating a contrast with the woman's vibrant attire. there are no texts or other objects in the video. the style of the video is a portrait, focusing on the woman and her attire.", # noqa
+ "The camera slowly rotates around a massive stack of vintage televisions that are placed within a large New York museum gallery. Each of the televisions is showing a different program. There are 1950s sci-fi movies with their distinctive visuals, horror movies with their creepy scenes, news broadcasts with moving images and words, static on some screens, and a 1970s sitcom with its characteristic look. The televisions are of various sizes and designs, some with rounded edges and others with more angular shapes. The gallery is well-lit, with light falling on the stack of televisions and highlighting the different programs being shown. There are no people visible in the immediate vicinity, only the stack of televisions and the surrounding gallery space.", # noqa
+]
+motion_scores = [9, 9, 9, 9, 9]
+videos = ["", "", "", "", ""]
+examples = [list(x) for x in zip(prompts, motion_scores)]
+
+
+if __name__ == "__main__":
+ args = parse_args()
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu", args.device)
+ model_args = {"torch_dtype": getattr(torch, args.precision.lower()), "trust_remote_code": True}
+ pipe = URSAPipeline.from_pretrained(args.model, **model_args).to(device)
+
+ # Application.
+ app = gr.Blocks(css=css, theme="origin").__enter__()
+ container = gr.Column(elem_id="col-container").__enter__()
+ _, main_row = gr.Markdown(header), gr.Row().__enter__()
+
+ # Input.
+ input_col = gr.Column().__enter__()
+ prompt = gr.Text(
+ label="Prompt",
+ placeholder="Describe the video you want to generate",
+ value="A lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur.", # noqa
+ lines=5,
+ )
+ negative_prompt = gr.Text(
+ label="Negative Prompt",
+ placeholder="Describe what you don't want in the video",
+ value="worst quality, low quality, inconsistent motion, static, still, blurry, jittery, distorted, ugly", # noqa
+ lines=1,
+ )
+ with gr.Row():
+ generate_image_btn = gr.Button("Generate Image Prompt", variant="primary", size="lg")
+ generate_video_btn = gr.Button("Generate Video", variant="primary", size="lg")
+ image_prompt = gr.Image(label="Image Prompt", height=480, type="pil")
+
+ # fmt: off
+ options = gr.Accordion("Options", open=False).__enter__()
+ seed = gr.Slider(label="Seed", maximum=2147483647, step=1, value=0)
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+ guidance_scale = gr.Slider(label="Guidance scale", minimum=1, maximum=10.0, step=0.1, value=7.0)
+ with gr.Row():
+ num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=100, step=1, value=50) # noqa
+ options.__exit__(), input_col.__exit__()
+
+ # Results.
+ result_col = gr.Column().__enter__()
+ motion = gr.Slider(label="Motion Score", minimum=1, maximum=10, step=1, value=9)
+ result = gr.Video(label="Result", height=480, show_label=False, autoplay=True)
+ result_col.__exit__(), main_row.__exit__()
+ # fmt: on
+
+ # Examples.
+ with gr.Row():
+ gr.Examples(examples=examples, inputs=[prompt, motion])
+
+ # Events.
+ container.__exit__()
+ gr.on(
+ triggers=[generate_image_btn.click, prompt.submit, negative_prompt.submit],
+ fn=generate_image,
+ inputs=[
+ prompt,
+ negative_prompt,
+ seed,
+ randomize_seed,
+ guidance_scale,
+ ],
+ outputs=[image_prompt, seed],
+ )
+ gr.on(
+ triggers=[generate_video_btn.click, prompt.submit, negative_prompt.submit],
+ fn=generate_video,
+ inputs=[
+ prompt,
+ negative_prompt,
+ image_prompt,
+ motion,
+ seed,
+ randomize_seed,
+ guidance_scale,
+ num_inference_steps,
+ ],
+ outputs=[result, seed],
+ )
+ app.__exit__(), app.launch(share=False)
diff --git a/URSA/scripts/eval_distill_dimo.py b/URSA/scripts/eval_distill_dimo.py
new file mode 100644
index 0000000000000000000000000000000000000000..62906de50450f4bdfb32097fe139597adf3cfb03
--- /dev/null
+++ b/URSA/scripts/eval_distill_dimo.py
@@ -0,0 +1,458 @@
+#!/usr/bin/env python3
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------
+"""Evaluation script for distill_dimo checkpoints.
+
+Generates videos from both the student (1-step) and teacher (multi-step)
+using checkpoints saved by train_distill_dimo.py.
+
+Verified native inference regime (from A/B testing — ground truth):
+ height=320, width=512, num_frames=49, guidance_scale=7, teacher_steps=50.
+ no_cfg (guidance_scale=1) does NOT produce valid output for this URSA
+ checkpoint — outputs are blank or blurry.
+
+Student generation modes
+------------------------
+ cfg : 1-step, guidance_scale=7 (2× forward, inference-time CFG)
+
+Teacher generation modes
+------------------------
+ cfg : 50-step, guidance_scale=7 (official working regime)
+
+Usage:
+ python scripts/eval_distill_dimo.py \
+ --teacher_ckpt /gfs/space/private/fengzl/World_Model/URSA-1.7B \
+ --student_ckpt ./experiments/distill_dimo_v3/checkpoints/checkpoint-200/student.pt \
+ --out_dir ./outputs/eval_distill_v3_200steps_49frames
+"""
+
+import argparse
+import os
+import sys
+
+import numpy as np
+import torch
+
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+if _REPO_ROOT not in sys.path:
+ sys.path.insert(0, _REPO_ROOT)
+
+from diffnext.pipelines import URSAPipeline
+from diffnext.pipelines.ursa.pipeline_ursa_distill_dimo import (
+ VERIFIED_NATIVE_DEFAULTS,
+ check_verified_regime,
+)
+from diffnext.utils import export_to_video
+
+
+# ---------------------------------------------------------------------------
+# Default prompts and seeds
+# ---------------------------------------------------------------------------
+
# Prompts used when --prompt_file is not supplied on the command line.
DEFAULT_PROMPTS = [
    "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur.",
    "beautiful fireworks in the sky with red, white and blue.",
    "a wave crashes on a rocky shoreline at sunset, slow motion.",
    "a hummingbird hovers in front of a red flower, wings a blur.",
    "timelapse of clouds rolling over mountain peaks.",
    "a neon-lit city street at night with rain-soaked reflections.",
    "a kitten playing with a ball of yarn on a wooden floor.",
    "astronaut floating weightlessly inside a space station.",
]

# Official URSA negative prompt (from README / app scripts).
# Only passed to the pipeline when guidance_scale > 1 (see main()).
DEFAULT_NEGATIVE_PROMPT = (
    "worst quality, low quality, inconsistent motion, static, still, "
    "blurry, jittery, distorted, ugly"
)

# Seeds evaluated per prompt when --seeds is not supplied.
DEFAULT_SEEDS = [0, 1, 2, 3]
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
def parse_args():
    """Build and parse the CLI for the distill_dimo evaluation run.

    Returns:
        argparse.Namespace with checkpoint paths, output directory, video
        geometry, student/teacher generation modes, shared sampling
        parameters, prompt/seed selection and device settings.
    """
    p = argparse.ArgumentParser(
        description="Evaluate distill_dimo student (1-step) vs teacher (multi-step)"
    )

    # Checkpoints and output location.
    p.add_argument("--teacher_ckpt", required=True,
                   help="URSA diffusers pipeline directory (teacher weights)")
    p.add_argument("--student_ckpt", required=True,
                   help="student.pt from train_distill_dimo.py checkpoint")
    p.add_argument("--out_dir", default="./outputs/eval_distill")

    # Geometry — verified native: 320×512×49 (from A/B testing)
    p.add_argument("--num_frames", type=int, default=49)
    p.add_argument("--height", type=int, default=320)
    p.add_argument("--width", type=int, default=512)
    p.add_argument("--fps", type=int, default=12)

    # Student generation — default: cfg only (no_cfg is known to fail)
    p.add_argument("--student_modes", nargs="+", default=["cfg"],
                   choices=["no_cfg", "cfg", "baked"],
                   help="Student generation modes to evaluate. "
                        "Default: ['cfg']. no_cfg is known to produce blank/blurry "
                        "output for this checkpoint.")
    p.add_argument("--eval_cfg_scale", type=float, default=7.0,
                   help="Guidance scale for 'cfg' mode (verified working value=7)")

    # Teacher generation — default: cfg only (no_cfg is known to fail)
    p.add_argument("--teacher_modes", nargs="+", default=["cfg"],
                   choices=["no_cfg", "cfg"],
                   help="Teacher generation modes. Default: ['cfg']. "
                        "no_cfg is NOT a valid baseline for this URSA checkpoint.")
    p.add_argument("--teacher_steps", type=int, default=50,
                   help="Number of inference steps for teacher (verified default=50)")

    # Shared generation params (match verified official defaults)
    p.add_argument("--guidance_trunc", type=float, default=0.9,
                   help="Truncation threshold for inference CFG")
    p.add_argument("--negative_prompt", type=str, default=DEFAULT_NEGATIVE_PROMPT,
                   help="Negative prompt for CFG (official URSA uses one)")
    p.add_argument("--max_prompt_length", type=int, default=320)
    p.add_argument("--vae_batch_size", type=int, default=1)

    # Safety override for no_cfg.
    # NOTE(review): help text says "warning/block", but main() only warns and
    # never blocks — confirm intended behavior (see the no_cfg safety gate).
    p.add_argument("--allow_bad_nocfg", action="store_true", default=False,
                   help="Suppress the no_cfg warning/block. Use at your own risk.")

    # Data
    p.add_argument("--prompt_file", default=None,
                   help="Text file with one prompt per line (overrides defaults)")
    p.add_argument("--seeds", nargs="*", type=int, default=DEFAULT_SEEDS)

    # Device
    p.add_argument("--device", type=int, default=0)
    p.add_argument("--mixed_precision", default="bf16",
                   choices=["fp16", "bf16", "fp32"])

    return p.parse_args()
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
def slug(text: str, max_len: int = 40) -> str:
    """Build a filesystem-safe slug from *text*.

    Lowercases the input, drops every character that is neither
    alphanumeric nor a space, joins the surviving words with
    underscores and truncates to *max_len*.  Returns "prompt" when
    nothing survives the filtering.
    """
    lowered = text.lower()
    kept = []
    for ch in lowered:
        if ch.isalnum() or ch == " ":
            kept.append(ch)
    candidate = "_".join("".join(kept).split())[:max_len]
    if candidate:
        return candidate
    return "prompt"
+
+
def frames_to_mp4(frames, path: str, fps: int = 12):
    """Write *frames* to an mp4 at *path*, creating parent dirs as needed."""
    parent = os.path.dirname(os.path.abspath(path))
    os.makedirs(parent, exist_ok=True)
    is_stacked_array = isinstance(frames, np.ndarray) and frames.ndim == 4
    payload = list(frames) if is_stacked_array else frames
    export_to_video(payload, output_video_path=path, fps=fps)
+
+
+def _extract_frames(frames_output):
+ """Normalise pipeline output → list of uint8 numpy arrays [H, W, 3]."""
+ if isinstance(frames_output, np.ndarray):
+ frames_output = frames_output[0] if frames_output.ndim == 5 else frames_output
+ frames = list(frames_output)
+ elif isinstance(frames_output, list):
+ frames = [np.array(f) if not isinstance(f, np.ndarray) else f
+ for f in frames_output]
+ else:
+ raise TypeError(f"Unexpected frames type: {type(frames_output)}")
+ result = []
+ for f in frames:
+ if f.dtype != np.uint8:
+ f = ((f * 255).clip(0, 255).astype(np.uint8)
+ if f.max() <= 1.0 else f.astype(np.uint8))
+ result.append(f)
+ return result
+
+
def _gen(pipe, prompt, negative_prompt, seed, num_frames, height, width,
         guidance_scale, num_inference_steps, guidance_trunc,
         max_prompt_length, vae_batch_size, device):
    """Run one pipeline call under inference mode; return uint8 frames."""
    generator = torch.Generator(device=device).manual_seed(seed)
    call_kwargs = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        guidance_trunc=guidance_trunc,
        max_prompt_length=max_prompt_length,
        vae_batch_size=vae_batch_size,
        output_type="np",
        generator=generator,
    )
    with torch.inference_mode():
        out = pipe(**call_kwargs)
    return _extract_frames(out.frames)
+
+
+def _debug_pipeline(pipe, label=""):
+ """Print diagnostic info about the pipeline state."""
+ print(f"\n{'='*60}")
+ print(f" Pipeline diagnostics {label}")
+ print(f"{'='*60}")
+ print(f" scheduler class : {type(pipe.scheduler).__name__}")
+ print(f" scheduler type : {type(pipe.scheduler)}")
+ if hasattr(pipe.scheduler, 'config'):
+ print(f" scheduler.config : {dict(pipe.scheduler.config)}")
+ if hasattr(pipe.scheduler, 'path'):
+ print(f" scheduler.path : {type(pipe.scheduler.path).__name__}")
+ if hasattr(pipe.scheduler.path, 'emb'):
+ emb = pipe.scheduler.path.emb
+ print(f" path.emb shape : {emb.shape}")
+ print(f" path.emb device : {emb.device}")
+ print(f" path.emb dtype : {emb.dtype}")
+ else:
+ print(f" scheduler.path : MISSING (scheduler not fully loaded!)")
+ print(f" codebook_size : {getattr(pipe.scheduler, 'codebook_size', 'N/A')}")
+ print(f" transformer class : {type(pipe.transformer).__name__}")
+ print(f" transformer device : {next(pipe.transformer.parameters()).device}")
+ print(f" vae class : {type(pipe.vae).__name__}")
+ if hasattr(pipe, 'image_processor'):
+ print(f" image_processor : {type(pipe.image_processor).__name__}")
+ print(f"{'='*60}\n")
+
+
+def _debug_frames(frames, label=""):
+ """Print diagnostic info about generated frames."""
+ if not frames:
+ print(f" [{label}] No frames generated!")
+ return
+ f0 = frames[0]
+ print(f" [{label}] n_frames={len(frames)} shape={f0.shape} "
+ f"dtype={f0.dtype} min={f0.min()} max={f0.max()}")
+
+
+def _verify_state_dict_swap(pipe, state_dict, label=""):
+ """Verify transformer weights actually changed after load_state_dict."""
+ sample_key = next(iter(state_dict.keys()))
+ loaded_val = state_dict[sample_key].flatten()[:8]
+ current_val = pipe.transformer.state_dict()[sample_key].flatten()[:8]
+ match = torch.allclose(loaded_val.cpu().float(), current_val.cpu().float(), atol=1e-6)
+ print(f" [{label}] state_dict match for '{sample_key}': {match}")
+ if not match:
+ print(f" loaded : {loaded_val[:4]}")
+ print(f" current : {current_val[:4]}")
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
def main():
    """Run student (1-step) vs teacher (multi-step) evaluation.

    Loads the teacher pipeline once, snapshots its transformer weights,
    then for every prompt × seed swaps the student/teacher state dicts in
    and out of the shared transformer, generating one mp4 per mode.
    """
    args = parse_args()

    dtype_map = {"fp16": torch.float16, "bf16": torch.bfloat16, "fp32": torch.float32}
    dtype = dtype_map[args.mixed_precision]
    device = (torch.device("cuda", args.device)
              if torch.cuda.is_available() else torch.device("cpu"))
    os.makedirs(args.out_dir, exist_ok=True)

    # -- Verified regime check --------------------------------------------
    is_native = check_verified_regime(
        height=args.height,
        width=args.width,
        num_frames=args.num_frames,
        guidance_scale=args.eval_cfg_scale,
        teacher_steps=args.teacher_steps,
        label="eval",
    )
    print(f"[eval] verified_native_regime={is_native}")
    print(f"[eval] geometry=({args.num_frames},{args.height},{args.width}), "
          f"guidance_scale={args.eval_cfg_scale}, teacher_steps={args.teacher_steps}")

    # -- no_cfg safety gate -----------------------------------------------
    # NOTE(review): despite the flag's name this gate only warns — it never
    # actually blocks no_cfg. Confirm whether a hard block was intended.
    all_modes = list(args.student_modes) + list(args.teacher_modes)
    if "no_cfg" in all_modes:
        if args.allow_bad_nocfg:
            print("[WARN] no_cfg is known to fail for this URSA checkpoint. "
                  "Outputs may be blank or blurry. Proceeding because --allow_bad_nocfg is set.")
        else:
            print("[WARN] no_cfg is known to fail for this URSA checkpoint. "
                  "Outputs may be blank or blurry. "
                  "Pass --allow_bad_nocfg to override this warning.")

    # -- Load prompts -----------------------------------------------------
    # Blank lines and '#'-prefixed lines in the prompt file are skipped.
    if args.prompt_file:
        with open(args.prompt_file, encoding="utf-8") as f:
            prompts = [l.strip() for l in f if l.strip() and not l.startswith("#")]
    else:
        prompts = DEFAULT_PROMPTS

    print(f"[eval] {len(prompts)} prompts × {len(args.seeds)} seeds "
          f"| student modes={args.student_modes} "
          f"| teacher modes={args.teacher_modes}")
    print(f"[eval] guidance_scale={args.eval_cfg_scale} "
          f"guidance_trunc={args.guidance_trunc} "
          f"teacher_steps={args.teacher_steps}")
    print(f"[eval] negative_prompt='{args.negative_prompt[:60]}...'")

    # -- Load pipeline (teacher) ------------------------------------------
    print(f"[eval] Loading pipeline from {args.teacher_ckpt} …")
    # (Patch point 2) Try to enable Flash Attention 2 first.
    try:
        pipe = URSAPipeline.from_pretrained(
            args.teacher_ckpt,
            torch_dtype=dtype,
            trust_remote_code=True,
            attn_implementation="flash_attention_2"
        ).to(device)
    except Exception:
        # Environment does not support FA2 — fall back to the default attention.
        pipe = URSAPipeline.from_pretrained(
            args.teacher_ckpt, torch_dtype=dtype, trust_remote_code=True
        ).to(device)

    if hasattr(pipe.vae, "disable_slicing"):
        pipe.vae.disable_slicing()
    if hasattr(pipe.vae, "disable_tiling"):
        pipe.vae.disable_tiling()

    # print("[eval] Compiling transformer (this takes ~2 mins for the first time)...")
    # pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead")

    # Diagnostic: verify scheduler loaded correctly
    _debug_pipeline(pipe, label="after from_pretrained + .to(device)")

    # CRITICAL CHECK: scheduler must have .path with embeddings
    if not hasattr(pipe.scheduler, 'path') or pipe.scheduler.path is None:
        print("[ERROR] Scheduler path not loaded! This will cause blurry output.")
        print("[ERROR] The scheduler needs scheduler_model.pth with codebook embeddings.")
        return

    if pipe.scheduler.codebook_size == 0:
        print("[ERROR] codebook_size=0 — scheduler not properly initialized!")
        return

    # Save teacher state for switching back after student inference
    teacher_state = {k: v.clone() for k, v in pipe.transformer.state_dict().items()}

    # -- Load student checkpoint ------------------------------------------
    print(f"[eval] Loading student weights from {args.student_ckpt} …")
    student_state = torch.load(
        args.student_ckpt, map_location=device, weights_only=True
    )
    print(f"[eval] student state_dict keys: {len(student_state)} "
          f"sample key: {next(iter(student_state.keys()))}")

    # Common kwargs for every pipeline call
    gen_kwargs = dict(
        num_frames=args.num_frames,
        height=args.height,
        width=args.width,
        guidance_trunc=args.guidance_trunc,
        max_prompt_length=args.max_prompt_length,
        vae_batch_size=args.vae_batch_size,
    )

    # Mode → guidance_scale mapping
    student_guidance = {
        "no_cfg": 1.0,
        "cfg": args.eval_cfg_scale,
        "baked": 1.0,
    }
    teacher_guidance = {
        "no_cfg": 1.0,
        "cfg": args.eval_cfg_scale,
    }

    # -- Evaluation loop --------------------------------------------------
    for idx, prompt in enumerate(prompts):
        p_slug = slug(prompt)
        print(f"\n[{idx+1}/{len(prompts)}] {prompt[:70]}")

        for seed in args.seeds:
            # ---- Student: 1-step generation -----------------------------
            for mode in args.student_modes:
                g_scale = student_guidance[mode]
                # Negative prompt is only meaningful when CFG is active.
                neg = args.negative_prompt if g_scale > 1 else None
                pipe.transformer.load_state_dict(student_state, strict=True)
                pipe.transformer.eval()

                # Extra diagnostics only on the very first prompt/seed pair.
                if idx == 0 and seed == args.seeds[0]:
                    _verify_state_dict_swap(pipe, student_state, f"student/{mode}")

                with torch.no_grad():
                    frames = _gen(pipe, prompt, neg, seed,
                                  guidance_scale=g_scale,
                                  num_inference_steps=1,
                                  device=device, **gen_kwargs)

                if idx == 0 and seed == args.seeds[0]:
                    _debug_frames(frames, f"student/{mode}")

                path = os.path.join(
                    args.out_dir,
                    f"{idx:02d}_s{seed}_{p_slug}_student_1step_{mode}.mp4",
                )
                frames_to_mp4(frames, path, fps=args.fps)
                print(f" [student/{mode:6s}] seed={seed} scale={g_scale} → {path}")

            # ---- Teacher: multi-step reference --------------------------
            for t_mode in args.teacher_modes:
                g_scale = teacher_guidance[t_mode]
                neg = args.negative_prompt if g_scale > 1 else None
                # Restore the original teacher weights before teacher runs.
                pipe.transformer.load_state_dict(teacher_state, strict=True)
                pipe.transformer.eval()

                if idx == 0 and seed == args.seeds[0]:
                    _verify_state_dict_swap(pipe, teacher_state, f"teacher/{t_mode}")

                with torch.no_grad():
                    frames = _gen(pipe, prompt, neg, seed,
                                  guidance_scale=g_scale,
                                  num_inference_steps=args.teacher_steps,
                                  device=device, **gen_kwargs)

                if idx == 0 and seed == args.seeds[0]:
                    _debug_frames(frames, f"teacher/{t_mode}")

                path = os.path.join(
                    args.out_dir,
                    f"{idx:02d}_s{seed}_{p_slug}_teacher_{args.teacher_steps}step_{t_mode}.mp4",
                )
                frames_to_mp4(frames, path, fps=args.fps)
                print(f" [teacher/{t_mode:6s}] seed={seed} scale={g_scale} "
                      f"steps={args.teacher_steps} → {path}")

    print(f"\n[eval] Done. Results in {args.out_dir}")
    _print_guide(args)
+
+
def _print_guide(args):
    """Print a short interpretation guide for the generated video files.

    NOTE(review): the box borders assume short interpolated values;
    unusually wide eval_cfg_scale / teacher_steps values will misalign
    the right edge (cosmetic only).
    """
    print(f"""
╔══════════════════════════════════════════════════════════════╗
║ Interpretation guide ║
╠══════════════════════════════════════════════════════════════╣
║ student_1step_cfg : 1-step, guidance_scale={args.eval_cfg_scale:<4} ║
║ (verified working student mode) ║
║ student_1step_baked : 1-step, guidance_scale=1 ║
║ (for students trained with CFG KD) ║
║ teacher_{args.teacher_steps}step_cfg : {args.teacher_steps}-step, guidance_scale={args.eval_cfg_scale:<4} ║
║ (verified working teacher mode) ║
╠══════════════════════════════════════════════════════════════╣
║ NOTE: no_cfg (guidance_scale=1) is NOT a valid baseline ║
║ for this URSA checkpoint. Use --allow_bad_nocfg to override.║
╚══════════════════════════════════════════════════════════════╝""")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/URSA/scripts/eval_onestep_ursa.py b/URSA/scripts/eval_onestep_ursa.py
new file mode 100644
index 0000000000000000000000000000000000000000..00a500938cbff72c7a800a998c8923120db3b2ed
--- /dev/null
+++ b/URSA/scripts/eval_onestep_ursa.py
@@ -0,0 +1,336 @@
+#!/usr/bin/env python3
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------
+"""Evaluation script: compare student 1-step variants vs multi-step teacher.
+
+Verified native inference regime (from A/B testing — ground truth):
+ height=320, width=512, num_frames=49, guidance_scale=7, teacher_steps=50.
+ no_cfg (guidance_scale=1) does NOT produce valid output for this URSA checkpoint.
+
+Student generation modes
+------------------------
+ cfg : 1-step, guidance_scale=7 (verified working student mode)
+ baked : 1-step, guidance_scale=1 (for students trained with CFG KD)
+
+Teacher generation modes
+------------------------
+ cfg : 50-step, guidance_scale=7 (verified working teacher mode)
+
+Usage:
+ python scripts/eval_onestep_ursa.py \\
+ --teacher_ckpt /path/to/URSA \\
+ --student_ckpt ./outputs/dimo/final/student.pt \\
+ --modes cfg \\
+ --eval_cfg_scale 7.0 \\
+ --num_frames 49 --height 320 --width 512 \\
+ --teacher_steps 50 \\
+ --out_dir ./outputs/eval
+"""
+
+import argparse
+import os
+import sys
+
+import numpy as np
+import torch
+
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+if _REPO_ROOT not in sys.path:
+ sys.path.insert(0, _REPO_ROOT)
+
+from diffnext.pipelines import URSAPipeline
+from diffnext.utils import export_to_video
+
+
+# ---------------------------------------------------------------------------
+# Default prompts and seeds
+# ---------------------------------------------------------------------------
+
# Prompts used when --prompt_file is not supplied on the command line.
DEFAULT_PROMPTS = [
    "a lone grizzly bear walks through a misty forest at dawn, sunlight catching its fur.",
    "beautiful fireworks in the sky with red, white and blue.",
    "a wave crashes on a rocky shoreline at sunset, slow motion.",
    "a hummingbird hovers in front of a red flower, wings a blur.",
    "timelapse of clouds rolling over mountain peaks.",
    "a neon-lit city street at night with rain-soaked reflections.",
    "a kitten playing with a ball of yarn on a wooden floor.",
    "astronaut floating weightlessly inside a space station.",
]

# Seeds evaluated per prompt when --seeds is not supplied.
DEFAULT_SEEDS = [0, 1, 2, 3]
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
def parse_args():
    """Build and parse the CLI for the 1-step student vs teacher evaluation.

    Returns:
        argparse.Namespace with checkpoint paths, output directory, video
        geometry, generation modes, sampling parameters, prompt/seed
        selection and device settings.
    """
    p = argparse.ArgumentParser(description="URSA 1-step student eval vs teacher")

    # Checkpoints and output location.
    p.add_argument("--teacher_ckpt", required=True, help="URSA diffusers pipeline dir")
    p.add_argument("--student_ckpt", required=True,
                   help="student.pt checkpoint from train_onestep_ursa_dimo.py")
    p.add_argument("--out_dir", default="./outputs/eval")

    # Geometry (verified native: 320×512×49)
    p.add_argument("--num_frames", type=int, default=49)
    p.add_argument("--height", type=int, default=320)
    p.add_argument("--width", type=int, default=512)
    p.add_argument("--fps", type=int, default=12)

    # Generation — default: cfg only (no_cfg is known to fail)
    p.add_argument("--modes", nargs="+", default=["cfg"],
                   choices=["no_cfg", "cfg", "baked"],
                   help="Student generation modes. Default: ['cfg']. "
                        "no_cfg is known to produce blank/blurry output.")
    p.add_argument("--eval_cfg_scale", type=float, default=7.0,
                   help="Guidance scale for 'cfg' mode (verified working value=7)")
    p.add_argument("--teacher_steps", type=int, default=50,
                   help="Inference steps for teacher (verified default=50)")
    p.add_argument("--teacher_modes", nargs="+", default=["cfg"],
                   choices=["no_cfg", "cfg"],
                   help="Teacher modes. Default: ['cfg']. "
                        "no_cfg is NOT a valid baseline for this checkpoint.")
    p.add_argument("--guidance_trunc", type=float, default=0.9,
                   help="Truncation threshold for inference CFG (passed to pipeline)")
    p.add_argument("--max_prompt_length", type=int, default=320)
    p.add_argument("--vae_batch_size", type=int, default=1)

    # Data
    p.add_argument("--prompt_file", default=None,
                   help="Optional: text file with one prompt per line")
    p.add_argument("--seeds", nargs="*", type=int, default=DEFAULT_SEEDS)

    # Device
    p.add_argument("--device", type=int, default=0)
    p.add_argument("--mixed_precision", default="bf16", choices=["fp16", "bf16", "fp32"])

    return p.parse_args()
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
def slug(text: str, max_len: int = 40) -> str:
    """Lowercase *text*, drop non-alphanumeric characters (keeping spaces
    as word separators), join words with underscores and cap at *max_len*.
    Falls back to "prompt" when nothing survives the filtering.
    """
    cleaned = "".join(filter(lambda c: c.isalnum() or c == " ", text.lower()))
    joined = "_".join(cleaned.split())
    return joined[:max_len] or "prompt"
+
+
def frames_to_mp4(frames, path: str, fps: int = 12):
    """Export *frames* as an mp4 video at *path* (parent dirs created)."""
    target_dir = os.path.dirname(os.path.abspath(path))
    os.makedirs(target_dir, exist_ok=True)
    if isinstance(frames, np.ndarray):
        if frames.ndim == 4:
            frames = list(frames)
    export_to_video(frames, output_video_path=path, fps=fps)
+
+
+def _extract_frames(frames_output):
+ """Normalise pipeline output → list of uint8 numpy arrays [H, W, 3]."""
+ if isinstance(frames_output, np.ndarray):
+ frames_output = frames_output[0] if frames_output.ndim == 5 else frames_output
+ frames = list(frames_output)
+ elif isinstance(frames_output, list):
+ frames = [np.array(f) if not isinstance(f, np.ndarray) else f for f in frames_output]
+ else:
+ raise TypeError(f"Unexpected frames type: {type(frames_output)}")
+ result = []
+ for f in frames:
+ if f.dtype != np.uint8:
+ f = (f * 255).clip(0, 255).astype(np.uint8) if f.max() <= 1.0 else f.astype(np.uint8)
+ result.append(f)
+ return result
+
+
# Official URSA negative prompt; used in main() only when guidance_scale > 1.
# NOTE(review): defined mid-file between helpers — consider moving next to
# DEFAULT_PROMPTS for consistency with eval_distill_dimo.py.
DEFAULT_NEGATIVE_PROMPT = (
    "worst quality, low quality, inconsistent motion, static, still, "
    "blurry, jittery, distorted, ugly"
)
+
+
def _gen(pipe, prompt, seed, num_frames, height, width, guidance_scale,
         num_inference_steps, guidance_trunc, max_prompt_length, vae_batch_size,
         device, negative_prompt=None):
    """Single generation call, returns list of uint8 frames."""
    rng = torch.Generator(device=device).manual_seed(seed)
    pipeline_args = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "height": height,
        "width": width,
        "num_frames": num_frames,
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "guidance_trunc": guidance_trunc,
        "max_prompt_length": max_prompt_length,
        "vae_batch_size": vae_batch_size,
        "output_type": "np",
        "generator": rng,
    }
    out = pipe(**pipeline_args)
    return _extract_frames(out.frames)
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
def main():
    """Run student (1-step) vs teacher (multi-step) evaluation.

    Loads the URSA pipeline once, snapshots the teacher transformer
    weights, then for every prompt × seed swaps student/teacher state
    dicts in and out of the shared transformer, writing one mp4 per mode.
    """
    args = parse_args()

    dtype_map = {"fp16": torch.float16, "bf16": torch.bfloat16, "fp32": torch.float32}
    dtype = dtype_map[args.mixed_precision]
    device = torch.device("cuda", args.device) if torch.cuda.is_available() else torch.device("cpu")
    os.makedirs(args.out_dir, exist_ok=True)

    # -- Verified regime validation ----------------------------------------
    # The "native" regime is the only configuration verified by A/B testing
    # (see module docstring); anything else gets a warning, not a block.
    _NATIVE = dict(height=320, width=512, num_frames=49, guidance_scale=7.0, teacher_steps=50)
    is_native = (
        args.height == _NATIVE["height"]
        and args.width == _NATIVE["width"]
        and args.num_frames == _NATIVE["num_frames"]
        and args.eval_cfg_scale == _NATIVE["guidance_scale"]
        and args.teacher_steps == _NATIVE["teacher_steps"]
    )
    print(f"[eval] verified_native_regime={is_native}")
    print(f"[eval] geometry=({args.num_frames},{args.height},{args.width}), "
          f"guidance_scale={args.eval_cfg_scale}, teacher_steps={args.teacher_steps}")
    if not is_native:
        print(f"[WARN] Current config deviates from the verified native URSA regime "
              f"({_NATIVE['num_frames']}×{_NATIVE['height']}×{_NATIVE['width']}, "
              f"cfg={_NATIVE['guidance_scale']}, steps={_NATIVE['teacher_steps']}).")

    all_modes = list(args.modes) + list(args.teacher_modes)
    if "no_cfg" in all_modes:
        print("[WARN] no_cfg is known to fail for this URSA checkpoint. "
              "Outputs may be blank or blurry.")

    # -- Load prompts -----------------------------------------------------
    # Blank lines and '#'-prefixed lines in the prompt file are skipped.
    if args.prompt_file:
        with open(args.prompt_file, encoding="utf-8") as f:
            prompts = [l.strip() for l in f if l.strip() and not l.startswith("#")]
    else:
        prompts = DEFAULT_PROMPTS

    print(f"[eval] {len(prompts)} prompts × {len(args.seeds)} seeds "
          f"| student modes={args.modes} | teacher modes={args.teacher_modes}")

    # -- Load pipeline ---------------------------------------------------
    print(f"[eval] Loading pipeline from {args.teacher_ckpt} …")
    pipe = URSAPipeline.from_pretrained(
        args.teacher_ckpt, torch_dtype=dtype, trust_remote_code=True
    ).to(device)

    # -- Load student checkpoint -----------------------------------------
    print(f"[eval] Loading student weights from {args.student_ckpt} …")
    student_state = torch.load(args.student_ckpt, map_location=device, weights_only=True)
    # Snapshot teacher weights so they can be restored after student runs.
    teacher_state = {k: v.clone() for k, v in pipe.transformer.state_dict().items()}

    # Common kwargs passed to every pipeline call
    gen_kwargs = dict(
        num_frames=args.num_frames,
        height=args.height,
        width=args.width,
        guidance_trunc=args.guidance_trunc,
        max_prompt_length=args.max_prompt_length,
        vae_batch_size=args.vae_batch_size,
    )

    # Mode → guidance_scale mapping
    # no_cfg : single forward, no guidance
    # cfg    : dual forward, eval_cfg_scale
    # baked  : single forward, no guidance (student trained with guided KD)
    student_guidance = {
        "no_cfg": 1.0,
        "cfg": args.eval_cfg_scale,
        "baked": 1.0,
    }
    teacher_guidance = {
        "no_cfg": 1.0,
        "cfg": args.eval_cfg_scale,
    }

    # -- Evaluation loop -------------------------------------------------
    for idx, prompt in enumerate(prompts):
        p_slug = slug(prompt)
        print(f"\n[{idx+1}/{len(prompts)}] {prompt[:70]}")

        for seed in args.seeds:
            # ---- Student: selected modes --------------------------------
            for mode in args.modes:
                g_scale = student_guidance[mode]
                # Negative prompt is only meaningful when CFG is active.
                neg = DEFAULT_NEGATIVE_PROMPT if g_scale > 1 else None
                pipe.transformer.load_state_dict(student_state, strict=True)
                pipe.transformer.eval()

                with torch.no_grad():
                    frames = _gen(pipe, prompt, seed,
                                  guidance_scale=g_scale,
                                  num_inference_steps=1,
                                  negative_prompt=neg,
                                  device=device, **gen_kwargs)

                path = os.path.join(
                    args.out_dir,
                    f"{idx:02d}_s{seed}_{p_slug}_student_1step_{mode}.mp4",
                )
                frames_to_mp4(frames, path, fps=args.fps)
                print(f" [student/{mode:6s}] seed={seed} scale={g_scale} → {path}")

            # ---- Teacher: reference videos ------------------------------
            for t_mode in args.teacher_modes:
                g_scale = teacher_guidance[t_mode]
                neg = DEFAULT_NEGATIVE_PROMPT if g_scale > 1 else None
                # Restore the original teacher weights before teacher runs.
                pipe.transformer.load_state_dict(teacher_state, strict=True)
                pipe.transformer.eval()

                with torch.no_grad():
                    frames = _gen(pipe, prompt, seed,
                                  guidance_scale=g_scale,
                                  num_inference_steps=args.teacher_steps,
                                  negative_prompt=neg,
                                  device=device, **gen_kwargs)

                path = os.path.join(
                    args.out_dir,
                    f"{idx:02d}_s{seed}_{p_slug}_teacher_{args.teacher_steps}step_{t_mode}.mp4",
                )
                frames_to_mp4(frames, path, fps=args.fps)
                print(f" [teacher/{t_mode:6s}] seed={seed} scale={g_scale} "
                      f"steps={args.teacher_steps} → {path}")

    print(f"\n[eval] Done. Results in {args.out_dir}")
    _print_interpretation_guide(args)
+
+
def _print_interpretation_guide(args):
    """Print an interpretation guide for the generated video files.

    NOTE(review): the box borders assume short interpolated values;
    unusually wide eval_cfg_scale / teacher_steps values will misalign
    the right edge (cosmetic only).
    """
    print(f"""
╔══════════════════════════════════════════════════════════════╗
║ Interpretation guide for generated videos ║
╠══════════════════════════════════════════════════════════════╣
║ student_1step_cfg : 1-step + CFG={args.eval_cfg_scale:<4} ║
║ (verified working student mode) ║
║ student_1step_baked : 1-step, guidance_scale=1 ║
║ (for students trained with CFG KD) ║
║ teacher_{args.teacher_steps}step_cfg : {args.teacher_steps}-step + CFG={args.eval_cfg_scale:<4} ║
║ (verified working teacher mode) ║
╠══════════════════════════════════════════════════════════════╣
║ NOTE: no_cfg (guidance_scale=1) is NOT a valid baseline ║
║ for this URSA checkpoint — outputs are blank or blurry. ║
╚══════════════════════════════════════════════════════════════╝""")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/URSA/scripts/test_patches_mock.py b/URSA/scripts/test_patches_mock.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cfbf39f14cc046864420486cd1c3886015c0e94
--- /dev/null
+++ b/URSA/scripts/test_patches_mock.py
@@ -0,0 +1,461 @@
+#!/usr/bin/env python3
+"""Self-contained mock test for all 6 patches in train_onestep_ursa_dimo.py.
+
+Does NOT require loading the real URSA pipeline.
+Exercises:
+ (1) Batch-concat [2B] forward — verified via forward call counts
+ (2) reward / adv detach — runtime assertions
+ (3) _stable_kl / _stable_jeffrey (float32 + log_softmax)
+ (4) Separate loss_aux_cond / loss_aux_uncond / loss_kd_cond / loss_kd_uncond logging
+ (5) use_guided per-sample shape [B] and ratio
+ (6) flex_attn offsets probe / reset
+
+Run:
+ python scripts/test_patches_mock.py
+"""
+import sys, os
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+import types, copy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import helpers from the training script directly
+import importlib.util
+spec = importlib.util.spec_from_file_location(
+ "train", os.path.join(os.path.dirname(__file__), "train_onestep_ursa_dimo.py"))
+train_mod = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(train_mod)
+
+_stable_kl = train_mod._stable_kl
+_stable_jeffrey = train_mod._stable_jeffrey
+_build_guided_logits = train_mod._build_guided_logits
+_select_target = train_mod._select_target
+_cfg_warmup_prob = train_mod._cfg_warmup_prob
+_compute_cfg_scale = train_mod._compute_cfg_scale
+_probe_flex_attn = train_mod._probe_flex_attn
+_reset_flex_attn = train_mod._reset_flex_attn
+_print_flex_attn_state = train_mod._print_flex_attn_state
+_token_histogram_entropy = train_mod._token_histogram_entropy
+
+print("=" * 70)
+print("URSA distillation patch self-test (mock)")
+print("=" * 70)
+
+device = torch.device("cpu")
+B, N, K = 2, 12, 64 # small numbers for speed
+
+# =========================================================================
+# Patch (3): _stable_kl / _stable_jeffrey — float32 + log_softmax
+# =========================================================================
+print("\n[3] Testing _stable_kl / _stable_jeffrey …")
+torch.manual_seed(0)
+z_p = torch.randn(B, N, K)
+z_q = torch.randn(B, N, K)
+
+kl_pq = _stable_kl(z_p, z_q)
+kl_qp = _stable_kl(z_q, z_p)
+jeff = _stable_jeffrey(z_p, z_q)
+
+assert kl_pq.shape == (B,), f"kl_pq shape={kl_pq.shape}"
+assert (kl_pq >= 0).all(), "KL must be non-negative"
+assert (kl_qp >= 0).all(), "KL must be non-negative (reverse)"
+assert torch.allclose(jeff, kl_pq + kl_qp, atol=1e-5), "Jeffrey ≠ KL(p||q) + KL(q||p)"
+assert not torch.isnan(kl_pq).any(), "kl_pq has NaN"
+assert not torch.isinf(kl_pq).any(), "kl_pq has Inf"
+
+# KL(p||p) == 0
+kl_pp = _stable_kl(z_p, z_p)
+assert kl_pp.abs().max() < 1e-5, f"KL(p||p) should be ~0, got {kl_pp}"
+
+# Numerics with large logits (simulate s=3 amplification)
+z_large = z_p * 50.0
+kl_large = _stable_kl(z_large, z_q)
+assert not torch.isnan(kl_large).any(), "kl_large has NaN with large logits"
+assert not torch.isinf(kl_large).any(), "kl_large has Inf with large logits"
+
+print(f" kl_pq = {kl_pq.tolist()} (both ≥0 ✓)")
+print(f" jeffrey= {jeff.tolist()} (= kl_pq + kl_qp ✓)")
+print(f" kl(p,p)= {kl_pp.tolist()} (≈0 ✓)")
+print(f" kl with z*50: {kl_large.tolist()} (finite ✓)")
+print("[3] _stable_kl / _stable_jeffrey PASSED ✓")
+
+# =========================================================================
+# Patch (3b): _build_guided_logits — float32, per-sample scale
+# =========================================================================
+print("\n[3b] Testing _build_guided_logits …")
+z_cond = torch.randn(B, N, K)
+z_uncond = torch.randn(B, N, K)
+t = torch.tensor([0.3, 0.95]) # one below, one above trunc=0.9
+z_guided = _build_guided_logits(z_cond, z_uncond, t, cfg_scale=3.0, trunc=0.9)
+
+assert z_guided.shape == (B, N, K), f"z_guided.shape={z_guided.shape}"
+assert not torch.isnan(z_guided).any(), "z_guided has NaN"
+assert not torch.isinf(z_guided).any(), "z_guided has Inf"
+
+# Sample 0: t=0.3 < trunc → scale=3
+# z_guided[0] = z_uncond[0] + 3*(z_cond[0] - z_uncond[0])
+expected_0 = z_uncond[0] + 3.0 * (z_cond[0] - z_uncond[0])
+assert torch.allclose(z_guided[0], expected_0, atol=1e-5), "sample 0 guided mismatch"
+# Sample 1: t=0.95 >= trunc → scale=1
+expected_1 = z_uncond[1] + 1.0 * (z_cond[1] - z_uncond[1])
+assert torch.allclose(z_guided[1], expected_1, atol=1e-5), "sample 1 (trunc) mismatch"
+
+g_min, g_max, g_mean = z_guided.min().item(), z_guided.max().item(), z_guided.mean().item()
+print(f" z_T_guided shape={z_guided.shape} min={g_min:.3f} max={g_max:.3f} mean={g_mean:.3f}")
+assert abs(g_min) < 1e4 and abs(g_max) < 1e4, f"guided logits exploded: [{g_min:.1e}, {g_max:.1e}]"
+print("[3b] _build_guided_logits PASSED ✓")
+
+# =========================================================================
+# Patch (5): use_guided per-sample [B] shape + ratio
+# =========================================================================
+print("\n[5] Testing per-sample use_guided …")
+torch.manual_seed(42)
+
+# After warmup (step >> warmup_steps) → p = cfg_prob = 1.0
+prob_full = _cfg_warmup_prob(step=10000, cfg_prob=1.0, warmup_steps=2000)
+assert abs(prob_full - 1.0) < 1e-6, f"full warmup prob={prob_full}"
+
+# During warmup at step=1000 with warmup_steps=2000 → p = 0.5
+prob_half = _cfg_warmup_prob(step=1000, cfg_prob=1.0, warmup_steps=2000)
+assert abs(prob_half - 0.5) < 1e-6, f"half warmup prob={prob_half}"
+
+# Per-sample sampling
+torch.manual_seed(0)
+use_guided = torch.rand(B) < 0.5 # [B] bool
+assert use_guided.shape == (B,), f"use_guided.shape={use_guided.shape}"
+use_guided_ratio = use_guided.float().mean().item()
+print(f" use_guided={use_guided.tolist()} ratio={use_guided_ratio:.2f}")
+
+# _select_target per-sample
+z_target = _select_target(z_guided, z_cond, use_guided)
+for b in range(B):
+ if use_guided[b]:
+ assert torch.allclose(z_target[b], z_guided[b]), f"sample {b}: guided not selected"
+ else:
+ assert torch.allclose(z_target[b], z_cond[b]), f"sample {b}: cond not selected"
+print(f" _select_target: per-sample selection correct ✓")
+print("[5] Per-sample use_guided PASSED ✓")
+
+# =========================================================================
+# Patch (1): Batch-concat [2B] — verified via a tiny linear net
+# =========================================================================
+print("\n[1] Testing batch-concat [2B] forward equivalence …")
+
class TinyModel(nn.Module):
    """Minimal stand-in network: one bias-free Linear over the codebook dim K.

    Tracks how many times forward() is invoked so the batch-concat test can
    prove the [2B] path uses a single call instead of two.
    """

    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(K, K, bias=False)
        # Number of forward() invocations observed so far.
        self._call_count = 0

    def forward(self, x):
        self._call_count += 1
        # Cast to float32 so half/int inputs are accepted by the Linear.
        return self.lin(x.float())
+
+model = TinyModel()
+x_cond = torch.randn(B, N, K)
+x_uncond = torch.randn(B, N, K)
+
+# Separate forward (old way: 2 calls)
+model._call_count = 0
+out_cond_sep = model(x_cond)
+out_uncond_sep = model(x_uncond)
+calls_sep = model._call_count # = 2
+
+# Batch-concat forward (new way: 1 call)
+model._call_count = 0
+x_dual = torch.cat([x_cond, x_uncond], dim=0) # [2B, N, K]
+out_dual = model(x_dual) # [2B, N, K]
+out_cond_bat, out_uncond_bat = out_dual.chunk(2, dim=0)
+calls_bat = model._call_count # = 1
+
+assert calls_sep == 2, f"sep calls={calls_sep}"
+assert calls_bat == 1, f"batch calls={calls_bat}"
+assert torch.allclose(out_cond_sep, out_cond_bat, atol=1e-5), "cond output mismatch"
+assert torch.allclose(out_uncond_sep, out_uncond_bat, atol=1e-5), "uncond output mismatch"
+print(f" Separate: {calls_sep} calls → batch: {calls_bat} call (identical outputs ✓)")
+print("[1] Batch-concat forward PASSED ✓")
+
+# =========================================================================
+# Patch (2): reward / adv detach — no student gradient
+# =========================================================================
+print("\n[2] Testing reward/adv detach …")
+
+z_T = torch.randn(B, N, K).detach() # teacher logits (no grad)
+z_S_with_grad = torch.randn(B, N, K, requires_grad=True) # student logits (has grad)
+
+# Reward computation: z_S must be detached
+reward = -_stable_kl(z_T.detach(), z_S_with_grad.detach(), tau=1.0) # [B]
+assert not reward.requires_grad, \
+ f"[BUG] reward.requires_grad={reward.requires_grad} — gradient leaked"
+
+baseline_ema = 0.0
+adv = (reward - baseline_ema).detach()
+assert not adv.requires_grad, \
+ f"[BUG] adv.requires_grad={adv.requires_grad} — detach failed"
+
+# Verify gradient DOES flow through logp (the differentiable path)
+logits_gen = torch.randn(B, N, K, requires_grad=True)
+p_gen = F.softmax(logits_gen / 1.0, dim=-1)
+x_hat = torch.multinomial(p_gen.view(-1, K).detach(), 1).view(B, N)
+logp = p_gen.clamp(1e-8).log().gather(-1, x_hat.unsqueeze(-1)).squeeze(-1).sum(-1) # [B]
+loss_pg = -(adv * logp).mean()
+loss_pg.backward()
+assert logits_gen.grad is not None, "logits_gen has no grad — REINFORCE broken"
+assert logits_gen.grad.abs().max() > 0, "logits_gen grad is all zeros"
+
+print(f" reward.requires_grad={reward.requires_grad} (must be False ✓)")
+print(f" adv.requires_grad={adv.requires_grad} (must be False ✓)")
+print(f" logits_gen.grad max={logits_gen.grad.abs().max():.4f} (non-zero ✓)")
+print("[2] Reward/adv detach PASSED ✓")
+
+# =========================================================================
+# Patch (4): Separate loss logging keys
+# =========================================================================
+print("\n[4] Testing separate loss logging …")
+
+loss_aux_cond_v = _stable_jeffrey(z_T, z_T + torch.randn_like(z_T) * 0.1, tau=1.0).mean()
+loss_aux_uncond_v = _stable_jeffrey(z_T, z_T + torch.randn_like(z_T) * 0.2, tau=1.0).mean()
+loss_kd_cond = _stable_kl(z_T, z_S_with_grad, tau=1.0).mean()
+loss_kd_uncond_v = _stable_kl(z_T, z_T + torch.randn_like(z_T) * 0.05, tau=1.0).mean()
+
+log_line = (
+ f"[step 1] "
+ f"loss_aux_cond={loss_aux_cond_v.item():.4f} "
+ f"loss_aux_uncond={loss_aux_uncond_v.item():.4f} "
+ f"loss_kd_cond={loss_kd_cond.item():.4f} "
+ f"loss_kd_uncond={loss_kd_uncond_v.item():.4f} "
+ f"loss_pg=0.1234 H=3.123 tok_H=4.500 "
+ f"guided_ratio=0.50 baseline=0.0000 mean_logp=-3.45"
+)
+print(f" Sample log: {log_line}")
+assert "loss_aux_cond=" in log_line
+assert "loss_aux_uncond=" in log_line
+assert "loss_kd_cond=" in log_line
+assert "loss_kd_uncond=" in log_line
+assert "guided_ratio=" in log_line
+print("[4] Separate loss logging format PASSED ✓")
+
+# =========================================================================
+# Patch (6): flex_attn offsets probe / reset
+# =========================================================================
+print("\n[6] Testing flex_attn probe / reset …")
+
+# Case A: model without flex_attn
class ModelNoFlex(nn.Module):
    # Deliberately exposes NO `flex_attn` attribute: _probe_flex_attn must
    # return None and _reset_flex_attn must be a silent no-op for this model.
    pass
+
+m_no_flex = ModelNoFlex()
+fa = _probe_flex_attn(m_no_flex, "no_flex")
+assert fa is None, f"Expected None, got {fa}"
+_reset_flex_attn(m_no_flex, "no_flex", verbose=True) # should not raise
+print(" Model without flex_attn: probe=None, reset is no-op ✓")
+
+# Case B: model WITH flex_attn — simulate FlexAttentionCausal2D
class FakeFlexAttn:
    """Mimics the attribute surface of FlexAttentionCausal2D for reset tests.

    The reset helper is expected to null out exactly these three fields.
    """

    def __init__(self):
        # All state starts unset; the test assigns values before resetting.
        self.offsets = None
        self.block_mask = None
        self.cu_offsets = None
+
class ModelWithFlex(nn.Module):
    """nn.Module exposing a `flex_attn` attribute so the probe can find it."""

    def __init__(self):
        super().__init__()
        self.flex_attn = FakeFlexAttn()
+
+m_flex = ModelWithFlex()
+m_flex.flex_attn.offsets = [0, 50, 370] # simulate set offsets
+m_flex.flex_attn.block_mask = "some_mask"
+m_flex.flex_attn.cu_offsets = torch.tensor([0, 50, 370])
+
+print(" Before reset:")
+_print_flex_attn_state(m_flex, "test_model")
+_reset_flex_attn(m_flex, "test_model", verbose=True)
+print(" After reset:")
+_print_flex_attn_state(m_flex, "test_model")
+
+assert m_flex.flex_attn.offsets is None, "offsets not reset"
+assert m_flex.flex_attn.block_mask is None, "block_mask not reset"
+assert m_flex.flex_attn.cu_offsets is None, "cu_offsets not reset"
+print(" flex_attn.offsets=None, block_mask=None, cu_offsets=None ✓")
+print("[6] flex_attn probe/reset PASSED ✓")
+
+# =========================================================================
+# z_T_guided explosion guard (from _run_assertions)
+# =========================================================================
+print("\n[3c] Testing z_T_guided explosion guard …")
+z_guided_ok = torch.randn(B, N, K) * 10 # normal magnitude
+z_guided_bad = torch.randn(B, N, K) * 2e4 # exploded
+
+assert not torch.isnan(z_guided_ok).any()
+assert not torch.isinf(z_guided_ok).any()
+assert abs(z_guided_ok.min().item()) < 1e4
+
+try:
+ big_min = z_guided_bad.min().item()
+ big_max = z_guided_bad.max().item()
+ assert abs(big_min) < 1e4 and abs(big_max) < 1e4, f"Explosion: [{big_min:.1e}, {big_max:.1e}]"
+ print(" ⚠️ explosion guard NOT triggered (unexpected)")
+except AssertionError as e:
+ print(f" Explosion guard triggered correctly: {e} ✓")
+print("[3c] z_T_guided explosion guard PASSED ✓")
+
# =========================================================================
# Token histogram entropy
# =========================================================================
print("\n[misc] Testing _token_histogram_entropy …")
# Uniformly sampled tokens: entropy should approach log(K).
# (Not asserted: a finite random sample never reaches log(K) exactly.)
x_uniform = torch.randint(0, K, (1, B * N))
H_uniform = _token_histogram_entropy(x_uniform, K)
# BUGFIX-style cleanup: the reference value had a dead `K ** 0 *` factor
# (always 1) multiplying log(K); compute log(K) directly.
log_K = torch.tensor(K).float().log().item()
print(f" uniform entropy={H_uniform:.3f} log(K)={log_K:.3f}")

# Collapsed distribution: every token is 0 → histogram entropy must be ~0.
x_collapsed = torch.zeros(1, B * N, dtype=torch.long)
H_collapsed = _token_histogram_entropy(x_collapsed, K)
assert H_collapsed < 0.01, f"collapsed entropy={H_collapsed} should be ~0"
print(f" collapsed entropy={H_collapsed:.4f} (≈0 ✓)")
print("[misc] _token_histogram_entropy PASSED ✓")
+
+# =========================================================================
+# Patch (7): extract_visual_logits — manual reconstruction
+# =========================================================================
+print("\n[7] extract_visual_logits end-to-end alignment (mock) …")
+import importlib.util as _ilu, sys as _sys
+_spec = _ilu.spec_from_file_location(
+ "_utils", os.path.join(os.path.dirname(__file__), "..", "src", "distill", "utils_ursa_inputs.py"))
+_utils = _ilu.module_from_spec(_spec)
+_spec.loader.exec_module(_utils)
+extract_visual_logits = _utils.extract_visual_logits
+
+# Case A: D == K (URSA default — lm_head outputs K logits directly)
+B7, N7, K7 = 1, 20, 64
+L7 = 8
+logits_full_A = torch.randn(B7, L7 + N7 + 1, K7) # D == K
+z_vis_A = extract_visual_logits(logits_full_A, N7, K7)
+z_seq_A = logits_full_A[:, -(N7+1):-1] # raw causal slice [B, N, D=K]
+delta_A = (z_vis_A - z_seq_A).abs().max().item()
+assert delta_A < 1e-6, f"Case A (D==K) delta={delta_A}"
+print(f" [7a] D={K7}==K: extract == raw slice, delta={delta_A:.2e} ✓")
+
+# Case B: D > K (lm_head larger than codebook — offset=D-K)
+D7B = K7 + 10
+logits_full_B = torch.randn(B7, L7 + N7 + 1, D7B)
+z_vis_B = extract_visual_logits(logits_full_B, N7, K7)
+z_seq_B = logits_full_B[:, -(N7+1):-1] # [B, N, D]
+z_man_B = z_seq_B[..., D7B - K7:] # [B, N, K]
+delta_B = (z_vis_B - z_man_B).abs().max().item()
+assert delta_B < 1e-6, f"Case B (D>K) delta={delta_B}"
+print(f" [7b] D={D7B}>K={K7}: extract == z[..., D-K:], delta={delta_B:.2e} ✓")
+
+# Case C: latent_shift test (D >= latent_shift + K — full-vocab head)
+latent_shift_C = 12
+D7C = latent_shift_C + K7
+logits_full_C = torch.randn(B7, L7 + N7 + 1, D7C)
+# extract_visual_logits with D7C == D7C: D == K? No, D7C=76, K7=64, D>K
+# internal: offset = D7C - K7 = 12 = latent_shift_C → should match [..., latent_shift_C:]
+z_vis_C = extract_visual_logits(logits_full_C, N7, K7)
+z_seq_C = logits_full_C[:, -(N7+1):-1]
+z_man_C1 = z_seq_C[..., latent_shift_C:] # using latent_shift as offset
+z_man_C2 = z_seq_C[..., D7C - K7:] # using D-K as offset (same)
+assert torch.allclose(z_man_C1, z_man_C2), "C1 != C2"
+delta_C = (z_vis_C - z_man_C1).abs().max().item()
+assert delta_C < 1e-6, f"Case C (full-vocab) delta={delta_C}"
+print(f" [7c] D={D7C}=latent_shift+K: extract == z[..., latent_shift:], delta={delta_C:.2e} ✓")
+print("[7] extract_visual_logits alignment PASSED ✓")
+
+# =========================================================================
+# Patch (8): flex_attn semantics sanity (mock — no real model)
+# =========================================================================
+print("\n[8] flex_attn semantics sanity (mock) …")
+# Verify that _reset_flex_attn clears offsets and block_mask
+
class FakeFlexAttn2:
    """Stand-in for a flex-attention module whose offset state is pre-set."""

    def __init__(self):
        # Pre-populated state, as if a previous forward pass configured it.
        self.offsets = [0, 50, 370]
        self.block_mask = "mask_obj"
        self.cu_offsets = torch.tensor([0, 50, 370])

    def set_offsets_by_lens(self, lens):
        """Rebuild cumulative offsets from per-segment lengths; drop the mask."""
        cumulative = [0]
        for segment_len in lens:
            cumulative.append(cumulative[-1] + segment_len)
        self.offsets = cumulative
        self.block_mask = None
+
class ModelFlex2:
    """Plain (non-nn.Module) holder of a `flex_attn` — reset must still work."""

    def __init__(self):
        self.flex_attn = FakeFlexAttn2()
+
+m8 = ModelFlex2()
+print(f" [8] before reset: offsets={m8.flex_attn.offsets}")
+_reset_flex_attn(m8, "m8", verbose=True)
+assert m8.flex_attn.offsets is None
+assert m8.flex_attn.block_mask is None
+assert m8.flex_attn.cu_offsets is None
+print(f" [8] after reset: offsets={m8.flex_attn.offsets} ✓")
+
+# Verify set_offsets_by_lens changes the offsets
+m8.flex_attn.set_offsets_by_lens([16, 60])
+assert m8.flex_attn.offsets == [0, 16, 76], f"offsets={m8.flex_attn.offsets}"
+_reset_flex_attn(m8, "m8")
+assert m8.flex_attn.offsets is None
+print(" [8] set_offsets_by_lens → reset cycle ✓")
+print("[8] flex_attn semantics sanity PASSED (mock) ✓")
+
+# =========================================================================
+# Patch (9): logp/token reshape consistency
+# =========================================================================
+print("\n[9] logp/token reshape consistency …")
+import math as _math
+
+T9, H9, W9 = 3, 4, 5
+N9, B9, K9 = T9 * H9 * W9, 1, K
+
+torch.manual_seed(99)
+z9 = torch.randn(B9, N9, K9)
+p9 = F.softmax(z9 / 1.0, dim=-1) # [1, 60, K]
+
+x_hat_flat = torch.multinomial(p9.view(-1, K9), 1) # [N9, 1]
+x_hat_1d = x_hat_flat.view(B9, N9) # [1, 60]
+x_hat_4d = x_hat_1d.view(B9, T9, H9, W9) # [1, 3, 4, 5]
+
+# reshape round-trip
+x_hat_back = x_hat_4d.view(B9, N9)
+assert torch.equal(x_hat_1d, x_hat_back), "reshape round-trip FAILED"
+
+# logp
+logp_all = p9.clamp(1e-8).log().gather(-1, x_hat_1d.unsqueeze(-1)).squeeze(-1) # [1, 60]
+logp_sum = logp_all.sum(-1)
+
+# 10 spot-checks
+torch.manual_seed(7)
+positions = torch.randperm(N9)[:10].tolist()
+for pos in positions:
+ tok_id = x_hat_1d[0, pos].item()
+ logp_man = _math.log(max(p9[0, pos, tok_id].item(), 1e-8))
+ logp_gat = logp_all[0, pos].item()
+ diff = abs(logp_man - logp_gat)
+ assert diff < 1e-6, f"pos={pos} tok={tok_id} diff={diff:.2e}"
+
+print(
+ f" [9] T={T9},H={H9},W={W9} N={N9} K={K9} "
+ f"reshape ✓ 10 logp spots ✓ logp_sum={logp_sum.item():.3f}"
+)
+print("[9] logp/token reshape consistency PASSED ✓")
+
+# =========================================================================
+# Summary
+# =========================================================================
+print("\n" + "=" * 70)
+print("ALL 9 PATCHES PASSED ✓")
+print("=" * 70)
+print("""
+Patch summary:
+ (1) Batch-concat [2B]: single forward = identical results, half the calls ✓
+ (2) reward/adv detach: no student grad, REINFORCE still flows via logp ✓
+ (3) float32+log_softmax: KL≥0, KL(p,p)≈0, stable with large logits ✓
+ (3b) guided logits: per-sample trunc, finite, explosion guard ✓
+ (4) Separate loss log: loss_aux_cond/uncond + loss_kd_cond/uncond ✓
+ (5) use_guided [B]: per-sample Bernoulli, correct warmup ramp ✓
+ (6) flex_attn: probe returns None/object, reset clears all fields ✓
+ (7) extract_visual_logits: D==K, D>K, full-vocab paths all verified ✓
+ (8) flex_attn semantics: reset/set cycle correct (no real model needed) ✓
+ (9) logp/token reshape: round-trip exact, 10 logp spot-checks < 1e-6 ✓
+""")
diff --git a/URSA/scripts/train.py b/URSA/scripts/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a402bd255cd7bf2e88d2e3cefe2afcd36b83f1d
--- /dev/null
+++ b/URSA/scripts/train.py
@@ -0,0 +1,110 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Train a diffnext model."""
+
+import json
+import os
+
+from diffnext.engine.train_engine import Trainer
+from diffnext.engine.train_engine import engine_utils
+from diffnext.utils import accelerate_utils
+from diffnext.utils import omegaconf_utils
+
+
def prepare_checkpoints(config):
    """Prepare checkpoints for model resuming.

    Resolves ``config.experiment.resume_from_checkpoint`` ("latest", an explicit
    path, or empty) into ``config.experiment.resume_iter`` and, for non-LoRA
    models, overrides the pretrained path with the resumed checkpoint.

    Args:
        config (omegaconf.DictConfig)
            The model config.
    """
    config.experiment.setdefault("resume_from_checkpoint", "")
    ckpt_dir = os.path.abspath(os.path.join(config.experiment.output_dir, "checkpoints"))
    os.makedirs(ckpt_dir, exist_ok=True)
    resume_iter = 0
    if config.experiment.resume_from_checkpoint == "latest":
        # Pick the "checkpoint-<iter>" entry with the highest iteration.
        ckpts = [_ for _ in os.listdir(ckpt_dir) if _.startswith("checkpoint-")]
        if ckpts:
            resume_iter, ckpt = sorted((int(_.split("-")[-1]), _) for _ in ckpts)[-1]
            config.experiment.resume_from_checkpoint = os.path.join(ckpt_dir, ckpt)
    elif config.experiment.resume_from_checkpoint:
        # BUGFIX: os.path.split() returns a (head, tail) tuple, which has no
        # .split() method — the original always raised AttributeError here.
        # Use basename to get "checkpoint-<iter>" and parse the iteration.
        ckpt_name = os.path.basename(config.experiment.resume_from_checkpoint)
        resume_iter = int(ckpt_name.split("-")[-1])
    config.experiment.resume_iter = resume_iter
    if resume_iter and not hasattr(config.model, "lora"):  # Override the pretrained path.
        config.pipeline.paths.pretrained_path = config.experiment.resume_from_checkpoint
+
+
def prepare_datasets(config, accelerator):
    """Prepare datasets for model training.

    Reads the dataset's METADATA file (JSON at the dataset root) and rewrites
    the dataloader/training config accordingly. Two layouts are supported:
    a "bucketed" layout with a per-rank batch size and per-rank sub-folders,
    or a flat dataset that is sharded across processes.

    Args:
        config (omegaconf.DictConfig)
            The model config.
        accelerator (accelerate.Accelerator)
            The accelerator instance.
    """
    dataset = config.train_dataloader.params.dataset
    # NOTE(review): METADATA must contain "entries"; "batch_size" (a per-rank
    # list) and "num_metrics" are optional — schema assumed from usage below,
    # confirm against the dataset-packing tooling.
    metadata = json.load(open(os.path.join(dataset, "METADATA")))
    config.train_dataloader.params.max_examples = metadata["entries"]
    if "batch_size" in metadata:
        # Bucketed layout: rank i reads "<dataset>/00i" with its own batch size.
        batch_size = metadata["batch_size"][accelerator.process_index]
        bucket_dataset = dataset + "/" + str(accelerator.process_index).zfill(3)
        config.train_dataloader.params.dataset = bucket_dataset
        config.train_dataloader.params.batch_size = config.training.batch_size = batch_size
        if "num_metrics" in metadata:
            config.training.num_metrics = metadata["num_metrics"]
    elif "shard_id" not in config.train_dataloader.params:
        # By default, we use dataset shards across all processes.
        config.train_dataloader.params.update(accelerate_utils.get_ddp_shards(accelerator))
+
+
def run_train(config, accelerator, logger):
    """Start a model training task.

    Builds the Trainer, saves the resolved config (main process only), runs
    the training loop, syncs EMA weights once more, and writes the final
    checkpoint.

    Args:
        config (omegaconf.DictConfig)
            The model config.
        accelerator (accelerate.Accelerator)
            The accelerator instance.
        logger (logging.Logger)
            The logger instance.
    """
    trainer = Trainer(config, accelerator, logger)
    if accelerator.is_main_process:  # Configs have already been determined.
        omegaconf_utils.save_config(config, os.path.join(config.experiment.output_dir, "config.yaml"))
        logger.info("#Params: %.2fM" % engine_utils.count_params(trainer.model))
    logger.info("Start training...")
    trainer.train_loop()
    # One last EMA sync before saving, when EMA tracking is enabled.
    if trainer.ema:
        trainer.ema.update(trainer.model)
    trainer.save()
+
+
def main():
    """Main entry point."""
    config = omegaconf_utils.get_config()
    accelerator = accelerate_utils.build_accelerator(config, log_with="wandb")
    accelerate_utils.build_wandb(config, accelerator=accelerator)
    logger = accelerate_utils.set_logger(config.experiment.output_dir, accelerator=accelerator)
    # Offset the base seed by the process index so each rank is seeded uniquely.
    device_seed = config.training.seed + accelerator.process_index
    config.training.gpu_id = accelerator.device.index
    config.training.seed = device_seed
    engine_utils.manual_seed(config.training.seed, (config.training.gpu_id, device_seed))
    prepare_checkpoints(config)
    prepare_datasets(config, accelerator)
    logger.info(f"Config:\n{omegaconf_utils.config_to_yaml(config)}")
    run_train(config, accelerator, logger)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/URSA/scripts/train_distill_dimo.py b/URSA/scripts/train_distill_dimo.py
new file mode 100644
index 0000000000000000000000000000000000000000..529e538dcdd043504fa1dc393357c6b73cdca6c0
--- /dev/null
+++ b/URSA/scripts/train_distill_dimo.py
@@ -0,0 +1,1293 @@
+#!/usr/bin/env python3
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""URSA one-step distillation trainer (DiMO-style), 8-GPU distributed.
+
+Verified native inference regime (from A/B testing — ground truth):
+ height=320, width=512, num_frames=49, guidance_scale=7, teacher_steps=50.
+ no_cfg (guidance_scale=1) is NOT a valid baseline for this URSA checkpoint.
+ Defaults in configs/distill_dimo.yaml are aligned to this regime.
+
+Launch command:
+
+ accelerate launch --config_file accelerate_configs/deepspeed_zero2.yaml \\
+ --machine_rank 0 --num_machines 1 --num_processes 8 \\
+ scripts/train_distill_dimo.py \\
+ config="./configs/distill_dimo.yaml" \\
+ experiment.output_dir="./experiments/distill_dimo" \\
+ distill.teacher_ckpt="/path/to/URSA-1.7B-IBQ1024" \\
+ distill.prompt_source="/data/Koala_36M_*.csv" \\
+ distill.batch_size_per_gpu=1
+
+Smoke test (single-GPU, 50 steps):
+
+ accelerate launch --num_processes 1 \\
+ scripts/train_distill_dimo.py \\
+ config="./configs/distill_dimo.yaml" \\
+ experiment.output_dir="./experiments/smoke" \\
+ distill.teacher_ckpt="/path/to/URSA-1.7B-IBQ1024" \\
+ distill.prompt_source="prompts.txt" \\
+ training.max_train_steps=50
+
+Algorithm summary (9 stages per iteration)
+------------------------------------------
+Stage 1 Tokenize → txt_ids [B, L] (CPU in worker, moved to GPU in run_step)
+Stage 2 x_init ~ Uniform(K) (+ p_init mixing from x_hat_prev)
+Stage 3 no_grad student(x_init) → x_hat [B, N], logp for PG
+Stage 4 x_t = scheduler.add_noise(x_hat_4d, t) [B,T,H,W], long
+Stage 5 no_grad teacher(x_t) → z_T_cond [B,N,K] (+ uncond if CFG)
+Stage 6 aux update × fake_rounds: Jeffrey(z_T_target, z_A_cond).backward()
+Stage 7 student KD forward on x_t → z_S_cond [B,N,K]
+Stage 8 reward = -KL(z_T_cond, z_S_cond) [detached]; adv = reward - baseline_ema
+Stage 9 Two-backward:
+ 9a _no_sync_backward(lambda_kd * loss_kd) [frees KD graph]
+ 9b accelerator.backward(lambda_pg * loss_pg - lambda_ent * H_mean)
+ opt_student.step()
+"""
+
+import collections
+import copy
+import os
+import sys
+from typing import Optional
+
+import torch
+import torch.nn.functional as F
+from torch.utils.data import DataLoader
+
+_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+if _ROOT not in sys.path:
+ sys.path.insert(0, _ROOT)
+
+from diffnext.engine import engine_utils
+from diffnext.engine.lr_scheduler import CosineLR
+from diffnext.pipelines.ursa.pipeline_ursa_distill_dimo import (
+ URSADistillDiMOPipeline,
+ _get_logits,
+ _stable_kl,
+ _stable_jeffrey,
+ _build_guided_logits,
+ _cfg_warmup_prob,
+ _no_sync_backward,
+ _reset_flex_attn,
+ VERIFIED_NATIVE_DEFAULTS,
+ check_verified_regime,
+)
+from diffnext.utils import accelerate_utils
+from diffnext.utils import omegaconf_utils
+from diffnext.utils import profiler
+from src.distill.prompt_dataset import (
+ CSVSpec,
+ InfiniteDataLoader,
+ PromptDataset,
+ make_collate_fn,
+)
+
+
+# ---------------------------------------------------------------------------
+# DistillTwinModel — single nn.Module wrapping student + aux for DeepSpeed
+# ---------------------------------------------------------------------------
+
+
+class DistillTwinModel(torch.nn.Module):
+ """Wrapper that holds both student and aux as sub-modules.
+
+ DeepSpeed (via Accelerate) only allows a single model in
+ ``accelerator.prepare()``. This container satisfies that constraint
+ while keeping student and aux as separately addressable sub-modules
+ with independent param groups.
+ """
+
+ def __init__(self, student: torch.nn.Module, aux: torch.nn.Module):
+ super().__init__()
+ self.student = student
+ self.aux = aux
+
+ def forward(self, which: str, input_ids, rope_pos=None, **kwargs):
+ if which == "student":
+ return self.student(input_ids, rope_pos=rope_pos, **kwargs)
+ elif which == "aux":
+ return self.aux(input_ids, rope_pos=rope_pos, **kwargs)
+ else:
+ raise ValueError(f"DistillTwinModel: unknown sub-model '{which}'")
+
+
+# ---------------------------------------------------------------------------
+# DistillTrainer
+# ---------------------------------------------------------------------------
+
+class DistillTrainer:
+ """Training orchestrator for on-policy one-step distillation.
+
+ Reuses the same accelerate / logger / checkpoint API as
+ ``diffnext.engine.train_engine.Trainer`` so the distributed setup is
+ identical to the original training framework.
+
+ Key differences from standard Trainer:
+ - Three models (teacher frozen, student + aux trainable)
+ - Student and aux are wrapped in a single ``DistillTwinModel`` so that
+ only one ``accelerator.prepare()`` call is needed (DeepSpeed requirement)
+ - One optimizer with two param_groups: [0]=student, [1]=aux
+ - LR schedulers for both param groups
+ - Two-backward strategy within each step
+ - PromptDataset (no video latents; prompt-only)
+ - Stage 6 freezes student / unfreezes aux; Stages 7-9 do the reverse
+ """
+
+    def __init__(self, config, accelerator, logger):
+        """Build the distillation trainer: pipeline, optimizer, data, state.
+
+        Args:
+            config (omegaconf.DictConfig)
+                The full training config; reads ``config.distill`` heavily.
+            accelerator (accelerate.Accelerator)
+                The accelerator instance (DeepSpeed-backed).
+            logger (logging.Logger)
+                The logger instance.
+        """
+        self.config = config
+        self.accelerator = accelerator
+        self.logger = logger
+
+        cfg = config.distill
+        dtype = accelerate_utils.precision_to_dtype(config.training.mixed_precision)
+        self.device = accelerator.device
+
+        # -------- Pipeline (teacher + student + aux) ----------------------
+        logger.info(f"[init] Loading teacher from {cfg.teacher_ckpt} ...")
+        self.pipe = URSADistillDiMOPipeline(
+            teacher_ckpt=cfg.teacher_ckpt,
+            compute_dtype=dtype,
+            aux_noise_std=float(cfg.get("aux_noise_std", 0.0)),
+        )
+
+        # Move teacher to GPU (not prepared by accelerate — frozen).
+        self.pipe.teacher = self.pipe.teacher.to(self.device)
+        self.pipe.scheduler.to(device=self.device)
+
+        # Compute latents shape from video geometry.
+        from src.distill.utils_ursa_inputs import compute_latents_shape
+
+        # Read VAE strides from pipeline (falls back to URSA defaults 4/8).
+        vae_t = int(getattr(self.pipe, "vae_temporal_stride", 4))
+        vae_s = int(getattr(self.pipe, "vae_spatial_stride", 8))
+        self.latents_shape = compute_latents_shape(
+            cfg.num_frames, cfg.height, cfg.width, vae_t, vae_s
+        )
+        T, H, W = self.latents_shape
+        # N = total latent tokens per sample; K = codebook size (vocab of tokens).
+        self.N = T * H * W
+        self.K = self.pipe.codebook_size
+        logger.info(
+            f"[init] latents_shape=({T},{H},{W}) N={self.N} K={self.K} "
+            f"CFG={'ON' if cfg.enable_teacher_cfg else 'OFF'}"
+        )
+
+        # Pre-compute uncond token IDs (empty string, [1, L]) on CPU.
+        self.txt_uncond_base_cpu = self.pipe.tokenizer(
+            [""],
+            max_length=int(cfg.max_prompt_length),
+            padding="max_length",
+            padding_side="left",
+            truncation=True,
+            return_tensors="pt",
+        ).input_ids  # [1, L] CPU
+
+        # -------- Optimizers (before accelerate.prepare) ------------------
+        # Single optimizer with two param groups:
+        # group[0] = student params, group[1] = aux params
+        opt_cls = torch.optim.AdamW
+        opt_s_params = dict(
+            lr=float(config.optimizer_student.params.lr),
+            betas=tuple(config.optimizer_student.params.get("betas", [0.9, 0.95])),
+            weight_decay=float(config.optimizer_student.params.get("weight_decay", 0.01)),
+        )
+        opt_a_params = dict(
+            lr=float(config.optimizer_aux.params.lr),
+            betas=tuple(config.optimizer_aux.params.get("betas", [0.9, 0.95])),
+            weight_decay=float(config.optimizer_aux.params.get("weight_decay", 0.01)),
+        )
+
+        def _enable_gcpt(m):
+            # Enable gradient checkpointing on every transformer layer to cut
+            # activation memory. m.model.layers is the layer list of the
+            # Qwen3Model backbone (per the original author's note).
+            for layer in m.model.layers:
+                layer.gradient_checkpointing = True
+                layer.self_attn.gradient_checkpointing = True
+                layer.mlp.gradient_checkpointing = True
+
+        _enable_gcpt(self.pipe.student)
+        _enable_gcpt(self.pipe.aux)
+
+        # -------- Resume: load weights BEFORE ZeRO-3 partitions params ----
+        self.global_step = int(config.experiment.get("resume_iter", 0))
+        if self.global_step > 0:
+            ckpt_dir = os.path.join(
+                config.experiment.output_dir, "checkpoints", f"checkpoint-{self.global_step}"
+            )
+            if os.path.exists(ckpt_dir):
+                logger.info(f"[Resume] 正在从 {ckpt_dir} 恢复 Student 和 Aux 的权重...")
+                # Load with map_location="cpu" to avoid GPU OOM; the later
+                # accelerator.prepare() call places/partitions the weights.
+                self.pipe.student.load_state_dict(torch.load(os.path.join(ckpt_dir, "student.pt"), map_location="cpu"))
+                self.pipe.aux.load_state_dict(torch.load(os.path.join(ckpt_dir, "aux.pt"), map_location="cpu"))
+            else:
+                logger.warning(f"[Resume] 找不到检查点 {ckpt_dir},将从随机初始状态起步!")
+
+        # -------- Wrap student + aux into a single DistillTwinModel --------
+        twin_model = DistillTwinModel(self.pipe.student, self.pipe.aux)
+
+        opt_raw = opt_cls([
+            {"params": list(self.pipe.student.parameters()), **opt_s_params},
+            {"params": list(self.pipe.aux.parameters()), **opt_a_params},
+        ])
+
+        # -------- accelerate.prepare: single model + single optimizer ------
+        # Teacher is NOT prepared (frozen; no grad sync needed).
+        self.model, self.optimizer = accelerator.prepare(twin_model, opt_raw)
+
+        # LR schedulers (step() called manually at end of each step).
+        self.scheduler_s = CosineLR(
+            lr_max=float(config.optimizer_student.params.lr),
+            lr_min=float(config.lr_scheduler.params.get("lr_min", 1e-6)),
+            max_steps=int(config.training.max_train_steps),
+            warmup_steps=int(config.lr_scheduler.params.get("warmup_steps", 500)),
+        )
+        self.scheduler_a = CosineLR(
+            lr_max=float(config.optimizer_aux.params.lr),
+            lr_min=float(config.lr_scheduler.params.get("lr_min", 1e-6)),
+            max_steps=int(config.training.max_train_steps),
+            warmup_steps=int(config.lr_scheduler.params.get("warmup_steps", 500)),
+        )
+
+        # -------- Dataset / DataLoader ------------------------------------
+        dataloader_cfg = config.get("prompt_dataloader", {})
+        dataset = PromptDataset(
+            prompt_source=str(cfg.prompt_source),
+            shuffle_files=bool(dataloader_cfg.get("shuffle_files", True)),
+            shuffle_buffer=int(dataloader_cfg.get("shuffle_buffer", 0)),
+            seed=int(config.training.seed),
+            infinite=True,
+            csv=CSVSpec(caption_field=str(dataloader_cfg.get("caption_field", "caption"))),
+        )
+
+        # collate_fn: tokenize on CPU (no CUDA in workers).
+        collate_fn = make_collate_fn(
+            self.pipe.tokenizer,
+            max_prompt_length=int(cfg.max_prompt_length),
+            device=torch.device("cpu"),  # CPU output — moved to GPU in run_step
+        )
+
+        loader = DataLoader(
+            dataset,
+            batch_size=int(cfg.batch_size_per_gpu),
+            shuffle=False,  # IterableDataset: no shuffle flag
+            drop_last=True,
+            num_workers=int(dataloader_cfg.get("num_workers", 2)),
+            collate_fn=collate_fn,
+            pin_memory=True,
+        )
+        # DataLoader is NOT prepared by accelerate because PromptDataset
+        # handles per-rank file sharding internally via torch.distributed.
+        self._inf_loader = InfiniteDataLoader(loader)
+
+        # -------- Training state ------------------------------------------
+        # NOTE: self.global_step was already initialized in the resume block above.
+        # self.global_step = int(config.experiment.get("resume_iter", 0))
+        self.baseline_ema: float = 0.0
+        self.x_hat_prev: Optional[torch.Tensor] = None
+        self.metrics = collections.OrderedDict()
+
+        # -------- Verified regime validation --------------------------------
+        native = VERIFIED_NATIVE_DEFAULTS
+        is_native = check_verified_regime(
+            height=int(cfg.height),
+            width=int(cfg.width),
+            num_frames=int(cfg.num_frames),
+            guidance_scale=float(cfg.teacher_cfg_scale) if cfg.enable_teacher_cfg else None,
+            label="train",
+        )
+        logger.info(
+            f"[init] verified_native_regime={is_native} "
+            f"geometry=({cfg.num_frames}×{cfg.height}×{cfg.width}) "
+            f"teacher_cfg_scale={cfg.teacher_cfg_scale if cfg.enable_teacher_cfg else 'OFF'}"
+        )
+        if not cfg.enable_teacher_cfg:
+            logger.warning(
+                "[WARN] Teacher CFG is DISABLED. no_cfg is known to produce "
+                "blank/blurry output for this URSA checkpoint. "
+                "Distillation without CFG is unlikely to produce useful results."
+            )
+        elif float(cfg.teacher_cfg_scale) != native["guidance_scale"]:
+            logger.warning(
+                f"[WARN] teacher_cfg_scale={cfg.teacher_cfg_scale} differs from "
+                f"the verified working value ({native['guidance_scale']}). "
+                "Outputs may deviate from the official inference working point."
+            )
+
+        logger.info(
+            f"[init] student params: {engine_utils.count_params(self.pipe.student):.2f}M"
+        )
+        logger.info(
+            f"[init] max_train_steps={config.training.max_train_steps} "
+            f"batch_size_per_gpu={cfg.batch_size_per_gpu} "
+            f"num_processes={accelerator.num_processes}"
+        )
+
+ # -----------------------------------------------------------------------
+ # run_step: Stages 1-9
+ # -----------------------------------------------------------------------
+
+ def run_step(self, step: int) -> dict:
+ """Execute one distillation step (Stages 1-9)."""
+ cfg = self.config.distill
+ T, H, W = self.latents_shape
+ N, K = self.N, self.K
+ device = self.device
+ stats = {"step": step}
+
+ timer = profiler.Timer().tic()
+
+ # Update LR from cosine schedulers.
+ # param_groups[0] = student, param_groups[1] = aux
+ lr_s = self.scheduler_s.get_lr()
+ lr_a = self.scheduler_a.get_lr()
+ stats["lr_student"] = lr_s
+ stats["lr_aux"] = lr_a
+ self.optimizer.param_groups[0]["lr"] = lr_s
+ self.optimizer.param_groups[1]["lr"] = lr_a
+
+ # ----------------------------------------------------------------
+ # Stage 1: Get tokenised batch (CPU → GPU)
+ # ----------------------------------------------------------------
+ txt_ids = next(self._inf_loader) # [B, L] CPU tensor
+ txt_ids = txt_ids.to(device, non_blocking=True)
+ B = txt_ids.size(0)
+
+ txt_uncond = None
+ if cfg.enable_teacher_cfg:
+ txt_uncond = self.txt_uncond_base_cpu.expand(B, -1).to(device)
+
+ # # ----------------------------------------------------------------
+ # # Stage 2: Sample x_init ~ Uniform(K) with optional p_init mixing
+ # # ----------------------------------------------------------------
+ # x_init = torch.randint(0, K, (B, T, H, W), device=device, dtype=torch.long)
+ # if self.x_hat_prev is not None and float(cfg.p_init_mix_ratio) > 0:
+ # n_mix = max(1, int(B * float(cfg.p_init_mix_ratio)))
+ # x_init[:n_mix] = self.pipe.corrupt_tokens(
+ # self.x_hat_prev[:n_mix], r=float(cfg.p_mix_corrupt_frac)
+ # )
+ # ----------------------------------------------------------------
+ # Stage 2: Sample x_init ~ Uniform(K) with optional p_init mixing
+ # ----------------------------------------------------------------
+ x_init = torch.randint(0, K, (B, T, H, W), device=device, dtype=torch.long)
+
+ # 修复:使用概率触发,确保小 Batch 时模型依然能充分学习处理纯噪声
+ if self.x_hat_prev is not None and float(cfg.p_init_mix_ratio) > 0:
+ if torch.rand(1).item() < float(cfg.p_init_mix_ratio):
+ # 如果触发,只混合 batch 里的第一个样本
+ x_init[0] = self.pipe.corrupt_tokens(
+ self.x_hat_prev[0:1], r=float(cfg.p_mix_corrupt_frac)
+ ).squeeze(0)
+
+ # ----------------------------------------------------------------
+ # Stage 3: Student 1-step on x_init — no_grad (only sample x_hat)
+ #
+ # Gradient-enabled forward on x_init is deferred to Stage 9b so
+ # the KD computation graph (Stage 7, x_t) can be freed first.
+ # ----------------------------------------------------------------
+ with torch.no_grad():
+ ids_init, rpos_init, _ = self.pipe.build_inputs(
+ txt_ids, x_init, self.latents_shape
+ )
+ logits_s_init = _get_logits(
+ self.model("student", ids_init, rope_pos=rpos_init)
+ )
+ z_s = self.pipe.extract_logits(logits_s_init, N) # [B, N, K]
+ p_s = F.softmax(z_s / float(cfg.tau), dim=-1) # [B, N, K]
+ x_hat = torch.multinomial(p_s.view(-1, K), 1).view(B, N) # [B, N]
+
+ # if step == 1:
+ # # 只抽 8 个 token 做 sum=1 检查,别全量
+ # idx = torch.randint(0, N, (8,), device=device)
+ # p_err = (p_s[:, idx].sum(-1) - 1).abs().max().item()
+ # assert p_err < 1e-3, f"p_s subset not normalised: {p_err}"
+ del p_s, z_s, logits_s_init
+
+ x_hat_4d = x_hat.view(B, T, H, W)
+
+ # ----------------------------------------------------------------
+ # Stage 4: Pseudo-intermediate x_t = add_noise(x_hat, t)
+ # ----------------------------------------------------------------
+ t = self.pipe.sample_t_curriculum(
+ B, device, step, int(cfg.t_curriculum_steps)
+ ) # [B] float ∈ (0.05, 0.995)
+ with torch.no_grad():
+ x_t = self.pipe.scheduler.add_noise(x_hat_4d, t) # [B,T,H,W] long
+
+ # # ----------------------------------------------------------------
+ # # Stage 5: Teacher forward — single [2B] forward when CFG enabled
+ # # ----------------------------------------------------------------
+ # with torch.no_grad():
+ # if cfg.enable_teacher_cfg:
+ # txt_dual = torch.cat([txt_ids, txt_uncond], dim=0) # [2B, L]
+ # x_t_dual = torch.cat([x_t, x_t], dim=0) # [2B,T,H,W]
+ # ids_dual, rpos_dual, _ = self.pipe.build_inputs(
+ # txt_dual, x_t_dual, self.latents_shape
+ # )
+ # logits_T_dual = _get_logits(
+ # self.pipe.teacher(ids_dual, rope_pos=rpos_dual)
+ # )
+ # z_T_dual = self.pipe.extract_logits(logits_T_dual, N) # [2B,N,K]
+ # z_T_cond, z_T_uncond = z_T_dual.chunk(2, dim=0) # [B,N,K]
+
+ # del logits_T_dual, z_T_dual
+ # torch.cuda.empty_cache()
+
+ # ids_t, rpos_t = ids_dual[:B], rpos_dual[:B]
+ # else:
+ # ids_t, rpos_t, _ = self.pipe.build_inputs(
+ # txt_ids, x_t, self.latents_shape
+ # )
+ # logits_T = _get_logits(
+ # self.pipe.teacher(ids_t, rope_pos=rpos_t)
+ # )
+ # z_T_cond = self.pipe.extract_logits(logits_T, N) # [B,N,K]
+ # z_T_uncond = None
+ # ids_dual, rpos_dual = ids_t, rpos_t
+
+ # # CFG guided target with per-sample Bernoulli warmup.
+ # z_T_guided = None
+ # use_guided_ratio = 0.0
+ # if cfg.enable_teacher_cfg:
+ # p_guided = _cfg_warmup_prob(
+ # step,
+ # float(cfg.teacher_cfg_prob),
+ # int(cfg.teacher_cfg_warmup_steps),
+ # )
+ # use_guided = torch.rand(B, device=device) < p_guided # [B] bool
+ # use_guided_ratio = float(use_guided.float().mean().item())
+ # z_T_guided = _build_guided_logits(
+ # z_T_cond, z_T_uncond,
+ # t, float(cfg.teacher_cfg_scale), float(cfg.teacher_cfg_trunc),
+ # )
+ # mask = use_guided.view(-1, 1, 1).expand_as(z_T_cond)
+ # z_T_target = torch.where(mask, z_T_guided, z_T_cond.float())
+ # else:
+ # z_T_target = z_T_cond
+
+ # z_T_target = z_T_target.detach() # NO grad path to teacher
+
+ # # # ----------------------------------------------------------------
+ # # # Stage 6: Aux update — fake_rounds iterations
+ # # #
+ # # # Freeze student so only aux gets gradients. With a single
+ # # # DeepSpeed-wrapped optimizer this is the cleanest way to ensure
+ # # # only aux params are updated.
+ # # # ----------------------------------------------------------------
+ # # raw_twin = self.accelerator.unwrap_model(self.model)
+ # # raw_twin.student.requires_grad_(False)
+ # # raw_twin.aux.requires_grad_(True)
+
+ # # loss_aux_cond_last = torch.tensor(0.0, device=device)
+ # # loss_aux_uncond_last = torch.tensor(0.0, device=device)
+ # # loss_aux_cond_sample_last = None
+
+ # # for _fr in range(int(cfg.fake_rounds)):
+ # # self.optimizer.zero_grad(set_to_none=True)
+
+ # # if cfg.enable_teacher_cfg:
+ # # logits_A_dual = _get_logits(
+ # # self.model("aux", ids_dual.detach(), rope_pos=rpos_dual.detach())
+ # # )
+ # # z_A_dual = self.pipe.extract_logits(logits_A_dual, N) # [2B,N,K]
+ # # z_A_cond, z_A_uncond = z_A_dual.chunk(2, dim=0)
+
+ # # loss_aux_cond_sample = _stable_jeffrey(
+ # # z_T_target, z_A_cond, float(cfg.tau_kd),chunk_size=1024
+ # # ) # [B]
+ # # loss_aux_cond_v = loss_aux_cond_sample.mean()
+ # # loss_aux_uncond_v = _stable_jeffrey(
+ # # z_T_uncond.float().detach(), z_A_uncond, float(cfg.tau_kd),chunk_size=1024
+ # # ).mean()
+ # # loss_aux_v = (
+ # # loss_aux_cond_v
+ # # + float(cfg.lambda_kd_uncond) * loss_aux_uncond_v
+ # # )
+ # # else:
+ # # logits_A = _get_logits(
+ # # self.model("aux", ids_t.detach(), rope_pos=rpos_t.detach())
+ # # )
+ # # z_A_cond = self.pipe.extract_logits(logits_A, N)
+ # # loss_aux_cond_sample = _stable_jeffrey(
+ # # z_T_target, z_A_cond, float(cfg.tau_kd),chunk_size=1024
+ # # )
+ # # loss_aux_cond_v = loss_aux_cond_sample.mean()
+ # # loss_aux_uncond_v = torch.tensor(0.0, device=device)
+ # # loss_aux_v = loss_aux_cond_v
+
+ # # self.accelerator.backward(loss_aux_v)
+ # # if float(cfg.grad_clip) > 0:
+ # # torch.nn.utils.clip_grad_norm_(
+ # # raw_twin.aux.parameters(), float(cfg.grad_clip)
+ # # )
+ # # self.optimizer.step()
+ # # self.optimizer.zero_grad(set_to_none=True)
+
+ # # loss_aux_cond_last = loss_aux_cond_v.detach()
+ # # loss_aux_uncond_last = loss_aux_uncond_v.detach()
+ # # loss_aux_cond_sample_last = loss_aux_cond_sample.detach() # [B]
+
+ # # # ----------------------------------------------------------------
+ # # # Stage 7: Student KD forward on x_t (with grad)
+ # # #
+ # # # Switch: freeze aux, unfreeze student for Stages 7-9.
+ # # # ----------------------------------------------------------------
+ # # raw_twin.student.requires_grad_(True)
+ # # raw_twin.aux.requires_grad_(False)
+ # # self.optimizer.zero_grad(set_to_none=True)
+
+ # # if cfg.enable_teacher_cfg:
+ # # logits_S_dual = _get_logits(
+ # # self.model("student", ids_dual.detach(), rope_pos=rpos_dual.detach())
+ # # )
+ # # z_S_dual = self.pipe.extract_logits(logits_S_dual, N)
+ # # z_S_cond, z_S_uncond = z_S_dual.chunk(2, dim=0)
+ # # loss_kd_cond = _stable_kl(
+ # # z_T_target, z_S_cond, float(cfg.tau_kd), chunk_size=2048
+ # # ).mean()
+ # # loss_kd_uncond = _stable_kl(
+ # # z_T_uncond.float().detach(), z_S_uncond, float(cfg.tau_kd), chunk_size=2048
+ # # ).mean()
+ # # loss_kd = loss_kd_cond + float(cfg.lambda_kd_uncond) * loss_kd_uncond
+ # # else:
+ # # logits_S = _get_logits(
+ # # self.model("student", ids_t.detach(), rope_pos=rpos_t.detach())
+ # # )
+ # # z_S_cond = self.pipe.extract_logits(logits_S, N)
+ # # loss_kd_cond = _stable_kl(
+ # # z_T_target, z_S_cond, float(cfg.tau_kd), chunk_size=2048
+ # # ).mean()
+ # # loss_kd_uncond = torch.tensor(0.0, device=device)
+ # # loss_kd = loss_kd_cond
+
+ # # # ----------------------------------------------------------------
+ # # # Stage 8: Reward + advantage (fully detached — no student grad)
+ # # #
+ # # # INVARIANT: reward and adv must never carry student gradients.
+ # # # ----------------------------------------------------------------
+ # # if cfg.enable_teacher_cfg and cfg.reward_use_guided:
+ # # z_T_for_rew = z_T_target # already detached
+ # # else:
+ # # z_T_for_rew = z_T_cond.detach()
+
+ # # # reward[b] = -KL(z_T_cond || z_S_cond) with BOTH inputs detached
+ # # with torch.no_grad():
+ # # reward = -_stable_kl(
+ # # z_T_for_rew.detach(), z_S_cond.detach(), float(cfg.tau), chunk_size=1024
+ # # ) # [B]
+ # # assert not reward.requires_grad, (
+ # # "[BUG] reward.requires_grad=True — student grad leaked into reward. "
+ # # "z_S_cond must be detached before KL for reward."
+ # # )
+ # # self.baseline_ema = (
+ # # 0.99 * self.baseline_ema + 0.01 * float(reward.mean().item())
+ # # )
+ # # adv = (reward - self.baseline_ema).detach() # [B]
+ # # assert not adv.requires_grad, "[BUG] adv.requires_grad=True"
+
+ # # # ----------------------------------------------------------------
+ # # # Stage 9: Two-backward student update
+ # # #
+ # # # 9a) KD backward first — frees the KD graph to save memory.
+ # # # Uses no_sync() (no DDP all-reduce) so gradients are not
+ # # # double-reduced when the PG backward syncs in 9b.
+ # # # 9b) Fresh forward on x_init WITH grad → PG + entropy backward.
+ # # # DDP all-reduce happens here (normal backward).
+ # # # ----------------------------------------------------------------
+
+ # # # 9a: KD backward (no sync — first of two backwards)
+ # # _no_sync_backward(
+ # # self.accelerator, self.model, float(cfg.lambda_kd) * loss_kd
+ # # )
+
+ # # # 9b: Policy + entropy — fresh forward on x_init WITH grad
+ # # ids_init, rpos_init, _ = self.pipe.build_inputs(
+ # # txt_ids, x_init, self.latents_shape
+ # # )
+ # # logits_s_pol = _get_logits(
+ # # self.model("student", ids_init, rope_pos=rpos_init)
+ # # )
+ # # z_s_pol = self.pipe.extract_logits(logits_s_pol, N) # [B, N, K]
+
+ # # logp_tok = F.log_softmax(z_s_pol / float(cfg.tau), dim=-1) # [B, N, K]
+ # # p_s_pol = logp_tok.exp()
+
+ # # # per-token average log-prob (recommended over log-prob sum)
+ # # logp_sum = (
+ # # logp_tok.gather(-1, x_hat.unsqueeze(-1)).squeeze(-1).sum(-1)
+ # # ) # [B]
+ # # logp = logp_sum / N # [B] per-token logp
+
+ # # H_mean = -(p_s_pol * logp_tok).sum(-1).mean()
+
+ # # loss_pg = -(adv * logp).mean()
+ # # lambda_ent_eff = float(cfg.lambda_ent) * (1.0 + 2.0 * use_guided_ratio)
+
+ # # # Second backward: DDP all-reduce happens here.
+ # # self.accelerator.backward(
+ # # float(cfg.lambda_pg) * loss_pg - lambda_ent_eff * H_mean
+ # # )
+
+ # # if float(cfg.grad_clip) > 0:
+ # # torch.nn.utils.clip_grad_norm_(
+ # # raw_twin.student.parameters(), float(cfg.grad_clip)
+ # # )
+ # # self.optimizer.step()
+
+ # # # Restore both sub-modules to trainable for next step.
+ # # raw_twin.student.requires_grad_(True)
+ # # raw_twin.aux.requires_grad_(True)
+
+ # # # p_init mixing: store x_hat_4d (detached) for next step.
+ # # self.x_hat_prev = x_hat_4d.detach()
+
+ # # ----------------------------------------------------------------
+ # # Stage 6: Aux update — Fit sampled pseudo-target (x_hat) from student
+ # # ----------------------------------------------------------------
+ # raw_twin = self.accelerator.unwrap_model(self.model)
+ # raw_twin.student.requires_grad_(False)
+ # raw_twin.aux.requires_grad_(True)
+
+ # target_tokens = x_hat.detach() # [B, N] - 学生在 Stage 3 盲猜出来的画面
+
+ # for _fr in range(int(cfg.fake_rounds)):
+ # self.optimizer.zero_grad(set_to_none=True)
+
+ # if cfg.enable_teacher_cfg:
+ # logits_A_dual = _get_logits(
+ # self.model("aux", ids_dual.detach(), rope_pos=rpos_dual.detach())
+ # )
+ # z_A_dual = self.pipe.extract_logits(logits_A_dual, N) # [2B,N,K]
+ # z_A_cond, z_A_uncond = z_A_dual.chunk(2, dim=0)
+
+ # # Aux 拟合学生的假 token (Cross Entropy)
+ # loss_aux_cond_v = F.cross_entropy(
+ # z_A_cond.reshape(B * N, K),
+ # target_tokens.reshape(B * N),
+ # reduction="mean",
+ # )
+ # loss_aux_uncond_v = F.cross_entropy(
+ # z_A_uncond.reshape(B * N, K),
+ # target_tokens.reshape(B * N),
+ # reduction="mean",
+ # )
+ # loss_aux_v = loss_aux_cond_v + float(cfg.lambda_kd_uncond) * loss_aux_uncond_v
+ # else:
+ # logits_A = _get_logits(
+ # self.model("aux", ids_t.detach(), rope_pos=rpos_t.detach())
+ # )
+ # z_A_cond = self.pipe.extract_logits(logits_A, N)
+
+ # loss_aux_cond_v = F.cross_entropy(
+ # z_A_cond.reshape(B * N, K),
+ # target_tokens.reshape(B * N),
+ # reduction="mean",
+ # )
+ # loss_aux_uncond_v = torch.tensor(0.0, device=device)
+ # loss_aux_v = loss_aux_cond_v
+
+ # self.accelerator.backward(loss_aux_v)
+
+ # if float(cfg.grad_clip) > 0:
+ # torch.nn.utils.clip_grad_norm_(
+ # raw_twin.aux.parameters(), float(cfg.grad_clip)
+ # )
+ # self.optimizer.step()
+
+ # loss_aux_cond_last = loss_aux_cond_v.detach()
+
+ # # ----------------------------------------------------------------
+ # # Stage 7 & 8: Student KD update & Aux Bridge (Gradient Injection)
+ # # ----------------------------------------------------------------
+ # raw_twin.student.requires_grad_(True)
+ # raw_twin.aux.requires_grad_(False)
+ # self.optimizer.zero_grad(set_to_none=True)
+
+ # # 7a. Student KD forward on x_t (保持原样)
+ # if cfg.enable_teacher_cfg:
+ # logits_S_dual = _get_logits(
+ # self.model("student", ids_dual.detach(), rope_pos=rpos_dual.detach())
+ # )
+ # z_S_dual = self.pipe.extract_logits(logits_S_dual, N)
+ # z_S_cond, z_S_uncond = z_S_dual.chunk(2, dim=0)
+
+ # # --- [新增] 立刻释放显存 ---
+ # del logits_S_dual, z_S_dual
+
+ # loss_kd_cond = _stable_kl(
+ # z_T_target, z_S_cond, float(cfg.tau_kd), chunk_size=256 #2048
+ # ).mean()
+ # loss_kd_uncond = _stable_kl(
+ # z_T_uncond.float().detach(), z_S_uncond, float(cfg.tau_kd), chunk_size=256 #2048
+ # ).mean()
+ # loss_kd = loss_kd_cond + float(cfg.lambda_kd_uncond) * loss_kd_uncond
+ # else:
+ # logits_S = _get_logits(
+ # self.model("student", ids_t.detach(), rope_pos=rpos_t.detach())
+ # )
+ # z_S_cond = self.pipe.extract_logits(logits_S, N)
+ # loss_kd_cond = _stable_kl(
+ # z_T_target, z_S_cond, float(cfg.tau_kd), chunk_size=256 #2048
+ # ).mean()
+ # loss_kd_uncond = torch.tensor(0.0, device=device)
+ # loss_kd = loss_kd_cond
+
+ # # 7b. 获取 Aux 的预测 (无梯度) 作为计算桥梁
+ # with torch.no_grad():
+ # if cfg.enable_teacher_cfg:
+ # logits_A_dual = _get_logits(
+ # self.model("aux", ids_dual.detach(), rope_pos=rpos_dual.detach())
+ # )
+ # z_A_dual = self.pipe.extract_logits(logits_A_dual, N)
+ # z_A_cond, _ = z_A_dual.chunk(2, dim=0)
+
+ # # --- [新增] 立刻释放显存 ---
+ # del logits_A_dual, z_A_dual
+ # else:
+ # logits_A = _get_logits(
+ # self.model("aux", ids_t.detach(), rpos_t.detach())
+ # )
+ # z_A_cond = self.pipe.extract_logits(logits_A, N)
+
+ # # 8. Student 对初始噪声 x_init 进行带梯度的前向传播
+ # ids_init, rpos_init, _ = self.pipe.build_inputs(
+ # txt_ids, x_init, self.latents_shape
+ # )
+ # logits_s_pol = _get_logits(
+ # self.model("student", ids_init, rope_pos=rpos_init)
+ # )
+ # z_s_pol = self.pipe.extract_logits(logits_s_pol, N)
+
+ # # --- 核心数学修正:将 Logits 转换为概率,防止梯度爆炸 ---
+ # p_T = F.softmax(z_T_target / float(cfg.tau_kd), dim=-1)
+ # p_A = F.softmax(z_A_cond / float(cfg.tau_kd), dim=-1)
+
+ # # 目标方向:Teacher 概率 - Aux 概率 (遵循论文公式推导)
+ # bridge_target = (p_T - p_A).detach()
+
+ # # 利用 MSE Trick 强制注入梯度
+ # loss_bridge = 0.5 * F.mse_loss(
+ # z_s_pol.float(),
+ # (z_s_pol.float() + bridge_target).detach()
+ # )
+
+ # # 9. 单次反向传播 (合并 KD 和 Bridge)
+ # # 借用原来的 lambda_pg 参数来控制 bridge 损失的权重
+ # loss_student = float(cfg.lambda_kd) * loss_kd + float(cfg.lambda_pg) * loss_bridge
+ # self.accelerator.backward(loss_student)
+
+ # if float(cfg.grad_clip) > 0:
+ # torch.nn.utils.clip_grad_norm_(
+ # raw_twin.student.parameters(), float(cfg.grad_clip)
+ # )
+ # self.optimizer.step()
+
+ # # 恢复两者的可训练状态
+ # raw_twin.student.requires_grad_(True)
+ # raw_twin.aux.requires_grad_(True)
+
+ # # --- 兼容原始日志输出的占位符 ---
+ # H_mean = torch.tensor(0.0, device=device)
+ # loss_pg = loss_bridge.detach() # 将 bridge 损失映射给 pg 显示
+ # logp = torch.tensor(0.0, device=device)
+ # self.baseline_ema = 0.0
+
+ # ----------------------------------------------------------------
+ # Stage 5: Teacher forward — 破除视图死锁,生成目标后立刻释放
+ # ----------------------------------------------------------------
+ with torch.no_grad():
+ if cfg.enable_teacher_cfg:
+ txt_dual = torch.cat([txt_ids, txt_uncond], dim=0) # [2B, L]
+ x_t_dual = torch.cat([x_t, x_t], dim=0) # [2B,T,H,W]
+ ids_dual, rpos_dual, _ = self.pipe.build_inputs(
+ txt_dual, x_t_dual, self.latents_shape
+ )
+ logits_T_dual = _get_logits(
+ self.pipe.teacher(ids_dual, rope_pos=rpos_dual)
+ )
+ z_T_dual = self.pipe.extract_logits(logits_T_dual, N) # [2B,N,K]
+
+ # 【显存救星 1】使用 .clone() 打断视图依赖,使得原始巨型张量可以被回收
+ z_T_cond = z_T_dual[0:1].clone() # [1,N,K]
+ z_T_uncond = z_T_dual[1:2].clone() # [1,N,K]
+ ids_t, rpos_t = ids_dual[:B], rpos_dual[:B]
+
+ # 立刻释放 17 GB 的双路缓冲
+ del logits_T_dual, z_T_dual
+ torch.cuda.empty_cache()
+ else:
+ ids_t, rpos_t, _ = self.pipe.build_inputs(txt_ids, x_t, self.latents_shape)
+ logits_T = _get_logits(self.pipe.teacher(ids_t, rope_pos=rpos_t))
+ z_T_cond = self.pipe.extract_logits(logits_T, N)
+ z_T_uncond = None
+
+ # 计算 CFG guided target
+ z_T_guided = None
+ use_guided_ratio = 0.0
+ if cfg.enable_teacher_cfg:
+ p_guided = _cfg_warmup_prob(step, float(cfg.teacher_cfg_prob), int(cfg.teacher_cfg_warmup_steps))
+ use_guided = torch.rand(B, device=device) < p_guided
+ use_guided_ratio = float(use_guided.float().mean().item())
+
+ z_T_guided = _build_guided_logits(
+ z_T_cond, z_T_uncond,
+ t, float(cfg.teacher_cfg_scale), float(cfg.teacher_cfg_trunc),
+ )
+ mask = use_guided.view(-1, 1, 1).expand_as(z_T_cond)
+ # 【显存救星 2】保持为 bf16 类型,避免膨胀到 8.5GB
+ z_T_target = torch.where(mask, z_T_guided, z_T_cond).to(dtype=z_T_cond.dtype).detach()
+
+ # 立刻清理所有中间推导变量
+ del z_T_cond, z_T_uncond, z_T_guided
+ torch.cuda.empty_cache()
+ else:
+ z_T_target = z_T_cond.detach()
+
+ # ----------------------------------------------------------------
+ # Stage 6: Aux update — 【显存救星 3】强行降维为单路前向传播 (Batch=1)
+ # ----------------------------------------------------------------
+ raw_twin = self.accelerator.unwrap_model(self.model)
+ raw_twin.student.requires_grad_(False)
+ raw_twin.aux.requires_grad_(True)
+
+ target_tokens = x_hat.detach()
+
+ for _fr in range(int(cfg.fake_rounds)):
+ self.optimizer.zero_grad(set_to_none=True)
+
+ # 只处理单路 ids_t,不处理 dual,砍掉 Aux 50% 显存!
+ logits_A = _get_logits(
+ self.model("aux", ids_t.detach(), rope_pos=rpos_t.detach())
+ )
+ z_A_cond = self.pipe.extract_logits(logits_A, N)
+
+ loss_aux_cond_v = F.cross_entropy(
+ z_A_cond.reshape(B * N, K),
+ target_tokens.reshape(B * N),
+ reduction="mean",
+ )
+
+ self.accelerator.backward(loss_aux_cond_v)
+ if float(cfg.grad_clip) > 0:
+ torch.nn.utils.clip_grad_norm_(raw_twin.aux.parameters(), float(cfg.grad_clip))
+ self.optimizer.step()
+
+ # 必须立刻释放
+ del logits_A, z_A_cond
+ torch.cuda.empty_cache()
+
+ loss_aux_cond_last = loss_aux_cond_v.detach()
+
+ # ----------------------------------------------------------------
+ # Stage 7 & 8: Student KD update & Aux Bridge
+ # ----------------------------------------------------------------
+ raw_twin.student.requires_grad_(True)
+ raw_twin.aux.requires_grad_(False)
+ self.optimizer.zero_grad(set_to_none=True)
+
+ # 7a. Student KD (强行降维为单路前向传播 Batch=1)
+ logits_S = _get_logits(
+ self.model("student", ids_t.detach(), rope_pos=rpos_t.detach())
+ )
+ z_S_cond = self.pipe.extract_logits(logits_S, N)
+
+ # 使用 128 chunk size,确保极致安全
+ loss_kd = _stable_kl(
+ z_T_target, z_S_cond, float(cfg.tau_kd), chunk_size=128
+ ).mean()
+
+ del logits_S, z_S_cond
+ torch.cuda.empty_cache()
+
+ # 7b. 获取 Aux 的预测作为桥梁 (依然单路)
+ with torch.no_grad():
+ logits_A = _get_logits(
+ self.model("aux", ids_t.detach(), rope_pos=rpos_t.detach())
+ )
+ z_A_cond = self.pipe.extract_logits(logits_A, N)
+
+ # 8. Student 对 x_init 进行前向传播
+ ids_init, rpos_init, _ = self.pipe.build_inputs(txt_ids, x_init, self.latents_shape)
+ logits_s_pol = _get_logits(
+ self.model("student", ids_init, rope_pos=rpos_init)
+ )
+ z_s_pol = self.pipe.extract_logits(logits_s_pol, N)
+
+ # 【显存救星 4】在 bf16 精度下计算 Softmax 概率,防止 float32 炸存
+ p_T = F.softmax(z_T_target / float(cfg.tau_kd), dim=-1).to(z_s_pol.dtype)
+ p_A = F.softmax(z_A_cond / float(cfg.tau_kd), dim=-1).to(z_s_pol.dtype)
+
+ bridge_target = (p_T - p_A).detach()
+
+ # 拿到 bridge_target 后,前面所有百兆甚至 G 级的张量统统干掉
+ del p_T, p_A, logits_A, z_A_cond, z_T_target
+ torch.cuda.empty_cache()
+
+ # 伪梯度注入
+ loss_bridge = 0.5* K * F.mse_loss(
+ z_s_pol.float(),
+ (z_s_pol.float() + bridge_target.float()).detach()
+ )
+
+ # 9. 统一反向传播
+ loss_student = float(cfg.lambda_kd) * loss_kd + float(cfg.lambda_pg) * loss_bridge
+ self.accelerator.backward(loss_student)
+
+ if float(cfg.grad_clip) > 0:
+ torch.nn.utils.clip_grad_norm_(raw_twin.student.parameters(), float(cfg.grad_clip))
+ self.optimizer.step()
+
+ # 恢复状态
+ raw_twin.student.requires_grad_(True)
+ raw_twin.aux.requires_grad_(True)
+
+ # 最后的清理
+ del logits_s_pol, z_s_pol, bridge_target
+ torch.cuda.empty_cache()
+
+ H_mean = torch.tensor(0.0, device=device)
+ loss_pg = loss_bridge.detach()
+ logp = torch.tensor(0.0, device=device)
+ self.baseline_ema = 0.0
+
+ # Advance LR schedulers.
+ self.scheduler_s.step()
+ self.scheduler_a.step()
+
+ # ----------------------------------------------------------------
+ # Step 1 sanity assertions (lightweight; runs only at step 1)
+ # ----------------------------------------------------------------
+ # if step == 1:
+ # self._step1_assertions(
+ # x_init, ids_init, rpos_init, z_s, p_s, logp,
+ # z_T_cond, z_S_cond, x_t, B, T, H, W,
+ # )
+
+ # ----------------------------------------------------------------
+ # Token-level collapse detection
+ # ----------------------------------------------------------------
+ tok_entropy = self._token_entropy(x_hat)
+ if not hasattr(self, "_init_tok_entropy"):
+ self._init_tok_entropy = tok_entropy
+ collapse_frac = float(cfg.get("collapse_warn_frac", 0.2))
+ if tok_entropy < collapse_frac * self._init_tok_entropy:
+ self.logger.warning(
+ f"[COLLAPSE] step={step} tok_H={tok_entropy:.3f} "
+ f"init={self._init_tok_entropy:.3f} "
+ f"ratio={tok_entropy / max(self._init_tok_entropy, 1e-8):.2f} "
+ f"< {collapse_frac}. Try increasing lambda_ent."
+ )
+
+ stats["time"] = timer.toc()
+ stats["metrics"] = collections.OrderedDict(
+ sorted(
+ {
+ "loss_aux_cond": float(loss_aux_cond_last.item()),
+ "loss_kd_cond": float(loss_kd.item()),
+ "loss_pg": float(loss_pg.item()),
+ "H_mean": float(H_mean.item()),
+ "tok_entropy": float(tok_entropy),
+ "mean_logp_tok": float(logp.mean().item()),
+ "baseline_ema": float(self.baseline_ema),
+ "use_guided_ratio": float(use_guided_ratio),
+ }.items()
+ )
+ )
+ return stats
+
+ # -----------------------------------------------------------------------
+ # Train loop
+ # -----------------------------------------------------------------------
+
    def train_loop(self):
        """Main training loop (mirrors diffnext.engine.train_engine.Trainer).

        Flow per iteration: run_step -> accumulate metrics -> periodic
        logging / progress reporting / checkpointing.  On resume, the LR
        schedulers and (when a matching checkpoint directory exists) the
        full accelerate/DeepSpeed state are fast-forwarded first.
        """
        cfg_exp = self.config.experiment
        max_steps = int(self.config.training.max_train_steps)
        log_every = int(cfg_exp.log_every)
        save_every = int(cfg_exp.save_every)

        self.global_step = int(self.config.experiment.get("resume_iter", 0))
        # Sync LR schedulers to resume step (set _step_count directly;
        # CosineLR uses _step_count internally in get_decay()).
        self.scheduler_s._step_count = self.global_step
        self.scheduler_a._step_count = self.global_step

        # [Optional] On resume, let accelerate restore the sharded state
        # (including the partitioned optimizer under ZeRO) automatically.
        if self.global_step > 0:
            ckpt_dir = os.path.join(self.config.experiment.output_dir, "checkpoints", f"checkpoint-{self.global_step}")
            if os.path.exists(ckpt_dir):
                self.accelerator.load_state(ckpt_dir)
                self.logger.info(f"✅ ZeRO-3 完整状态 (包含 Optimizer) 已从 {ckpt_dir} 恢复")

        timer = profiler.Timer()
        self.logger.info(
            f"[train] Starting from step {self.global_step} / {max_steps}"
        )

        while self.global_step < max_steps:
            self.global_step += 1
            # Only the step itself is timed; metric bookkeeping is outside.
            with timer.tic_and_toc():
                stats = self.run_step(self.global_step)
            self._add_metrics(stats)

            if self.global_step % log_every == 0:
                self._log_metrics(stats)

            # Coarser cadence for the ETA/progress line.
            if self.global_step % (10 * log_every) == 0:
                self.logger.info(
                    profiler.get_progress(timer, self.global_step, max_steps)
                )

            if self.global_step % save_every == 0:
                self.save(self.global_step)

        # Final log + save (only when loop ran at least one step).
        if self.global_step > int(self.config.experiment.get("resume_iter", 0)):
            self._log_metrics({**stats, "step": self.global_step})  # noqa: F821
            self.accelerator.wait_for_everyone()
            self.save(self.global_step, suffix="final")
        self.accelerator.end_training()
+
+ # -----------------------------------------------------------------------
+ # Checkpoint helpers
+ # -----------------------------------------------------------------------
+
+ # def save(self, step: int, suffix: str = None) -> None:
+ # """Save student + aux state_dicts (rank0 only).
+
+ # Saved as:
+ # /checkpoints/checkpoint-/student.pt
+ # /checkpoints/checkpoint-/aux.pt
+
+ # The student.pt can be used for inference by replacing the
+ # transformer weights in a URSAPipeline (see README).
+ # """
+ # if not self.accelerator.is_main_process:
+ # return
+
+ # folder = f"checkpoint-{suffix}" if suffix else f"checkpoint-{step}"
+ # ckpt_dir = os.path.join(
+ # self.config.experiment.output_dir, "checkpoints", folder
+ # )
+ # os.makedirs(ckpt_dir, exist_ok=True)
+
+ # raw_student = self.accelerator.unwrap_model(self.model).student
+ # raw_aux = self.accelerator.unwrap_model(self.model).aux
+
+ # student_path = os.path.join(ckpt_dir, "student.pt")
+ # aux_path = os.path.join(ckpt_dir, "aux.pt")
+
+ # torch.save(raw_student.state_dict(), student_path)
+ # torch.save(raw_aux.state_dict(), aux_path)
+
+ # # Also save training state for resuming.
+ # state = {
+ # "global_step": step,
+ # "baseline_ema": self.baseline_ema,
+ # "optimizer": self.optimizer.state_dict(),
+ # }
+ # torch.save(state, os.path.join(ckpt_dir, "train_state.pt"))
+ # self.logger.info(f"[save] step={step} → {ckpt_dir}")
+
+ def save(self, step: int, suffix: str = None) -> None:
+ """Save student + aux state_dicts (支持 DeepSpeed ZeRO-3 自动聚合)."""
+
+ # ⚠️ 【极其重要】:get_state_dict 必须由所有 8 张卡共同执行!
+ # 绝对不能把它放在 is_main_process 判断的里面,否则会触发跨卡死锁!
+ full_state_dict = self.accelerator.get_state_dict(self.model)
+
+ # 只有主进程(0号卡)负责把聚合好的完整参数写进硬盘
+ if not self.accelerator.is_main_process:
+ return
+
+ folder = f"checkpoint-{suffix}" if suffix else f"checkpoint-{step}"
+ ckpt_dir = os.path.join(
+ self.config.experiment.output_dir, "checkpoints", folder
+ )
+ os.makedirs(ckpt_dir, exist_ok=True)
+
+ # 从 TwinModel 的完整字典中,根据前缀拆分出 student 和 aux 的独立权重
+ student_state = {k.replace("student.", ""): v for k, v in full_state_dict.items() if k.startswith("student.")}
+ aux_state = {k.replace("aux.", ""): v for k, v in full_state_dict.items() if k.startswith("aux.")}
+
+ student_path = os.path.join(ckpt_dir, "student.pt")
+ aux_path = os.path.join(ckpt_dir, "aux.pt")
+
+ torch.save(student_state, student_path)
+ torch.save(aux_state, aux_path)
+
+ # 保存辅助状态
+ state = {
+ "global_step": step,
+ "baseline_ema": self.baseline_ema,
+ }
+ torch.save(state, os.path.join(ckpt_dir, "train_state.pt"))
+ self.logger.info(f"[save] step={step} → {ckpt_dir} (ZeRO-3 Gathered)")
+
+ # -----------------------------------------------------------------------
+ # Logging helpers (same API as original Trainer)
+ # -----------------------------------------------------------------------
+
+ def _add_metrics(self, stats: dict) -> None:
+ for k, v in stats["metrics"].items():
+ if k not in self.metrics:
+ self.metrics[k] = profiler.SmoothedValue()
+ self.metrics[k].update(v)
+
+ def _log_metrics(self, stats: dict) -> None:
+ iter_template = "Iteration %d, lr_s=%.2e lr_a=%.2e, time=%.2fs"
+ self.logger.info(
+ iter_template
+ % (
+ stats["step"],
+ stats.get("lr_student", 0.0),
+ stats.get("lr_aux", 0.0),
+ stats.get("time", 0.0),
+ )
+ )
+ metric_template = " Train %s: %s"
+ for k, v in self.metrics.items():
+ self.logger.info(metric_template % (k, v))
+ tracker_logs = {k: v.median for k, v in self.metrics.items()}
+ tracker_logs.update(
+ {
+ "lr_student": stats.get("lr_student", 0.0),
+ "time": stats.get("time", 0.0),
+ }
+ )
+ self.accelerator.log(tracker_logs, step=stats["step"])
+ self.metrics.clear()
+
+ # -----------------------------------------------------------------------
+ # Sanity checks (step 1 only)
+ # -----------------------------------------------------------------------
+
    def _step1_assertions(
        self, x_init, ids_init, rpos_init, z_s, p_s, logp,
        z_T_cond, z_S_cond, x_t, B, T, H, W,
    ) -> None:
        """Shape / value-domain assertions (mirrors single-card script).

        Run once at step 1 to validate token layout, probability
        normalisation, and gradient flow before committing to a long run.
        Raises AssertionError on the first violated invariant.
        """
        N, K = self.N, self.K
        lm_vocab = self.pipe.teacher.config.lm_vocab_size
        # ids_init is [B, txt_len + (N+1)]: text tokens, then BOV + N visual.
        L_plus_N1 = ids_init.size(1)
        txt_len = L_plus_N1 - (N + 1)

        # Sampled tokens must be integer indices in [0, K).
        assert x_init.dtype == torch.long
        assert x_init.min() >= 0 and x_init.max() < K

        # Text tokens live below lm_vocab; visual tokens are shifted into
        # [lm_vocab, lm_vocab + K).
        assert ids_init.shape == (B, L_plus_N1), ids_init.shape
        txt_part = ids_init[:, :txt_len]
        vis_part = ids_init[:, -N:]
        assert (txt_part < lm_vocab).all(), "text tokens in visual range"
        assert (vis_part >= lm_vocab).all(), "visual tokens not shifted"
        assert (vis_part < lm_vocab + K).all(), "visual tokens exceed lm_vocab+K"

        # 3D rope positions, one (t, h, w) triple per token.
        assert rpos_init.shape == (B, L_plus_N1, 3), rpos_init.shape
        assert z_s.shape == (B, N, K), z_s.shape
        # p_s must be a normalised distribution over K per token.
        p_err = float((p_s.sum(-1) - 1).abs().max().item())
        assert p_err < 1e-3, f"p_s not normalised: max_dev={p_err:.2e}"

        assert not torch.isnan(logp).any(), "logp has NaN"
        assert not torch.isinf(logp).any(), "logp has Inf"
        assert x_t.min() >= 0 and x_t.max() < K

        assert z_T_cond.shape == z_S_cond.shape == (B, N, K), (
            f"z_T_cond={z_T_cond.shape} z_S_cond={z_S_cond.shape}"
        )

        # Teacher has no grad.
        teacher_grads = [
            p for p in self.pipe.teacher.parameters() if p.grad is not None
        ]
        assert len(teacher_grads) == 0, "teacher has grads — not frozen"

        # Student has grad (from PG backward).
        raw_s = self.accelerator.unwrap_model(self.model).student
        student_grad_norms = [
            float(p.grad.norm().item())
            for p in raw_s.parameters()
            if p.grad is not None
        ]
        assert len(student_grad_norms) > 0, "student has NO grads — grad flow broken"

        # ##########################
        # raw_t = self.pipe.teacher
        # raw_s = self.accelerator.unwrap_model(self.model).student

        # # (a) Shared-storage check: zero cost.
        # pt0 = next(raw_t.parameters())
        # ps0 = next(raw_s.parameters())
        # self.logger.info(f"[assert] shared_storage={pt0.data_ptr() == ps0.data_ptr()}")

        # # (b) Parameter delta: sample only the first 4096 elements to
        # # avoid allocating huge temporaries.
        # with torch.no_grad():
        #     a = pt0.view(-1)[:4096].float()
        #     b = ps0.view(-1)[:4096].float()
        #     self.logger.info(f"[assert] param_delta_sample_max={float((a-b).abs().max().item()):.3e}")

        # # (c) Logits delta: sample a small sub-block (64 tokens × 256 vocab).
        # with torch.no_grad():
        #     idx_n = torch.randint(0, self.N, (64,), device=z_T_cond.device)
        #     idx_k = torch.randint(0, self.K, (256,), device=z_T_cond.device)
        #     subT = z_T_cond[0, idx_n][:, idx_k].float()
        #     subS = z_S_cond[0, idx_n][:, idx_k].float()
        #     self.logger.info(f"[assert] logits_delta_sub_max={float((subT-subS).abs().max().item()):.3e}")
        # ###########################

        self.logger.info("[assert] Step-1 shape/grad assertions PASSED ✓")
        self.logger.info(
            f"[assert] z_T_cond shape={z_T_cond.shape} "
            f"min={float(z_T_cond.min().item()):.3f} "
            f"max={float(z_T_cond.max().item()):.3f}"
        )
        self.logger.info(
            f"[assert] z_S_cond shape={z_S_cond.shape} "
            f"min={float(z_S_cond.min().item()):.3f} "
            f"max={float(z_S_cond.max().item()):.3f}"
        )
+
+ @staticmethod
+ def _token_entropy(x_hat: torch.Tensor) -> float:
+ """Histogram entropy of sampled token indices (collapse detection)."""
+ counts = x_hat.flatten().bincount(minlength=1).float()
+ p = counts / counts.sum()
+ p = p[p > 0]
+ return float(-(p * p.log()).sum().item())
+
+
def main():
    """Entry point — identical pattern to scripts/train.py.

    Builds the accelerator / wandb tracker / logger, seeds each rank
    independently, persists the resolved config (rank 0), then runs the
    distillation training loop to completion.
    """
    config = omegaconf_utils.get_config()
    os.makedirs(config.experiment.output_dir, exist_ok=True)

    accelerator = accelerate_utils.build_accelerator(config)
    accelerate_utils.build_wandb(config, accelerator=accelerator)
    logger = accelerate_utils.set_logger(
        config.experiment.output_dir, accelerator=accelerator
    )

    # Per-process seed offset so each rank draws distinct randomness.
    device_seed = int(config.training.seed) + accelerator.process_index
    engine_utils.manual_seed(device_seed, (accelerator.device.index, device_seed))

    # Only rank 0 writes the resolved config snapshot.
    if accelerator.is_main_process:
        config_path = os.path.join(config.experiment.output_dir, "config.yaml")
        omegaconf_utils.save_config(config, config_path)

    logger.info(f"Config:\n{omegaconf_utils.config_to_yaml(config)}")

    trainer = DistillTrainer(config, accelerator, logger)
    trainer.train_loop()
+
+
if __name__ == "__main__":
    # Script entry point: launch distillation training.
    main()
diff --git a/URSA/scripts/train_onestep.md b/URSA/scripts/train_onestep.md
new file mode 100644
index 0000000000000000000000000000000000000000..1ed5a35651f24426b8ea9da2d826a40d5222b96b
--- /dev/null
+++ b/URSA/scripts/train_onestep.md
@@ -0,0 +1,78 @@
+代码解读(关键设计决策)
+utils_ursa_inputs.py
+build_ursa_inputs(transformer, txt_ids, visual_tokens, latents_shape, device)
+严格复刻 URSAPipeline.__call__ 的 token 拼接逻辑:
img_ids = pad(latents_flat + lm_vocab_size, (1,0), value=bov_token_id)
input_ids = cat([txt_ids, img_ids], dim=1)
blk_pos = flex_rope.get_pos(latents_shape, L)
rope_pos = cat([txt_pos, blk_pos[0]]).unsqueeze(0).expand(B,-1,-1)
+extract_visual_logits(logits, N, K)
+坑 1 防护:z = logits[:, -(N+1):-1](causal slice),然后根据最后一维是否等于 K 决定是否再切 slice。
+sample_t_curriculum — 前 10k 步用 t = 1-(1-u)^2 偏大,之后恢复均匀采样。
+train_onestep_ursa_dimo.py 训练循环
+每一步的 9 个 stage 对应 DiMO 论文的完整流程:
+Stage 操作 梯度
+1-2 tokenize + 采样 x_init (80% uniform / 20% corrupt) 无
+3 student 在 x_init 上 1-step forward → x_hat, logp, H ✅ student
+4 add_noise(x_hat, t) → x_t 无(离散采样截断)
+5 teacher 在 x_t → p_T 无 (no_grad)
+6 aux 在 x_t → Jeffrey(p_T, p_A) → backward → aux update ✅ aux only
+7 student 在 x_t → KL(p_T ‖ p_S_t) ✅ student
+8 REINFORCE: r=-loss_aux, adv=r-EMA, loss_pg=-(adv·logp) ✅ student (via logp)
+9 L_s = λ_pg·loss_pg + λ_kd·loss_kd - λ_ent·H → student update ✅ student
+运行命令示例
+端到端冒烟测试(单卡,17帧256×256,2000步):
+python scripts/train_onestep_ursa_dimo.py \
+ --teacher_ckpt /gfs/space/private/fengzl/World_Model/URSA-1.7B/ \
+ --prompt_file /gfs/space/private/fengzl/World_Model/Koala-36M-v1/ \
+ --num_frames 17 --height 256 --width 256 \
+ --batch_size 1 --num_steps 2000 \
+ --log_every 50 --save_every 500 \
+ --out_dir ./outputs/dimo_test
+
+评估(1-step student vs 25-step teacher):
+python scripts/eval_onestep_ursa.py \
+ --teacher_ckpt /gfs/space/private/fengzl/World_Model/URSA-1.7B/ \
+ --student_ckpt ./outputs/dimo_test/final/student.pt \
+ --num_frames 17 --height 256 --width 256 \
+ --teacher_steps 25 \
+ --out_dir ./outputs/eval
+
+扩展到完整分辨率(49帧 320×512):
+python scripts/train_onestep_ursa_dimo.py \
+ --teacher_ckpt /gfs/space/private/fengzl/World_Model/URSA-1.7B/ \
+ --prompt_file /gfs/space/private/fengzl/World_Model/Koala-36M-v1/ \
+ --num_frames 49 --height 320 --width 512 \
+ --batch_size 2 --num_steps 50000 \
+ --lambda_ent 0.01 --t_curriculum_steps 10000 \
+ --mixed_precision bf16 --out_dir ./outputs/dimo_full
+
+三大稳定性机制(缺一不可)
+t curriculum — 前 10k 步 t 偏大,teacher 分布更尖锐,KD 信号更强,避免早期 student 随机游走
+p_init mixing — 20% batch 用 corrupt(x_hat_prev, r=0.2),让 student 学会"一步修复"
+熵正则 λ_ent — 初始 0.01,若检测到 tok_entropy 下降就升到 0.05
+
+
+8 卡启动命令
+accelerate launch --config_file accelerate_configs/deepspeed_zero2.yaml --machine_rank 0 --num_machines 1 --num_processes 8 scripts/train_distill_dimo.py config=./configs/distill_dimo.yaml experiment.output_dir=./experiments/distill_dimo distill.teacher_ckpt=/gfs/space/private/fengzl/World_Model/URSA-1.7B distill.prompt_source=/gfs/space/private/fengzl/World_Model/Koala-36M-v1 distill.batch_size_per_gpu=1
+
+Smoke Test(50 步,保存 checkpoint)
+accelerate launch --num_processes 8 --mixed_precision bf16 \
+ scripts/train_distill_dimo.py \
+ config="./configs/distill_dimo.yaml" \
+ experiment.output_dir="./experiments/smoke" \
+ distill.teacher_ckpt="/gfs/space/private/fengzl/World_Model/URSA-1.7B" \
+ distill.prompt_source="/gfs/space/private/fengzl/World_Model/Koala-36M-v1" \
+ training.max_train_steps=50 \
+ experiment.save_every=50
+
+
+加载 student.pt 做 1-step 推理
from diffnext.pipelines import URSAPipeline
import torch

pipe = URSAPipeline.from_pretrained(
    "/path/to/URSA-1.7B-IBQ1024",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda")

# 替换 transformer 权重为 student
state = torch.load("experiments/distill_dimo/checkpoints/final/student.pt", map_location="cuda")
pipe.transformer.load_state_dict(state, strict=True)

# 1-step 生成(num_inference_steps=1)
frames = pipe(
    prompt="a dog running on a beach",
    height=256, width=256, num_frames=17,
    num_inference_steps=1, guidance_scale=3.0,
).frames
+
+
+最新 修改分辨率和cfg后
+accelerate launch --config_file accelerate_configs/deepspeed_zero2.yaml \
+ --machine_rank 0 --num_machines 1 --num_processes 8 \
+ scripts/train_distill_dimo.py \
+ config="./configs/distill_dimo.yaml" \
+ experiment.output_dir="./experiments/distill_dimo" \
+ distill.teacher_ckpt="/gfs/space/private/fengzl/World_Model/URSA-1.7B" \
+ distill.prompt_source="/gfs/space/private/fengzl/World_Model/Koala-36M-v1"
\ No newline at end of file
diff --git a/URSA/scripts/train_onestep_ursa_dimo.py b/URSA/scripts/train_onestep_ursa_dimo.py
new file mode 100644
index 0000000000000000000000000000000000000000..18a07d992f27f04350cc3c4b3368f44ab529e633
--- /dev/null
+++ b/URSA/scripts/train_onestep_ursa_dimo.py
@@ -0,0 +1,1303 @@
+#!/usr/bin/env python3
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------
+"""URSA → URSA one-step distillation via Di[M]O-style on-policy training.
+
+Verified native inference regime (from A/B testing — ground truth):
+ height=320, width=512, num_frames=49, guidance_scale=7, teacher_steps=50.
+ no_cfg (guidance_scale=1) does NOT produce valid output for this URSA checkpoint.
+ All defaults below align to this verified regime.
+
+Algorithm (9 stages per iteration)
+------------------------------------
+ teacher : frozen URSA — provides supervision at pseudo-intermediate x_t.
+ student : trainable copy — 1-step target.
+ aux : trainable copy — approximates teacher at x_t; reduces REINFORCE variance.
+
+ Stage 1 : tokenise prompts (cond + uncond when CFG enabled) → txt_ids [B,L]
+ Stage 2 : sample x_init [B,T,H,W] ~ Uniform(K) (+ optional p_init mixing)
+ Stage 3 : student 1-step forward on x_init (cond only) → x_hat, logp, H
+ Stage 4 : pseudo-intermediate x_t = scheduler.add_noise(x_hat, t)
+ Stage 5 : teacher forward on x_t (CFG=7 dual-branch is the default)
+ Stage 6 : aux forward → Jeffrey KD
+ Stage 7 : student forward on x_t → KL KD
+ Stage 8 : reward = -KL(z_T_cond, z_S_cond) [detached]
+ Stage 9 : two-backward student update
+
+Usage:
+ # Smoke test (verified native regime):
+ python scripts/train_onestep_ursa_dimo.py \\
+ --teacher_ckpt /path/to/URSA --prompt_file prompts.txt \\
+ --enable_teacher_cfg --teacher_cfg_scale 7.0 \\
+ --num_frames 49 --height 320 --width 512 --dry_run
+
+ # Full training:
+ python scripts/train_onestep_ursa_dimo.py \\
+ --teacher_ckpt /path/to/URSA --prompt_file prompts.txt \\
+ --enable_teacher_cfg --teacher_cfg_scale 7.0 \\
+ --num_frames 49 --height 320 --width 512 \\
+ --batch_size 1 --num_steps 10000 --out_dir ./outputs/dimo_cfg
+"""
+
+import argparse
+import copy
+import json
+import math
+import os
+import sys
+
+import torch
+import torch.nn.functional as F
+from torch.utils.data import DataLoader
+
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+if _REPO_ROOT not in sys.path:
+ sys.path.insert(0, _REPO_ROOT)
+
+from diffnext.pipelines import URSAPipeline
+from src.distill.prompt_dataset import InfiniteDataLoader, PromptDataset, make_collate_fn, CSVSpec
+from src.distill.utils_ursa_inputs import (
+ build_ursa_inputs,
+ compute_latents_shape,
+ corrupt_tokens,
+ extract_visual_logits,
+ sample_t_curriculum,
+)
+
+def _get_logits(out):
+ if isinstance(out, (tuple, list)):
+ return out[0]
+ if hasattr(out, "sample"):
+ return out.sample
+ if hasattr(out, "logits"):
+ return out.logits
+ return out
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
def parse_args():
    """Build and parse the CLI for URSA DiMO one-step distillation.

    Returns:
        argparse.Namespace with model/data paths, video geometry, training
        hyper-parameters, loss weights, teacher-CFG options, and debug flags.

    Fix vs. previous revision: ``--use_cfg_eval`` was declared as
    ``action="store_true"`` with ``default=True``, which made the flag a no-op
    and left no way to disable CFG at eval time.  A paired
    ``--no_use_cfg_eval`` flag sharing the same dest now allows turning it
    off; the original flag and its default remain unchanged (backward
    compatible).
    """
    p = argparse.ArgumentParser(description="URSA DiMO one-step distillation")

    # Model / data
    p.add_argument("--teacher_ckpt", required=True)
    p.add_argument("--prompt_file", required=True)
    p.add_argument("--out_dir", default="./outputs/dimo")

    # Video geometry (verified native: 320×512×49)
    p.add_argument("--num_frames", type=int, default=49)
    p.add_argument("--height", type=int, default=320)
    p.add_argument("--width", type=int, default=512)
    p.add_argument("--max_prompt_length", type=int, default=320)

    # Training
    p.add_argument("--batch_size", type=int, default=1)
    p.add_argument("--num_steps", type=int, default=10_000)
    p.add_argument("--lr_student", type=float, default=1e-5)
    p.add_argument("--lr_aux", type=float, default=1e-5)
    p.add_argument("--weight_decay", type=float, default=0.01)
    p.add_argument("--grad_clip", type=float, default=1.0)
    p.add_argument("--mixed_precision", default="bf16", choices=["fp16", "bf16", "fp32"])
    p.add_argument("--seed", type=int, default=42)
    p.add_argument("--log_every", type=int, default=50)
    p.add_argument("--save_every", type=int, default=1000)

    # Loss weights
    p.add_argument("--lambda_pg", type=float, default=1.0)
    p.add_argument("--lambda_kd", type=float, default=0.5)
    p.add_argument("--lambda_ent", type=float, default=0.01)
    p.add_argument("--tau", type=float, default=1.0, help="Student sampling temperature")
    p.add_argument("--tau_kd", type=float, default=1.0, help="KD softmax temperature")

    # ---- Teacher CFG (DiMO true_cfg style) ----------------------------
    p.add_argument("--enable_teacher_cfg", action="store_true", default=False,
                   help="Enable teacher-side CFG for KD target. "
                        "False → prior single-branch behavior (fallback).")
    p.add_argument("--teacher_cfg_scale", type=float, default=7.0,
                   help="CFG scale s (verified working value=7)")
    p.add_argument("--teacher_cfg_prob", type=float, default=1.0,
                   help="Max prob of using guided target per sample (after warmup)")
    p.add_argument("--teacher_cfg_warmup_steps", type=int, default=2000,
                   help="Steps to ramp teacher_cfg_prob 0 → teacher_cfg_prob")
    p.add_argument("--teacher_cfg_trunc", type=float, default=0.9,
                   help="t threshold: when t >= trunc, s=1. Set >=1.0 to disable.")
    p.add_argument("--lambda_kd_uncond", type=float, default=0.3,
                   help="Weight for uncond-branch KD / aux loss")
    p.add_argument("--reward_use_guided", action="store_true", default=False,
                   help="[RISKY] Use guided teacher logits for REINFORCE reward.")
    # ---- Eval CFG (inference-time) -----------------------------------
    p.add_argument("--eval_cfg_scale", type=float, default=7.0)
    # Default is ON; previously this store_true flag with default=True could
    # never be disabled from the command line.
    p.add_argument("--use_cfg_eval", action="store_true", default=True,
                   help="Use CFG at eval time (default: enabled)")
    p.add_argument("--no_use_cfg_eval", dest="use_cfg_eval", action="store_false",
                   help="Disable CFG at eval time")

    # DiMO extensions
    p.add_argument("--use_surrogate_grad", action="store_true",
                   help="DiMO surrogate MSE trick applied to Stage-3 logits")
    p.add_argument("--lambda_surr", type=float, default=1.0)
    p.add_argument("--fake_rounds", type=int, default=1,
                   help="Aux updates per generator update (DiMO=2)")

    # Stability
    p.add_argument("--t_curriculum_steps", type=int, default=10_000)
    p.add_argument("--p_mix_corrupt_frac", type=float, default=0.2)
    p.add_argument("--p_init_mix_ratio", type=float, default=0.2)
    p.add_argument("--collapse_warn_frac", type=float, default=0.2)

    # Debug
    p.add_argument("--dry_run", action="store_true",
                   help="Run 1 step + grad-flow check, then exit")
    p.add_argument("--debug_dump", type=int, default=0,
                   help="Dump token histogram + x_hat every N steps (0=off)")

    p.add_argument("--device", type=int, default=0)
    return p.parse_args()
+
+
+# ---------------------------------------------------------------------------
+# Checkpoint
+# ---------------------------------------------------------------------------
+
def save_checkpoint(model, path: str, name: str = "student"):
    """Serialize ``model.state_dict()`` to ``<path>/<name>.pt``.

    Creates ``path`` if it does not exist and logs the written file.
    """
    os.makedirs(path, exist_ok=True)
    target = os.path.join(path, f"{name}.pt")
    torch.save(model.state_dict(), target)
    print(f"[save] {target}")
+
+
+# ---------------------------------------------------------------------------
+# Stable KL / Jeffrey divergence helpers (float32 + log_softmax)
+# ---------------------------------------------------------------------------
+
+def _stable_kl(z_p: torch.Tensor, z_q: torch.Tensor, tau: float = 1.0) -> torch.Tensor:
+ """KL(p||q) from raw logits, float32 + log_softmax. → [B] (mean over N tokens).
+
+ p = softmax(z_p/tau), q = softmax(z_q/tau)
+ KL(p||q) = sum_k p_k * (log p_k - log q_k)
+
+ Both log_p and log_q are computed via log_softmax to avoid
+ log(softmax(...)) numerical issues.
+ """
+ lp = F.log_softmax(z_p.float() / tau, dim=-1) # [B, N, K]
+ lq = F.log_softmax(z_q.float() / tau, dim=-1) # [B, N, K]
+ return (lp.exp() * (lp - lq)).sum(-1).mean(-1) # [B]
+
+
def _stable_jeffrey(z_p: torch.Tensor, z_q: torch.Tensor, tau: float = 1.0) -> torch.Tensor:
    """Jeffrey divergence (symmetric KL) from raw logits. Returns [B]."""
    forward_kl = _stable_kl(z_p, z_q, tau)
    reverse_kl = _stable_kl(z_q, z_p, tau)
    return forward_kl + reverse_kl
+
+
+# ---------------------------------------------------------------------------
+# Batch-concat input builder (ONE forward for cond + uncond)
+# ---------------------------------------------------------------------------
+
def _build_dual_inputs(teacher_ref, txt_cond, txt_uncond, x_t, latents_shape, device):
    """Stack cond+uncond prompts into one [2B] batch for a single forward pass.

    Returns (ids_dual [2B, L+N+1], rpos_dual [2B, L+N+1, 3], N); callers split
    the model output back with chunk(2, dim=0) into cond/uncond halves.
    The visual tokens x_t are duplicated so both halves see identical latents,
    and the resulting ids/rpos are shared by teacher, aux and student so the
    inputs are constructed only once per step.
    """
    stacked_txt = torch.cat((txt_cond, txt_uncond), dim=0)  # [2B, L]
    stacked_vis = torch.cat((x_t, x_t), dim=0)              # [2B, T, H, W]
    return build_ursa_inputs(teacher_ref, stacked_txt, stacked_vis, latents_shape, device)
+
+
+# ---------------------------------------------------------------------------
+# flex_attn probe / reset helpers
+# ---------------------------------------------------------------------------
+
+def _probe_flex_attn(model, label: str = "") -> object:
+ """Return the FlexAttentionCausal2D object if present, else None."""
+ return getattr(model, "flex_attn", None)
+
+
def _print_flex_attn_state(model, label: str):
    """Log the flex_attn offsets / masks of ``model`` for debugging."""
    fa = _probe_flex_attn(model, label)
    if fa is None:
        print(f" [flex_attn/{label}] not present on model")
        return
    block_mask_state = 'set' if fa.block_mask is not None else 'None'
    cu_offsets_state = 'set' if fa.cu_offsets is not None else 'None'
    print(
        f" [flex_attn/{label}] offsets={fa.offsets!r} "
        f"block_mask={block_mask_state} "
        f"cu_offsets={cu_offsets_state}"
    )
+
+
def _reset_flex_attn(model, label: str = "", verbose: bool = False):
    """Clear flex_attn offsets/masks so standard causal attention is used.

    Distillation processes each sample independently along the batch dim, so
    block-packed attention (offsets != None) is unnecessary and must be
    cleared to avoid cross-sample mask contamination.  No-op when the model
    has no flex_attn attribute.
    """
    fa = _probe_flex_attn(model, label)
    if fa is None:
        return
    previous_offsets = fa.offsets
    fa.offsets = None
    fa.block_mask = None
    fa.cu_offsets = None
    if verbose:
        print(f" [flex_attn/{label}] reset: was={previous_offsets!r} → None (standard causal)")
+
+
+# ---------------------------------------------------------------------------
+# Teacher CFG target construction
+# ---------------------------------------------------------------------------
+
+def _compute_cfg_scale(t: torch.Tensor, cfg_scale: float, trunc: float) -> torch.Tensor:
+ """Per-sample CFG scale [B]: s=cfg_scale when t < trunc, else s=1."""
+ s = torch.full_like(t, cfg_scale)
+ if trunc < 1.0:
+ s = torch.where(t >= trunc, torch.ones_like(t), s)
+ return s
+
+
+def _cfg_warmup_prob(step: int, cfg_prob: float, warmup_steps: int) -> float:
+ """Linear warmup: 0 → cfg_prob over warmup_steps steps."""
+ if warmup_steps <= 0:
+ return cfg_prob
+ return cfg_prob * min(1.0, step / warmup_steps)
+
+
def _build_guided_logits(
    z_T_cond: torch.Tensor,    # [B, N, K] float32
    z_T_uncond: torch.Tensor,  # [B, N, K] float32
    t: torch.Tensor,           # [B] ∈ (0,1)
    cfg_scale: float,
    trunc: float,
) -> torch.Tensor:
    """Classifier-free-guided logits: z_uncond + s*(z_cond - z_uncond).

    The scale s is per-sample (shape [B,1,1]) and collapses to 1 above the
    truncation threshold, per _compute_cfg_scale.
    """
    scale = _compute_cfg_scale(t, cfg_scale, trunc).view(-1, 1, 1)  # [B,1,1]
    delta = z_T_cond - z_T_uncond
    return z_T_uncond + scale * delta  # [B, N, K]
+
+
+def _select_target(
+ z_guided: torch.Tensor, # [B, N, K]
+ z_cond: torch.Tensor, # [B, N, K]
+ use_guided: torch.Tensor, # [B] bool — per-sample selection
+) -> torch.Tensor:
+ """Per-sample: z_guided where use_guided[b]=True, else z_cond."""
+ mask = use_guided.view(-1, 1, 1).expand_as(z_cond)
+ return torch.where(mask, z_guided, z_cond)
+
+
+# ---------------------------------------------------------------------------
+# Gradient-flow debug
+# ---------------------------------------------------------------------------
+
def debug_grad_flow(
    teacher, student, aux,
    txt_cond, txt_uncond, x_t, latents_shape, device, K, N, tau, tau_kd,
    enable_teacher_cfg,
):
    """One fwd+bwd without optimizer.step() to validate gradient routing.

    Asserts:
        - teacher: zero grads (frozen)
        - aux: non-zero grads after loss_aux.backward()
        - student: non-zero grads after loss_student.backward()

    All cond/uncond forwards are batch-concatenated per requirement (1).

    Args:
        teacher, student, aux: the three transformer copies; teacher is
            assumed frozen (requires_grad=False) — the zero-grad assert
            below verifies that.
        txt_cond: [B, L] conditional prompt token ids.
        txt_uncond: [B, L] unconditional token ids, or None (CFG disabled).
        x_t: [B, T, H, W] pseudo-intermediate visual token ids.
        latents_shape: (T, H, W) latent grid, forwarded to build_ursa_inputs.
        K: codebook size; N: number of visual tokens (assumed T*H*W —
            TODO confirm against compute_latents_shape).
        tau: student sampling temperature; tau_kd: KD softmax temperature.
        enable_teacher_cfg: when True (and txt_uncond given), teacher/aux run
            a single [2B] cond+uncond forward and a guided target is built.

    Raises:
        RuntimeError: on teacher/student output-shape mismatch.
        AssertionError: on any gradient-routing violation.
    """
    print("\n" + "=" * 64)
    print("[grad_flow] Starting gradient flow debug …")
    B = txt_cond.size(0)

    # -- Stage 3: student on x_init (cond only) ----------------------
    # Fresh uniform draw over the codebook (NOT the caller's x_t) so that
    # logp / entropy exercise the same sampling path as real Stage 3.
    x_init_dbg = torch.randint(0, K, x_t.shape, device=device, dtype=torch.long)
    ids_init, rpos_init, _ = build_ursa_inputs(teacher, txt_cond, x_init_dbg, latents_shape, device)
    logits_s = student(ids_init, rope_pos=rpos_init).sample
    z_s = extract_visual_logits(logits_s.float(), N, K)
    p_s = F.softmax(z_s / tau, dim=-1)
    x_hat = torch.multinomial(p_s.view(-1, K), 1).view(B, N)
    # clamp(1e-8) guards log(0) for tokens with (near-)zero probability.
    logp = p_s.clamp(1e-8).log().gather(-1, x_hat.unsqueeze(-1)).squeeze(-1).sum(-1)
    H_mean = -(p_s * p_s.clamp(1e-8).log()).sum(-1).mean()

    # -- Stage 5: teacher forward — [2B] if CFG, else [B] ------------
    if enable_teacher_cfg and txt_uncond is not None:
        ids_dual, rpos_dual, _ = _build_dual_inputs(teacher, txt_cond, txt_uncond, x_t, latents_shape, device)
        with torch.no_grad():
            logits_T_dual = teacher(ids_dual, rope_pos=rpos_dual).sample.float()
            z_T_dual = extract_visual_logits(logits_T_dual, N, K)
            z_T_cond_dbg, z_T_uncond_dbg = z_T_dual.chunk(2, dim=0)
        t_dbg = torch.full((B,), 0.5, device=device, dtype=torch.float32)
        # NOTE(review): cfg scale/trunc are hard-coded to 3.0/0.9 here while
        # the training default is 7.0 — confirm this is intentional for the
        # debug path only.
        z_T_guided_dbg = _build_guided_logits(
            z_T_cond_dbg.float(), z_T_uncond_dbg.float(), t_dbg, 3.0, 0.9)
        z_T_target_dbg = z_T_guided_dbg.detach()
        print(f" [grad_flow] z_T_cond shape={z_T_cond_dbg.shape} "
              f"min={z_T_cond_dbg.min():.3f} max={z_T_cond_dbg.max():.3f}")
        print(f" [grad_flow] z_T_uncond shape={z_T_uncond_dbg.shape} "
              f"min={z_T_uncond_dbg.min():.3f} max={z_T_uncond_dbg.max():.3f}")
        print(f" [grad_flow] z_T_guided shape={z_T_guided_dbg.shape} "
              f"min={z_T_guided_dbg.min():.3f} max={z_T_guided_dbg.max():.3f}")
        # cond half aliases the first B rows (no copy).
        ids_t_ref = ids_dual[:B]
        rpos_t_ref = rpos_dual[:B]
        ids_fwd = ids_dual
        rpos_fwd = rpos_dual
    else:
        ids_t_ref, rpos_t_ref, _ = build_ursa_inputs(teacher, txt_cond, x_t, latents_shape, device)
        with torch.no_grad():
            logits_T = teacher(ids_t_ref, rope_pos=rpos_t_ref).sample.float()
            z_T_target_dbg = extract_visual_logits(logits_T, N, K).detach()
        ids_fwd = ids_t_ref
        rpos_fwd = rpos_t_ref

    # Dual-path shape check (teacher vs student, same input): catches vocab
    # slicing inconsistencies between the two model copies early.
    with torch.no_grad():
        z_T_ref2 = extract_visual_logits(
            teacher(ids_t_ref, rope_pos=rpos_t_ref).sample.float(), N, K)
        z_S_ref2 = extract_visual_logits(
            student(ids_t_ref.detach(), rope_pos=rpos_t_ref.detach()).sample.float(), N, K)
    if z_T_ref2.shape != z_S_ref2.shape:
        raise RuntimeError(
            f"[FATAL] Dual-path shape mismatch: z_T={z_T_ref2.shape} z_S={z_S_ref2.shape}"
        )
    print(f" [grad_flow] Dual-path check OK: shape={z_T_ref2.shape}")

    # -- Aux backward — [2B] if CFG, else [B] -------------------------
    # Inputs are detached so aux grads come only from its own forward.
    logits_A = aux(ids_fwd.detach(), rope_pos=rpos_fwd.detach()).sample
    if enable_teacher_cfg and txt_uncond is not None:
        z_A_dual2 = extract_visual_logits(logits_A.float(), N, K)
        z_A_cond_dbg, _ = z_A_dual2.chunk(2, dim=0)
    else:
        z_A_cond_dbg = extract_visual_logits(logits_A.float(), N, K)
    loss_aux_sample = _stable_jeffrey(z_T_target_dbg, z_A_cond_dbg, tau_kd)
    loss_aux = loss_aux_sample.mean()
    loss_aux.backward()

    # Snapshot grad state BEFORE clearing aux grads below.
    teacher_grads = [p.grad for p in teacher.parameters() if p.grad is not None]
    aux_grads = [p.grad.norm().item() for p in aux.parameters() if p.grad is not None]
    print(f" [grad_flow] teacher grads with non-None grad: {len(teacher_grads)} (must be 0)")
    if aux_grads:
        print(f" [grad_flow] aux grad norm min={min(aux_grads):.3e} "
              f"mean={sum(aux_grads)/len(aux_grads):.3e} max={max(aux_grads):.3e}")
    else:
        print(" [grad_flow] ⚠️ aux has NO grads")
    for param in aux.parameters():
        param.grad = None

    # -- Student backward — [B] (cond only for simplicity) ------------
    logits_S = student(ids_t_ref.detach(), rope_pos=rpos_t_ref.detach()).sample
    z_S_cond = extract_visual_logits(logits_S.float(), N, K)
    loss_kd = _stable_kl(z_T_target_dbg, z_S_cond, tau_kd).mean()
    adv = (loss_aux_sample.detach() * 0 + 1.0)  # dummy advantage (shape check)
    assert not adv.requires_grad, "[BUG] adv must be detached"
    # Mirror of the real Stage-9 loss: REINFORCE + KD - entropy bonus.
    loss_student = -(adv * logp).mean() + loss_kd - 0.01 * H_mean
    loss_student.backward()

    student_grads = [p.grad.norm().item() for p in student.parameters() if p.grad is not None]
    if student_grads:
        print(f" [grad_flow] student grad norm min={min(student_grads):.3e} "
              f"mean={sum(student_grads)/len(student_grads):.3e} "
              f"max={max(student_grads):.3e}")
    else:
        print(" [grad_flow] ⚠️ student has NO grads — diagnosing:")
        print(f" logp.requires_grad={logp.requires_grad}")
        print(f" z_s.requires_grad={z_s.requires_grad}")

    assert len(teacher_grads) == 0, "teacher has grads — not frozen"
    assert len(aux_grads) > 0, "aux has no grads after loss_aux.backward()"
    assert len(student_grads) > 0, "student has no grads — grad flow broken"

    # Leave student/aux with clean grad buffers for the real training loop.
    for m in (student, aux):
        for param in m.parameters():
            param.grad = None

    print(" [grad_flow] All gradient assertions PASSED ✓")
    print("=" * 64 + "\n")
+
+
+# ---------------------------------------------------------------------------
+# Main training loop
+# ---------------------------------------------------------------------------
+
+def main():
+ args = parse_args()
+
+ device = torch.device("cuda", args.device) if torch.cuda.is_available() else torch.device("cpu")
+ dtype_map = {"fp16": torch.float16, "bf16": torch.bfloat16, "fp32": torch.float32}
+ compute_dtype = dtype_map[args.mixed_precision]
+ torch.manual_seed(args.seed)
+ os.makedirs(args.out_dir, exist_ok=True)
+
+ # -- Verified regime validation ----------------------------------------
+ _NATIVE = dict(height=320, width=512, num_frames=49, guidance_scale=7.0)
+ is_native = (
+ args.height == _NATIVE["height"]
+ and args.width == _NATIVE["width"]
+ and args.num_frames == _NATIVE["num_frames"]
+ )
+ print(f"[init] verified_native_regime={is_native} "
+ f"geometry=({args.num_frames}×{args.height}×{args.width}) "
+ f"teacher_cfg_scale={args.teacher_cfg_scale if args.enable_teacher_cfg else 'OFF'}")
+ if not is_native:
+ print(f"[WARN] Current geometry ({args.num_frames}×{args.height}×{args.width}) "
+ f"is not the verified native URSA regime "
+ f"({_NATIVE['num_frames']}×{_NATIVE['height']}×{_NATIVE['width']}). "
+ "Distillation quality may degrade or become invalid.")
+ if not args.enable_teacher_cfg:
+ print("[WARN] Teacher CFG is DISABLED. no_cfg is known to produce "
+ "blank/blurry output for this URSA checkpoint. "
+ "Distillation without CFG is unlikely to produce useful results.")
+ elif args.teacher_cfg_scale != _NATIVE["guidance_scale"]:
+ print(f"[WARN] teacher_cfg_scale={args.teacher_cfg_scale} differs from "
+ f"the verified working value ({_NATIVE['guidance_scale']}).")
+
+ if args.enable_teacher_cfg and args.reward_use_guided:
+ print("[WARN] --reward_use_guided is ON — can cause mode collapse, watch tok_entropy.")
+
+ # -- Load pipeline ---------------------------------------------------
+ print(f"[init] Loading from {args.teacher_ckpt} …")
+ pipe = URSAPipeline.from_pretrained(
+ args.teacher_ckpt, torch_dtype=compute_dtype, trust_remote_code=True
+ ).to(device)
+
+ tokenizer = pipe.tokenizer
+ scheduler = pipe.scheduler
+ scheduler.to(device=device)
+
+ vae_t_stride = getattr(pipe.vae.config, "temporal_stride", 4)
+ vae_s_stride = getattr(pipe.vae.config, "spatial_stride", 8)
+ latents_shape = compute_latents_shape(
+ args.num_frames, args.height, args.width, vae_t_stride, vae_s_stride
+ )
+ T, H, W = latents_shape
+ N = T * H * W
+ K = scheduler.codebook_size
+ print(
+ f"[init] latents_shape=({T},{H},{W}) N={N} K={K} "
+ f"CFG={'ON' if args.enable_teacher_cfg else 'OFF'}"
+ )
+
+ # -- Pre-compute uncond token IDs (empty string, [1, L]) --------------
+ txt_uncond_base = tokenizer(
+ [""], max_length=args.max_prompt_length, padding="max_length",
+ padding_side="left", truncation=True, return_tensors="pt",
+ ).input_ids.to(device) # [1, L]
+
+ # -- Three models ----------------------------------------------------
+ teacher = pipe.transformer.eval().requires_grad_(False)
+ student = copy.deepcopy(teacher).train().requires_grad_(True)
+ aux = copy.deepcopy(teacher).train().requires_grad_(True)
+
+ # -- flex_attn: reset offsets to None (standard causal attn) ---------
+ # Our training processes B independent sequences in a batch, so block-packed
+ # offsets are not needed and must be cleared before any forward call.
+ if args.dry_run:
+ print("[init] flex_attn state before reset:")
+ for m, lbl in ((teacher, "teacher"), (student, "student"), (aux, "aux")):
+ _print_flex_attn_state(m, lbl)
+ for m, lbl in ((teacher, "teacher"), (student, "student"), (aux, "aux")):
+ _reset_flex_attn(m, lbl, verbose=True)
+ if args.dry_run:
+ print("[init] flex_attn state after reset:")
+ for m, lbl in ((teacher, "teacher"), (student, "student"), (aux, "aux")):
+ _print_flex_attn_state(m, lbl)
+
+ opt_student = torch.optim.AdamW(
+ student.parameters(), lr=args.lr_student, weight_decay=args.weight_decay
+ )
+ opt_aux = torch.optim.AdamW(
+ aux.parameters(), lr=args.lr_aux, weight_decay=args.weight_decay
+ )
+
+ # -- Dataset ----------------------------------------------------------
+ # dataset = PromptDataset(args.prompt_file, shuffle=True, seed=args.seed)
+ collate = make_collate_fn(tokenizer, args.max_prompt_length, device)
+ # loader = DataLoader(
+ # dataset, batch_size=args.batch_size, shuffle=True,
+ # drop_last=True, num_workers=0, collate_fn=collate,
+ # )
+ dataset = PromptDataset(
+ args.prompt_file,
+ shuffle_files=True,
+ shuffle_buffer=50000, # 例如 50k buffer,够用且不占太多内存
+ seed=args.seed,
+ infinite=True,
+ csv=CSVSpec(caption_field="caption"), # Koala 默认就是 caption
+ )
+
+ loader = DataLoader(
+ dataset,
+ batch_size=args.batch_size,
+ shuffle=False, # IMPORTANT for IterableDataset
+ drop_last=True,
+ num_workers=2, # 视 IO 调大
+ collate_fn=collate,
+ pin_memory=True,
+ )
+ inf_loader = InfiniteDataLoader(loader)
+
+ # -- Pre-training sanity check ---------------------------------------
+ _sanity_check_forward(teacher, scheduler, latents_shape, device, K, args.dry_run)
+
+ # -- Training state --------------------------------------------------
+ baseline_ema: float = 0.0
+ x_hat_prev = None
+ initial_tok_entropy: float = None
+ dump_dir = os.path.join(args.out_dir, "debug_dumps") if args.debug_dump > 0 else None
+
+ num_steps = 1 if args.dry_run else args.num_steps
+ print(f"[train] {'DRY RUN' if args.dry_run else f'{num_steps} steps'} "
+ f"| CFG={args.enable_teacher_cfg}")
+
+ for step in range(1, num_steps + 1):
+
+ # ----------------------------------------------------------------
+ # Stage 1: Tokenise → txt_cond [B, L], txt_uncond [B, L]
+ # ----------------------------------------------------------------
+ txt_cond = next(inf_loader) # [B, L]
+ txt_cond = txt_cond.to(device, non_blocking=True)
+ B = txt_cond.size(0)
+
+ txt_uncond = None
+ if args.enable_teacher_cfg:
+ txt_uncond = txt_uncond_base.expand(B, -1) # [B, L]
+
+ # ----------------------------------------------------------------
+ # Stage 2: x_init ~ Uniform(K) (+ optional p_init mixing)
+ # ----------------------------------------------------------------
+ x_init = _sample_x_init(B, T, H, W, K, device, x_hat_prev, args)
+
+ # ----------------------------------------------------------------
+ # Stage 3: Student 1-step forward on x_init — COND only.
+ #
+ # Gradient needed: logp and H flow back through p_s → student.
+ # ----------------------------------------------------------------
+ with torch.no_grad():
+ ids_init, rpos_init, _ = build_ursa_inputs(
+ teacher, txt_cond, x_init, latents_shape, device)
+ logits_s_init = student(ids_init, rope_pos=rpos_init).sample # [B, L+N+1, D]
+ z_s = extract_visual_logits(logits_s_init.float(), N, K) # [B, N, K]
+ p_s = F.softmax(z_s / args.tau, dim=-1) # [B, N, K]
+ x_hat = torch.multinomial(p_s.view(-1, K), 1).view(B, N) # [B, N]
+ # logp = p_s.clamp(1e-8).log().gather(
+ # -1, x_hat.unsqueeze(-1)).squeeze(-1).sum(-1) # [B]
+ # H_mean = -(p_s * p_s.clamp(1e-8).log()).sum(-1).mean()
+ x_hat_4d = x_hat.view(B, T, H, W)
+
+ # ----------------------------------------------------------------
+ # Stage 4: Pseudo-intermediate x_t
+ # ----------------------------------------------------------------
+ t = sample_t_curriculum(B, device, step, warmup_steps=args.t_curriculum_steps)
+ with torch.no_grad():
+ x_t = scheduler.add_noise(x_hat_4d, t) # [B, T, H, W], long
+
+ # ----------------------------------------------------------------
+ # Stage 5: Teacher forward — single [2B] forward when CFG enabled.
+ #
+ # ids_dual / rpos_dual are SHARED by teacher, aux, and student to
+ # avoid redundant input construction.
+ # ----------------------------------------------------------------
+ with torch.no_grad():
+ if args.enable_teacher_cfg:
+ # ONE [2B] forward = cond (first B) + uncond (last B)
+ ids_dual, rpos_dual, _ = _build_dual_inputs(
+ teacher, txt_cond, txt_uncond, x_t, latents_shape, device)
+ logits_T_dual = teacher(ids_dual, rope_pos=rpos_dual).sample.float()
+ z_T_dual = extract_visual_logits(logits_T_dual, N, K) # [2B, N, K]
+ z_T_cond, z_T_uncond = z_T_dual.chunk(2, dim=0) # [B, N, K] each
+ ids_t = ids_dual[:B] # cond half — alias (no copy)
+ rpos_t = rpos_dual[:B]
+ else:
+ ids_t, rpos_t, _ = build_ursa_inputs(
+ teacher, txt_cond, x_t, latents_shape, device)
+ logits_T = teacher(ids_t, rope_pos=rpos_t).sample.float()
+ z_T_cond = extract_visual_logits(logits_T, N, K) # [B, N, K]
+ z_T_uncond = None
+ ids_dual = ids_t
+ rpos_dual = rpos_t
+
+ # -- CFG guided target (float32, per-sample Bernoulli) ----------
+ z_T_guided = None
+ if args.enable_teacher_cfg:
+ z_T_cond_f = z_T_cond.float()
+ z_T_uncond_f = z_T_uncond.float()
+ z_T_guided = _build_guided_logits(
+ z_T_cond_f, z_T_uncond_f, t,
+ args.teacher_cfg_scale, args.teacher_cfg_trunc)
+
+ # per-sample Bernoulli: use_guided[b] ~ Bernoulli(p_guided)
+ p_guided = _cfg_warmup_prob(
+ step, args.teacher_cfg_prob, args.teacher_cfg_warmup_steps)
+ use_guided = torch.rand(B, device=device) < p_guided # [B] bool
+ use_guided_ratio = use_guided.float().mean().item()
+ z_T_target = _select_target(z_T_guided, z_T_cond_f, use_guided) # [B, N, K]
+ else:
+ use_guided = torch.zeros(B, dtype=torch.bool, device=device)
+ use_guided_ratio = 0.0
+ z_T_target = z_T_cond.float()
+
+ # z_T_target is the KD target — must have no grad path to teacher
+ z_T_target = z_T_target.detach()
+
+ # ----------------------------------------------------------------
+ # Stage 6: Aux forward (fake_rounds) — single [2B] forward when CFG.
+ # ----------------------------------------------------------------
+ loss_aux_cond_v_last = None
+ loss_aux_uncond_v_last = None
+ loss_aux_cond_sample_last = None
+
+ for _fr in range(args.fake_rounds):
+ opt_aux.zero_grad()
+
+ if args.enable_teacher_cfg:
+ # ONE [2B] forward: cond+uncond in one shot
+ logits_A_dual = aux(ids_dual.detach(), rope_pos=rpos_dual.detach()).sample
+ z_A_dual = extract_visual_logits(logits_A_dual.float(), N, K) # [2B, N, K]
+ z_A_cond, z_A_uncond = z_A_dual.chunk(2, dim=0)
+
+ # Cond: Jeffrey(z_T_target, z_A_cond)
+ loss_aux_cond_sample = _stable_jeffrey(z_T_target, z_A_cond, args.tau_kd) # [B]
+ loss_aux_cond_v = loss_aux_cond_sample.mean()
+
+ # Uncond: Jeffrey(z_T_uncond, z_A_uncond)
+ z_T_uncond_det = z_T_uncond.float().detach()
+ loss_aux_uncond_sample = _stable_jeffrey(z_T_uncond_det, z_A_uncond, args.tau_kd)
+ loss_aux_uncond_v = loss_aux_uncond_sample.mean()
+
+ loss_aux_v = loss_aux_cond_v + args.lambda_kd_uncond * loss_aux_uncond_v
+ else:
+ logits_A = aux(ids_t.detach(), rope_pos=rpos_t.detach()).sample
+ z_A_cond = extract_visual_logits(logits_A.float(), N, K)
+
+ loss_aux_cond_sample = _stable_jeffrey(z_T_target, z_A_cond, args.tau_kd) # [B]
+ loss_aux_cond_v = loss_aux_cond_sample.mean()
+ loss_aux_uncond_v = torch.tensor(0.0, device=device)
+ loss_aux_v = loss_aux_cond_v
+
+ loss_aux_v.backward()
+ if args.grad_clip > 0:
+ torch.nn.utils.clip_grad_norm_(aux.parameters(), args.grad_clip)
+ opt_aux.step()
+ # make sure aux grads are cleared and no graph is retained
+ for p in aux.parameters():
+ p.grad = None
+
+ loss_aux_cond_v_last = loss_aux_cond_v.detach()
+ loss_aux_uncond_v_last = loss_aux_uncond_v.detach()
+ loss_aux_cond_sample_last = loss_aux_cond_sample.detach() # [B]
+
+ # # ----------------------------------------------------------------
+ # # Stage 7: Student KD forward on x_t — single [2B] when CFG.
+ # # Dual-path consistency check included.
+ # # ----------------------------------------------------------------
+ # if args.enable_teacher_cfg:
+ # # ONE [2B] forward
+ # logits_S_dual = student(ids_dual.detach(), rope_pos=rpos_dual.detach()).sample
+ # z_S_dual = extract_visual_logits(logits_S_dual.float(), N, K) # [2B, N, K]
+ # z_S_cond, z_S_uncond = z_S_dual.chunk(2, dim=0)
+ # else:
+ # logits_S = student(ids_t.detach(), rope_pos=rpos_t.detach()).sample
+ # z_S_cond = extract_visual_logits(logits_S.float(), N, K) # [B, N, K]
+ # z_S_uncond = None
+
+ # # Dual-path shape consistency check
+ # if z_T_cond.shape != z_S_cond.shape:
+ # raise RuntimeError(
+ # f"[FATAL] Dual-path shape mismatch: "
+ # f"z_T_cond={z_T_cond.shape} z_S_cond={z_S_cond.shape} — "
+ # "vocab slicing inconsistency."
+ # )
+
+ # # KD losses (from raw logits, float32 + log_softmax)
+ # loss_kd_cond = _stable_kl(z_T_target, z_S_cond, args.tau_kd).mean()
+ # loss_kd_uncond_v = torch.tensor(0.0, device=device)
+
+ # if args.enable_teacher_cfg and z_S_uncond is not None:
+ # z_T_uncond_det2 = z_T_uncond.float().detach()
+ # loss_kd_uncond_v = _stable_kl(z_T_uncond_det2, z_S_uncond, args.tau_kd).mean()
+
+ # loss_kd = loss_kd_cond + args.lambda_kd_uncond * loss_kd_uncond_v
+
+ # # ----------------------------------------------------------------
+ # # Stage 8: REINFORCE reward + advantage
+ # #
+ # # INVARIANT: reward and adv MUST NOT carry student gradients.
+ # # - z_S_cond is detached before entering reward computation.
+ # # - adv is explicitly detached.
+ # # - Runtime assertions enforce this.
+ # # ----------------------------------------------------------------
+ # if args.enable_teacher_cfg:
+ # if args.reward_use_guided:
+ # z_T_for_rew = z_T_target # already detached (guided, see §5)
+ # else:
+ # z_T_for_rew = z_T_cond.float().detach() # non-guided cond (stable default)
+ # # Both inputs are detached: no student gradient leaks into reward.
+ # reward = -_stable_kl(
+ # z_T_for_rew.detach(), z_S_cond.detach(), args.tau) # [B]
+ # else:
+ # reward = -loss_aux_cond_sample_last # [B], already detached
+
+ # # Mandatory detach assertions: catch reward/adv gradient leaks early.
+ # assert not reward.requires_grad, (
+ # "[BUG] reward.requires_grad=True — student gradient leaked into reward. "
+ # "Ensure z_S_cond is detached in reward computation."
+ # )
+ # baseline_ema = 0.99 * baseline_ema + 0.01 * reward.mean().item()
+ # adv = (reward - baseline_ema).detach() # [B]
+ # assert not adv.requires_grad, "[BUG] adv.requires_grad=True — explicit detach failed"
+
+ # loss_pg = -(adv * logp).mean()
+
+ # # ----------------------------------------------------------------
+ # # Stage 9: Student loss + update
+ # # ----------------------------------------------------------------
+ # opt_student.zero_grad()
+
+ # lambda_ent_eff = args.lambda_ent * (1.0 + 2.0 * use_guided_ratio)
+ # loss_student = (
+ # args.lambda_pg * loss_pg
+ # + args.lambda_kd * loss_kd
+ # - lambda_ent_eff * H_mean
+ # )
+
+ # # Optional surrogate gradient (DiMO MSE trick — applied to Stage-3 logits z_s)
+ # loss_surr = None
+ # if args.use_surrogate_grad:
+ # with torch.no_grad():
+ # logits_A_ref = aux(ids_t.detach(), rope_pos=rpos_t.detach()).sample
+ # z_A_ref = extract_visual_logits(logits_A_ref.float(), N, K)
+ # # grad_surr = (p_A - p_T): pushes z_s toward teacher distribution
+ # p_A_ref = F.softmax(z_A_ref.float() / args.tau_kd, dim=-1).detach()
+ # p_T_surr = F.softmax(z_T_target / args.tau_kd, dim=-1).detach()
+ # grad_surr = (p_A_ref - p_T_surr).detach()
+ # loss_surr = 0.5 * F.mse_loss(z_s, (z_s - grad_surr).detach())
+ # loss_student = loss_student + args.lambda_surr * loss_surr
+
+ # loss_student.backward()
+ # if args.grad_clip > 0:
+ # torch.nn.utils.clip_grad_norm_(student.parameters(), args.grad_clip)
+ # opt_student.step()
+
+ # # p_init mixing: save x_hat_4d for next step
+ # x_hat_prev = x_hat_4d.detach().clone()
+
+ # ----------------------------------------------------------------
+ # Stage 7: Student KD forward on x_t — single [2B] when CFG.
+ # ----------------------------------------------------------------
+ if args.enable_teacher_cfg:
+ logits_S_dual = _get_logits(student(ids_dual.detach(), rope_pos=rpos_dual.detach())).float()
+ z_S_dual = extract_visual_logits(logits_S_dual, N, K) # [2B, N, K]
+ z_S_cond, z_S_uncond = z_S_dual.chunk(2, dim=0)
+ else:
+ logits_S = _get_logits(student(ids_t.detach(), rope_pos=rpos_t.detach())).float()
+ z_S_cond = extract_visual_logits(logits_S, N, K)
+ z_S_uncond = None
+
+ if z_T_cond.shape != z_S_cond.shape:
+ raise RuntimeError(f"[FATAL] Dual-path shape mismatch: z_T_cond={z_T_cond.shape} z_S_cond={z_S_cond.shape}")
+
+ loss_kd_cond = _stable_kl(z_T_target, z_S_cond, args.tau_kd).mean()
+ loss_kd_uncond_v = torch.tensor(0.0, device=device)
+ if args.enable_teacher_cfg and (z_S_uncond is not None):
+ loss_kd_uncond_v = _stable_kl(z_T_uncond.float().detach(), z_S_uncond, args.tau_kd).mean()
+ loss_kd = loss_kd_cond + args.lambda_kd_uncond * loss_kd_uncond_v
+
+ # ----------------------------------------------------------------
+ # Stage 8: reward + advantage (detached)
+ # ----------------------------------------------------------------
+ if args.enable_teacher_cfg and args.reward_use_guided:
+ z_T_for_rew = z_T_target # already detached
+ else:
+ z_T_for_rew = z_T_cond.float().detach()
+
+ reward = -_stable_kl(z_T_for_rew.detach(), z_S_cond.detach(), args.tau) # [B]
+ assert not reward.requires_grad
+
+ baseline_ema = 0.99 * baseline_ema + 0.01 * reward.mean().item()
+ adv = (reward - baseline_ema).detach()
+ assert not adv.requires_grad
+
+ # ----------------------------------------------------------------
+ # Stage 9: update student in two backward passes (KD then PG/Ent)
+ # ----------------------------------------------------------------
+ opt_student.zero_grad(set_to_none=True)
+
+ # (9a) KD backward first (frees KD graph)
+ (args.lambda_kd * loss_kd).backward()
+
+ # (9b) Policy + entropy: need a fresh forward on x_init WITH grad
+ ids_init, rpos_init, _ = build_ursa_inputs(teacher, txt_cond, x_init, latents_shape, device)
+ logits_s_pol = _get_logits(student(ids_init, rope_pos=rpos_init)).float()
+ z_s_pol = extract_visual_logits(logits_s_pol, N, K)
+
+ logp_tok = F.log_softmax(z_s_pol / args.tau, dim=-1) # [B,N,K]
+ p_s_pol = logp_tok.exp()
+
+ # fixed action: x_hat sampled in Stage 3 (no_grad)
+ logp_sum = logp_tok.gather(-1, x_hat.unsqueeze(-1)).squeeze(-1).sum(-1) # [B], sum over N tokens
+ logp = logp_sum / N # [B], per-token average logp (RECOMMENDED)
+
+ H_mean = -(p_s_pol * logp_tok).sum(-1).mean()
+
+ loss_pg = -(adv * logp).mean()
+
+ lambda_ent_eff = args.lambda_ent * (1.0 + 2.0 * use_guided_ratio)
+ (loss_pg * args.lambda_pg - H_mean * lambda_ent_eff).backward()
+
+ # (optional) surrogate grad — put it here; WARNING: extra forward makes it heavier
+ loss_surr = None
+ if args.use_surrogate_grad:
+ with torch.no_grad():
+ logits_A_ref = _get_logits(aux(ids_t.detach(), rope_pos=rpos_t.detach())).float()
+ z_A_ref = extract_visual_logits(logits_A_ref, N, K)
+ p_A_ref = F.softmax(z_A_ref / args.tau_kd, dim=-1).detach()
+ p_T_ref = F.softmax(z_T_target / args.tau_kd, dim=-1).detach()
+ grad_surr = (p_A_ref - p_T_ref).detach()
+ loss_surr = 0.5 * F.mse_loss(z_s_pol, (z_s_pol - grad_surr).detach())
+ (args.lambda_surr * loss_surr).backward()
+
+ if args.grad_clip > 0:
+ torch.nn.utils.clip_grad_norm_(student.parameters(), args.grad_clip)
+ opt_student.step()
+
+ # p_init mixing: save x_hat_4d for next step
+ x_hat_prev = x_hat_4d.detach() #.clone()
+
+ # ----------------------------------------------------------------
+ # Post-step: assertions (step 1), collapse detection, logging
+ # ----------------------------------------------------------------
+ if step == 1:
+ _run_assertions(
+ x_init, ids_init, rpos_init,
+ z_s, p_s, logp,
+ z_T_cond, z_S_cond, x_t, K, N, B, T, H, W,
+ teacher.config.lm_vocab_size,
+ z_T_uncond=z_T_uncond,
+ z_T_guided=z_T_guided,
+ dry_run=args.dry_run,
+ )
+
+ tok_entropy = _token_histogram_entropy(x_hat, K)
+ if initial_tok_entropy is None:
+ initial_tok_entropy = tok_entropy
+
+ if tok_entropy < args.collapse_warn_frac * initial_tok_entropy:
+ print(
+ f"[COLLAPSE WARNING] step={step} tok_entropy={tok_entropy:.3f} "
+ f"initial={initial_tok_entropy:.3f} "
+ f"ratio={tok_entropy/max(initial_tok_entropy, 1e-8):.2f} < "
+ f"{args.collapse_warn_frac}. "
+ "Increase --lambda_ent (try 0.05) or --tau."
+ )
+
+ if step % args.log_every == 0 or args.dry_run:
+ surr_str = f" loss_surr={loss_surr.item():.4f}" if loss_surr is not None else ""
+ print(
+ f"[step {step:>6d}] "
+ f"loss_aux_cond={loss_aux_cond_v_last.item():.3e} "
+ f"loss_aux_uncond={loss_aux_uncond_v_last.item():.3e} "
+ f"loss_kd_cond={loss_kd_cond.item():.4f} "
+ f"loss_kd_uncond={loss_kd_uncond_v.item():.4f} "
+ f"loss_pg={loss_pg.item():.4f}"
+ f"{surr_str} "
+ f"H={H_mean.item():.3f} tok_H={tok_entropy:.3f} "
+ f"guided_ratio={use_guided_ratio:.2f} "
+ f"baseline={baseline_ema:.4f} "
+ f"mean_logp_tok={logp.mean().item():.3f}"
+ )
+
+ if args.debug_dump > 0 and step % args.debug_dump == 0:
+ _dump_debug(dump_dir, step, x_hat, K)
+
+ if not args.dry_run and step % args.save_every == 0:
+ ckpt_dir = os.path.join(args.out_dir, f"step_{step:06d}")
+ save_checkpoint(student, ckpt_dir, "student")
+ save_checkpoint(aux, ckpt_dir, "aux")
+
+ # -- dry_run: full grad-flow check after the single training step ----
+ if args.dry_run:
+ print("\n[dry_run] Running gradient flow debug …")
+ txt_dbg = next(inf_loader)
+ B_dbg = txt_dbg.size(0)
+ x_t_dbg = torch.randint(0, K, (B_dbg, T, H, W), device=device, dtype=torch.long)
+ txt_u_dbg = (txt_uncond_base.expand(B_dbg, -1)
+ if args.enable_teacher_cfg else None)
+ debug_grad_flow(
+ teacher, student, aux,
+ txt_dbg, txt_u_dbg, x_t_dbg, latents_shape, device, K, N,
+ args.tau, args.tau_kd, args.enable_teacher_cfg,
+ )
+ _dry_run_patches_789(teacher, latents_shape, K, N, device)
+ print("[dry_run] Done. All checks (1-9) PASSED. Exiting.")
+ return
+
+ # Final save
+ final_dir = os.path.join(args.out_dir, "final")
+ save_checkpoint(student, final_dir, "student")
+ save_checkpoint(aux, final_dir, "aux")
+ print("[done] Training complete.")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _sample_x_init(B, T, H, W, K, device, x_hat_prev, args):
+ x_init = torch.randint(0, K, (B, T, H, W), device=device, dtype=torch.long)
+ if x_hat_prev is not None and args.p_init_mix_ratio > 0:
+ n_mix = max(1, int(B * args.p_init_mix_ratio))
+ x_init[:n_mix] = corrupt_tokens(x_hat_prev[:n_mix], r=args.p_mix_corrupt_frac, K=K)
+ return x_init
+
+
+def _token_histogram_entropy(x_hat: torch.Tensor, K: int) -> float:
+ counts = x_hat.flatten().bincount(minlength=K).float()
+ p = counts / counts.sum()
+ p = p[p > 0]
+ return float(-(p * p.log()).sum().item())
+
+
+def _dump_debug(dump_dir: str, step: int, x_hat: torch.Tensor, K: int):
+ os.makedirs(dump_dir, exist_ok=True)
+ counts = x_hat.flatten().bincount(minlength=K).tolist()
+ with open(os.path.join(dump_dir, f"step_{step:06d}_hist.json"), "w") as fh:
+ json.dump({"step": step, "counts": counts}, fh)
+ torch.save(x_hat.cpu(), os.path.join(dump_dir, f"step_{step:06d}_xhat.pt"))
+ print(f"[debug_dump] step={step} saved to {dump_dir}")
+
+
def _run_assertions(
    x_init, ids_init, rpos_init,
    z_s, p_s, logp,
    z_T_cond, z_S_cond, x_t,
    K, N, B, T, H, W, lm_vocab_size,
    z_T_uncond=None, z_T_guided=None,
    dry_run=False,
):
    """Full shape / value-domain / consistency assertions (run at step=1).

    Validates, in order: the initial token grid, the packed input ids and
    their text/visual vocab split, rope positions, student logits and
    probabilities, sampled log-probs, the noised tokens, and teacher/student
    dual-path shape agreement.  When CFG tensors are supplied, also prints
    summary statistics and guards against non-finite or exploding guided
    logits.

    Args:
        x_init: [B, T, H, W] long tensor of initial token indices in [0, K).
        ids_init: [B, txt_len + N + 1] packed input ids (text then visual).
        rpos_init: [B, txt_len + N + 1, 3] rope positions.
        z_s: [B, N, K] student visual logits.
        p_s: [B, N, K] student probabilities; each row must sum to 1.
        logp: sampled-token log-probabilities; must be finite.
        z_T_cond / z_S_cond: [B, N, K] teacher / student conditional logits.
        x_t: noised token grid; values must remain in [0, K).
        K, N, B, T, H, W: codebook size, token count, and grid dims.
        lm_vocab_size: text-vocab size; visual ids must be shifted past it.
        z_T_uncond / z_T_guided: optional CFG teacher logits, checked if given.
        dry_run: when True, always print the teacher-logit summary.

    Raises:
        AssertionError: on the first violated invariant.
    """
    print("[assert] Running shape/value assertions …")

    # Recover text length from the packed sequence: total = txt_len + N + 1.
    L_plus_N1 = ids_init.size(1)
    txt_len = L_plus_N1 - (N + 1)

    # x_init
    assert x_init.dtype == torch.long, f"x_init dtype={x_init.dtype}"
    assert x_init.min() >= 0 and x_init.max() < K, \
        f"x_init out of [0,K): [{x_init.min()}, {x_init.max()}]"

    # input_ids shape & token value ranges
    assert ids_init.shape == (B, L_plus_N1), f"ids_init.shape={ids_init.shape}"
    txt_part = ids_init[:, :txt_len]
    vis_part = ids_init[:, -N:]
    # Text tokens live in [0, lm_vocab_size); visual tokens are offset by
    # lm_vocab_size so the two ranges must not overlap.
    assert (txt_part < lm_vocab_size).all(), \
        f"text tokens bleed into visual range (max={txt_part.max()})"
    assert (vis_part >= lm_vocab_size).all(), \
        f"visual tokens not shifted (min={vis_part.min()}, lm_vocab_size={lm_vocab_size})"
    assert (vis_part < lm_vocab_size + K).all(), \
        f"visual tokens exceed lm_vocab_size+K (max={vis_part.max()})"

    # rope_pos
    assert rpos_init.shape == (B, L_plus_N1, 3), \
        f"rope_pos shape={rpos_init.shape} expected ({B},{L_plus_N1},3)"

    # z_s
    assert z_s.shape == (B, N, K), f"z_s.shape={z_s.shape}"
    p_err = (p_s.sum(-1) - 1).abs().max().item()
    assert p_err < 1e-3, f"p_s not normalised: max deviation={p_err:.2e}"

    # logp
    assert not torch.isnan(logp).any(), "logp contains NaN"
    assert not torch.isinf(logp).any(), "logp contains Inf"

    # x_t
    assert x_t.min() >= 0 and x_t.max() < K, \
        f"x_t out of [0,K) after add_noise: [{x_t.min()}, {x_t.max()}]"

    # Dual-path shape check
    assert z_T_cond.shape == z_S_cond.shape, \
        f"Dual-path mismatch: z_T_cond={z_T_cond.shape} z_S_cond={z_S_cond.shape}"
    assert z_T_cond.shape == (B, N, K), f"z_T_cond.shape={z_T_cond.shape}"

    # z_T logits printout (always in dry_run; also when uncond is available)
    if dry_run or z_T_uncond is not None:
        print(
            f"[assert] z_T_cond shape={z_T_cond.shape} "
            f"min={z_T_cond.min():.3f} max={z_T_cond.max():.3f} "
            f"mean={z_T_cond.mean():.3f}"
        )
    if z_T_uncond is not None:
        assert z_T_uncond.shape == (B, N, K), f"z_T_uncond.shape={z_T_uncond.shape}"
        print(
            f"[assert] z_T_uncond shape={z_T_uncond.shape} "
            f"min={z_T_uncond.min():.3f} max={z_T_uncond.max():.3f} "
            f"mean={z_T_uncond.mean():.3f}"
        )
    if z_T_guided is not None:
        assert z_T_guided.shape == (B, N, K), f"z_T_guided.shape={z_T_guided.shape}"
        g_min = z_T_guided.min().item()
        g_max = z_T_guided.max().item()
        g_mean = z_T_guided.mean().item()
        print(
            f"[assert] z_T_guided shape={z_T_guided.shape} "
            f"min={g_min:.3f} max={g_max:.3f} mean={g_mean:.3f}"
        )
        # Explosion guard: guided logits must be finite and not excessively large.
        assert not torch.isnan(z_T_guided).any(), "z_T_guided contains NaN"
        assert not torch.isinf(z_T_guided).any(), "z_T_guided contains Inf"
        assert abs(g_min) < 1e4 and abs(g_max) < 1e4, (
            f"z_T_guided magnitude too large: min={g_min:.1e} max={g_max:.1e}. "
            f"Reduce --teacher_cfg_scale (currently may amplify outlier logits)."
        )

    print("[assert] All assertions PASSED ✓")
+
+
def _sanity_check_forward(teacher, scheduler, latents_shape, device, K, verbose=False):
    """Run one throwaway teacher forward and validate input/logit geometry."""
    print("[init] Checking logit dimensions …")
    T, H, W = latents_shape
    N = T * H * W
    B = 1
    L = 16
    # All-zero probe inputs: values are irrelevant, only shapes matter here.
    txt_probe = torch.zeros(B, L, dtype=torch.long, device=device)
    vis_probe = torch.zeros(B, T, H, W, dtype=torch.long, device=device)
    with torch.no_grad():
        ids, rpos, _ = build_ursa_inputs(teacher, txt_probe, vis_probe, latents_shape, device)
        logits = teacher(ids, rope_pos=rpos).sample
    lm_head_size = teacher.config.lm_head_size
    lm_vocab = teacher.config.lm_vocab_size
    print(
        f"[init] logits={logits.shape} K={K} "
        f"lm_head={lm_head_size} lm_vocab={lm_vocab}"
    )
    # Packed sequence = L text tokens + N visual tokens + 1 separator.
    assert ids.shape == (B, L + N + 1), f"ids shape {ids.shape}"
    assert rpos.shape == (B, L + N + 1, 3), f"rpos shape {rpos.shape}"
    z = extract_visual_logits(logits.float(), N, K)
    assert z.shape == (B, N, K), f"z shape {z.shape}"
    assert lm_head_size >= K, f"lm_head_size={lm_head_size} < K={K}"
    if verbose:
        print("[init] flex_attn state during sanity check:")
        _print_flex_attn_state(teacher, "teacher")
    print("[init] Forward check OK ✓")
+
+
+# ---------------------------------------------------------------------------
+# Dry-run patches 7 / 8 / 9
+# ---------------------------------------------------------------------------
+
def _dry_run_patches_789(teacher, latents_shape, K, N, device):
    """Three deep self-checks executed only during --dry_run.

    Patch 7 — extract_visual_logits end-to-end alignment:
      Run a real teacher forward, manually reconstruct z_manual from raw logits
      using the latent_shift / codebook_size convention, and assert the result
      matches extract_visual_logits(). Handles the common URSA case where
      lm_head outputs K logits directly (latent_shift not applied to logit dim).

    Patch 8 — flex_attn semantics sanity:
      If the model exposes set_offsets_by_lens, compare visual-logit mean-delta
      between offsets=None (standard causal) and a single-block offset. A large
      delta is expected and confirms that our training correctly uses offsets=None.
      Gracefully skips when flex_attention is unavailable at runtime.

    Patch 9 — logp / token reshape consistency:
      With a small (T=3, H=4, W=5) shape, verify x_hat reshape round-trips and
      spot-check 10 token positions against manually computed log-probability.
    """
    T, H, W = latents_shape
    L_test, B_test = 16, 1

    print("\n" + "=" * 64)
    print("[patch 7/8/9] Running additional dry_run self-checks …")

    # -------------------------------------------------------------------------
    # Build shared dummy inputs used by both patch 7 and patch 8
    # -------------------------------------------------------------------------
    dummy_txt = torch.zeros(B_test, L_test, dtype=torch.long, device=device)
    dummy_vis = torch.zeros(B_test, T, H, W, dtype=torch.long, device=device)
    with torch.no_grad():
        ids_test, rpos_test, _ = build_ursa_inputs(
            teacher, dummy_txt, dummy_vis, latents_shape, device)
        logits_full = teacher(ids_test, rope_pos=rpos_test).sample.float()  # [1, L+N+1, D]

    D = logits_full.size(-1)  # actual logit last-dim (lm_head_size)
    latent_shift = teacher.config.lm_vocab_size  # text-vocab offset for input token IDs

    # =========================================================================
    # Patch 7 — extract_visual_logits end-to-end alignment
    # =========================================================================
    print("\n[7] extract_visual_logits end-to-end alignment …")
    z_vis = extract_visual_logits(logits_full, N, K)  # [1, N, K]
    assert z_vis.shape == (B_test, N, K), f"z_vis.shape={z_vis.shape}"

    if D >= latent_shift + K:
        # Full-vocab head: logit dim covers text (0..latent_shift) + visual tokens.
        z_seq = logits_full[:, -(N + 1) : -1]  # [1, N, D]
        z_manual = z_seq[..., latent_shift : latent_shift + K]  # [1, N, K]
        delta = (z_vis - z_manual).abs().max().item()
        print(f" [7] path=full-vocab D={D} latent_shift+K={latent_shift + K}")
        print(f" [7] z_vis.shape={z_vis.shape} max|z_vis - z_manual|={delta:.2e}")
        assert delta < 1e-5, (
            f"extract_visual_logits mismatch (full-vocab path): delta={delta:.2e}. "
            "The function should return logits[..., latent_shift:latent_shift+K]."
        )
        print("[7] extract_visual_logits alignment PASSED ✓")

    else:
        # Common URSA case: lm_head outputs K logits directly (lm_head_size ≈ K).
        # latent_shift is the input token-ID offset, NOT a logit-dimension offset.
        # extract_visual_logits handles this as D==K (happy path) or D>K (offset=D-K).
        z_seq = logits_full[:, -(N + 1) : -1]  # [1, N, D]
        if D == K:
            delta = (z_vis - z_seq).abs().max().item()
            print(
                f" [7] SKIP latent_shift formula: D={D} == K={K} "
                f"latent_shift={latent_shift}.\n"
                f" [7] Explanation: URSA lm_head outputs K visual logits directly.\n"
                f" [7] latent_shift={latent_shift} is the input token-ID shift "
                f"(raw_code + lm_vocab_size), NOT a logit-dim offset.\n"
                f" [7] extract_visual_logits happy-path: z = logits[:, -(N+1):-1] "
                f"(no vocab-dim slicing).\n"
                f" [7] Fallback check: z_vis == raw causal slice "
                f"max_delta={delta:.2e}"
            )
            assert delta < 1e-5, (
                f"z_vis != raw causal slice when D==K: delta={delta:.2e}"
            )
        else:
            # D > K but D < latent_shift + K → extract uses offset = D - K
            offset = D - K
            z_manual = z_seq[..., offset:]
            delta = (z_vis - z_manual).abs().max().item()
            print(
                f" [7] SKIP latent_shift formula: D={D} < latent_shift+K={latent_shift + K}.\n"
                f" [7] extract_visual_logits uses offset={offset} (D-K). "
                f"max_delta={delta:.2e}"
            )
            assert delta < 1e-5, (
                f"z_vis != z_seq[..., D-K:]: delta={delta:.2e}"
            )
        print("[7] extract_visual_logits alignment PASSED (fallback path) ✓")

    # =========================================================================
    # Patch 8 — flex_attn semantics sanity
    # =========================================================================
    print("\n[8] flex_attn semantics sanity …")
    fa = _probe_flex_attn(teacher)
    if fa is None or not hasattr(fa, "set_offsets_by_lens"):
        print(" [8] flex_attn.set_offsets_by_lens not available — skip")
        print("[8] flex_attn semantics sanity PASSED (skipped — no flex_attn) ✓")
    else:
        L_total = ids_test.size(1)  # L_test + N + 1
        txt_block = L_test + (N + 1)  # single-block: all tokens in one block
        block_lens = [txt_block]

        try:
            # Forward A: offsets=None — standard causal attention (our training config)
            _reset_flex_attn(teacher, "teacher")
            with torch.no_grad():
                logits_A = teacher(ids_test, rope_pos=rpos_test).sample.float()
            z_A = extract_visual_logits(logits_A, N, K)

            # Forward B: set_offsets_by_lens with a single block.
            # A single block causes the mask to allow full (bidirectional) attention
            # within the block, which differs from standard causal attention.
            fa.set_offsets_by_lens(block_lens)
            with torch.no_grad():
                logits_B = teacher(ids_test, rope_pos=rpos_test).sample.float()
            z_B = extract_visual_logits(logits_B, N, K)

            delta_mean = (z_A - z_B).abs().mean().item()
            delta_max = (z_A - z_B).abs().max().item()
            print(
                f" [8] offsets=None vs set_offsets_by_lens({block_lens}):\n"
                f" [8] mean_abs_delta={delta_mean:.4e} max_abs_delta={delta_max:.4e}"
            )
            if delta_mean > 1e-3:
                print(
                    f" [8] WARNING: mean_delta={delta_mean:.2e} > 1e-3.\n"
                    " [8] Single-block flex_attn uses FULL (bidirectional) attention\n"
                    " [8] inside the block, whereas offsets=None gives standard CAUSAL\n"
                    " [8] attention. This difference is EXPECTED — it confirms our\n"
                    " [8] training correctly uses offsets=None (no packed sequences)."
                )
            else:
                print(" [8] delta ≤ 1e-3: attention semantics equivalent for this input.")
            print("[8] flex_attn semantics sanity PASSED ✓")

        # FIX: was `except (NotImplementedError, RuntimeError, Exception)` —
        # Exception already subsumes the other two, so the tuple was redundant
        # (flake8 B014). Behavior is identical: every Exception is caught.
        except Exception as exc:
            print(f" [8] flex_attn runtime not available ({type(exc).__name__}: {exc}) — skip")
            print("[8] flex_attn semantics sanity PASSED (runtime skip) ✓")
        finally:
            _reset_flex_attn(teacher, "teacher")  # always restore clean state

    # =========================================================================
    # Patch 9 — logp / token reshape consistency
    # =========================================================================
    print("\n[9] logp/token reshape consistency …")
    T9, H9, W9 = 3, 4, 5
    N9, B9 = T9 * H9 * W9, 1  # 60 tokens, batch=1

    torch.manual_seed(99)
    z9 = torch.randn(B9, N9, K)
    p9 = F.softmax(z9 / 1.0, dim=-1)  # [1, 60, K]; each row sums to 1

    # ----- token sampling ---------------------------------------------------
    x_hat_flat = torch.multinomial(p9.view(-1, K), 1)  # [N9, 1] (1 sample per row)
    x_hat_1d = x_hat_flat.view(B9, N9)  # [1, 60]
    x_hat_4d = x_hat_1d.view(B9, T9, H9, W9)  # [1, 3, 4, 5]

    # reshape round-trip: 1d → 4d → 1d must be lossless
    x_hat_back = x_hat_4d.view(B9, N9)
    assert torch.equal(x_hat_1d, x_hat_back), (
        f"reshape round-trip FAILED: x_hat_1d != x_hat_4d.view(B,N)\n"
        f" x_hat_1d.shape={x_hat_1d.shape} x_hat_back.shape={x_hat_back.shape}"
    )

    # ----- logp computation (mirrors training code) -------------------------
    # logp_all[b, n] = log p9[b, n, x_hat_1d[b, n]]
    logp_all = (
        p9.clamp(1e-8).log()
        .gather(-1, x_hat_1d.unsqueeze(-1))
        .squeeze(-1)
    )  # [B9, N9]
    logp_sum = logp_all.sum(-1)  # [B9]

    # ----- spot-check 10 random token positions -----------------------------
    torch.manual_seed(7)
    positions = torch.randperm(N9)[:10].tolist()
    for pos in positions:
        tok_id = x_hat_1d[0, pos].item()
        logp_man = math.log(max(p9[0, pos, tok_id].item(), 1e-8))
        logp_gat = logp_all[0, pos].item()
        diff = abs(logp_man - logp_gat)
        assert diff < 1e-6, (
            f"logp mismatch at pos={pos}, tok={tok_id}: "
            f"manual={logp_man:.8f} gathered={logp_gat:.8f} diff={diff:.2e}"
        )

    # check logp_sum matches sum of logp_all
    logp_sum_manual = logp_all[0].sum().item()
    assert abs(logp_sum.item() - logp_sum_manual) < 1e-5, \
        f"logp_sum mismatch: {logp_sum.item():.6f} vs {logp_sum_manual:.6f}"

    print(
        f" [9] T={T9},H={H9},W={W9} N={N9} K={K} "
        f"x_hat reshape round-trip ✓ "
        f"10 logp spot-checks (pos={positions}) ✓ "
        f"logp_sum={logp_sum.item():.3f}"
    )
    print("[9] logp/token reshape consistency PASSED ✓")

    print("\n" + "=" * 64)
    print("[patch 7/8/9] All 3 additional dry_run checks PASSED ✓")
    print("=" * 64)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/URSA/setup.py b/URSA/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2da6b463fa6227cfd88955cbf46de7aae188332
--- /dev/null
+++ b/URSA/setup.py
@@ -0,0 +1,133 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2024-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Python setup script."""
+
+import argparse
+import os
+import shutil
+import subprocess
+import sys
+
+import setuptools
+import setuptools.command.build_py
+import setuptools.command.install
+
+
def parse_args():
    """Parse arguments.

    Extracts the custom ``--version`` flag, hands every other CLI argument
    back to setuptools via ``sys.argv``, and enriches the result with the
    current git revision and the README long description when available.

    Returns:
        argparse.Namespace with attributes: version, git_version,
        long_description.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--version", default=None)
    args, unknown = parser.parse_known_args()
    # Forward unrecognized arguments (setuptools commands/options) back.
    sys.argv = [sys.argv[0]] + unknown
    args.git_version = None
    args.long_description = ""
    if args.version is None and os.path.exists("version.txt"):
        # FIX: specify encoding for consistency with the README read below
        # (implicit locale encoding can vary across platforms).
        with open("version.txt", "r", encoding="utf-8") as f:
            args.version = f.read().strip()
    if os.path.exists(".git"):
        try:
            git_version = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd="./")
            args.git_version = git_version.decode("ascii").strip()
        except (OSError, subprocess.CalledProcessError):
            # Best-effort: leave git_version as None outside a usable repo.
            pass
    if os.path.exists("README.md"):
        # FIX: dropped the pointless single-argument os.path.join wrapper.
        with open("README.md", encoding="utf-8") as f:
            args.long_description = f.read()
    return args
+
+
def clean_builds():
    """Delete leftover build artifacts ('build' and 'diffnext.egg-info')."""
    stale = [p for p in ["build", "diffnext.egg-info"] if os.path.exists(p)]
    for p in stale:
        shutil.rmtree(p)
+
+
def find_packages(top):
    """Collect every directory under `top` that is a Python package.

    A directory counts as a package when it contains an ``__init__.py``.
    """
    found = []
    for dirpath, _dirnames, _filenames in os.walk(top):
        marker = os.path.join(dirpath, "__init__.py")
        if os.path.exists(marker):
            found.append(dirpath)
    return found
+
+
def find_package_data():
    """Return the external data files to install with the package (none yet)."""
    return list()
+
+
class BuildPyCommand(setuptools.command.build_py.build_py):
    """Enhanced 'build_py' command."""

    def build_packages(self):
        # Materialize the version module before the packages are collected,
        # so the built wheel carries the resolved version / git revision.
        version_src = (
            'version = "{}"\n'
            'git_version = "{}"\n'
            "__version__ = version\n"
        ).format(args.version, args.git_version)
        with open("diffnext/version.py", "w") as f:
            f.write(version_src)
        super(BuildPyCommand, self).build_packages()

    def build_package_data(self):
        # Register external data files under the top-level package.
        self.package_data = {"diffnext": find_package_data()}
        super(BuildPyCommand, self).build_package_data()
+
+
class InstallCommand(setuptools.command.install.install):
    """Enhanced 'install' command."""

    def initialize_options(self):
        setuptools.command.install.install.initialize_options(self)
        # Force the legacy (distutils-style) installation layout.
        self.old_and_unmanageable = True
+
+
# Parse --version (and strip it from sys.argv) before setuptools runs.
args = parse_args()
# Package metadata and build configuration; packages are discovered by
# walking the `diffnext` tree for __init__.py markers (find_packages above).
setuptools.setup(
    name="diffnext",
    version=args.version,
    description="A diffusers based library for autoregressive diffusion models.",
    long_description=args.long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/baaivision/URSA",
    author="BAAI",
    license="Apache License",
    packages=find_packages("diffnext"),
    # Custom commands: write version.py at build time, force legacy install.
    cmdclass={"build_py": BuildPyCommand, "install": InstallCommand},
    install_requires=[
        "torch",
        "diffusers",
        "transformers",
        "accelerate",
        "imageio",
        "imageio-ffmpeg",
        "omegaconf",
        "wandb",
        "scipy",
    ],
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3 :: Only",
        "Topic :: Scientific/Engineering",
        "Topic :: Scientific/Engineering :: Mathematics",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)
# Remove the temporary build tree and egg-info left behind by the build.
clean_builds()
diff --git a/URSA/src/__init__.py b/URSA/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/tom/ursa.jpg b/URSA/tom/ursa.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c85b83057769f3ee78c5ef87051f0ff836819339
Binary files /dev/null and b/URSA/tom/ursa.jpg differ
diff --git a/URSA/ursa.jpg b/URSA/ursa.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..25edbb886a12086e5736605f81fee22a3605609b
Binary files /dev/null and b/URSA/ursa.jpg differ
diff --git a/URSA/version.txt b/URSA/version.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c181bf5996673f39c9806ebc7b202f8c36f0de1c
--- /dev/null
+++ b/URSA/version.txt
@@ -0,0 +1 @@
+0.3.0a0
diff --git a/hf_upload2.log b/hf_upload2.log
new file mode 100644
index 0000000000000000000000000000000000000000..4de951f2e6341cabc946f136f930f1bb18f2fa5a
--- /dev/null
+++ b/hf_upload2.log
@@ -0,0 +1,34107 @@
+nohup: ignoring input
+Repo created: https://huggingface.co/BryanW/43.wm
+Found 779 candidate files to upload
+Running validation checks on files to upload...
+Validation checks complete.
+Starting upload...
+[33mYou are about to upload a large folder to the Hub using `hf upload-large-folder`. This is a new feature so feedback is very welcome!
+
+A few things to keep in mind:
+ - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations
+ - Do not start several processes in parallel.
+ - You can interrupt and resume the process at any time. The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.
+ - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.
+
+Some temporary metadata will be stored under `/gfs/space/private/fengzl/World_Model/.cache/huggingface`.
+ - You must not modify those files manually.
+ - You must not delete the `./.cache/huggingface/` folder while a process is running.
+ - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.
+
+If the process output is too verbose, you can disable the progress bars with `--no-bars`. You can also entirely disable the status report with `--no-report`.
+
+For more details, run `hf upload-large-folder --help` or check the documentation at https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder.[0m
+
Recovering from metadata files: 0%| | 0/779 [00:00, ?it/s]
Recovering from metadata files: 26%|██▋ | 206/779 [00:00<00:00, 2053.79it/s]
Recovering from metadata files: 53%|█████▎ | 412/779 [00:00<00:00, 2027.53it/s]
Recovering from metadata files: 79%|███████▉ | 615/779 [00:00<00:00, 1981.06it/s]
Recovering from metadata files: 100%|██████████| 779/779 [00:00<00:00, 1982.28it/s]
+Successfully committed 50 at once. Increasing the limit for next batch.
+
+
+
+---------- 2026-03-23 23:56:58 (0:00:00) ----------
+Files: hashed 3/779 (8.7K/92.8G) | pre-uploaded: 0/0 (0.0/92.8G) (+779 unsure) | committed: 0/779 (0.0/92.8G) | ignored: 0
+Workers: hashing: 15 | get upload mode: 1 | pre-uploading: 0 | committing: 0 | waiting: 0
+---------------------------------------------------
+
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 201kB / 201kB [A[A
+
+
...o_test/step_001500/aux.pt: 0%| | 16.0MB / 3.96GB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 140kB / 140kB [A[A[A
+
+
...o_test/step_001500/aux.pt: 0%| | 16.0MB / 3.96GB [A[A
Processing Files (0 / 1) : 0%| | 16.0MB / 3.96GB, 6.66MB/s
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 139kB / 139kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 198kB / 198kB [A[A[A[A[A
+
+
+
+
+
+
..._a_teacher_25step_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...n__teacher_25step_cfg.mp4: 100%|██████████| 104kB / 104kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 141kB / 141kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 100kB / 100kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
..._s_teacher_25step_cfg.mp4: 100%|██████████| 101kB / 101kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 201kB / 201kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 140kB / 140kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 139kB / 139kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 198kB / 198kB [A[A[A[A[A
+
+
+
+
+
+
..._a_teacher_25step_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...n__teacher_25step_cfg.mp4: 100%|██████████| 104kB / 104kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 141kB / 141kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 100kB / 100kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 95 files] : 10%|█ | 24.3MB / 242MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (104 / 105) : 11%|█ | 25.5MB / 243MB, 10.6MB/s
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 2%|▏ | 64.0MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 1%|▏ | 72.0MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 1%|▏ | 63.9MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 1%|▏ | 64.0MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 1%|▏ | 63.9MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 2%|▏ | 64.0MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 1%|▏ | 72.0MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 1%|▏ | 63.9MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 1%|▏ | 64.0MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 1%|▏ | 63.9MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 9) : 1%| | 329MB / 48.0GB, 137MB/s
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
+
+
...st/step_002000/student.pt: 2%|▏ | 88.0MB / 3.96GB [A[A
+
+
...st/step_002000/student.pt: 2%|▏ | 88.0MB / 3.96GB [A[A
Processing Files (0 / 1) : 2%|▏ | 88.0MB / 3.96GB, 147MB/s
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 50%|████▉ | 112MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001500/aux.pt: 3%|▎ | 112MB / 3.96GB [A[A
Processing Files (104 / 105) : 53%|█████▎ | 130MB / 243MB, 49.8MB/s
Processing Files (0 / 1) : 3%|▎ | 112MB / 3.96GB, 43.0MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 4%|▍ | 168MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 3%|▎ | 168MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 3%|▎ | 168MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 3%|▎ | 160MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 3%|▎ | 160MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 9) : 2%|▏ | 825MB / 48.0GB, 317MB/s
+
+
...st/step_002000/student.pt: 5%|▍ | 190MB / 3.96GB [A[A
Processing Files (0 / 1) : 5%|▍ | 190MB / 3.96GB, 238MB/s
+
+
...o_test/step_001500/aux.pt: 5%|▌ | 199MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 92%|█████████▏| 208MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 5%|▌ | 199MB / 3.96GB, 71.2MB/s
Processing Files (104 / 105) : 93%|█████████▎| 225MB / 243MB, 80.5MB/s
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 0%| | 16.0MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 6%|▋ | 256MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 5%|▌ | 256MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 5%|▌ | 248MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 5%|▌ | 248MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 5%|▌ | 248MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 0%| | 16.0MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 3%|▎ | 1.27GB / 48.0GB, 454MB/s
+
+
...st/step_002000/student.pt: 7%|▋ | 264MB / 3.96GB [A[A
Processing Files (0 / 1) : 7%|▋ | 264MB / 3.96GB, 264MB/s
+
+
...o_test/step_001500/aux.pt: 7%|▋ | 283MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 7%|▋ | 283MB / 3.96GB, 94.4MB/s
Processing Files (105 / 105) : 100%|██████████| 243MB / 243MB, 81.0MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 8%|▊ | 336MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 7%|▋ | 336MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 7%|▋ | 320MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 7%|▋ | 328MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 7%|▋ | 320MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 2%|▏ | 87.9MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 4%|▎ | 1.73GB / 48.0GB, 576MB/s
+
+
...st/step_002000/student.pt: 8%|▊ | 330MB / 3.96GB [A[A
Processing Files (0 / 1) : 8%|▊ | 330MB / 3.96GB, 275MB/s
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001500/aux.pt: 9%|▉ | 360MB / 3.96GB [A[A
Processing Files (0 / 1) : 9%|▉ | 360MB / 3.96GB, 112MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 10%|█ | 408MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 8%|▊ | 400MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 8%|▊ | 392MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 8%|▊ | 408MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 8%|▊ | 384MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 3%|▎ | 152MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 4%|▍ | 2.14GB / 48.0GB, 670MB/s
+
+
...st/step_002000/student.pt: 10%|█ | 399MB / 3.96GB [A[A
Processing Files (0 / 1) : 10%|█ | 399MB / 3.96GB, 285MB/s
+
+
...o_test/step_001500/aux.pt: 11%|█ | 427MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 11%|█ | 427MB / 3.96GB, 126MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 12%|█▏ | 488MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 10%|▉ | 480MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 9%|▉ | 464MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 10%|▉ | 480MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 9%|▉ | 464MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 5%|▍ | 224MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 5%|▌ | 2.60GB / 48.0GB, 765MB/s
+
+
...st/step_002000/student.pt: 12%|█▏ | 479MB / 3.96GB [A[A
Processing Files (0 / 1) : 12%|█▏ | 479MB / 3.96GB, 299MB/s
+
+
...o_test/step_001500/aux.pt: 13%|█▎ | 499MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 13%|█▎ | 499MB / 3.96GB, 139MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 14%|█▍ | 568MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 11%|█▏ | 560MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 11%|█ | 544MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 11%|█▏ | 560MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 11%|█ | 544MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 6%|▌ | 304MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 6%|▋ | 3.08GB / 48.0GB, 856MB/s
+
+
...st/step_002000/student.pt: 14%|█▍ | 559MB / 3.96GB [A[A
Processing Files (0 / 1) : 14%|█▍ | 559MB / 3.96GB, 311MB/s
+
+
...o_test/step_001500/aux.pt: 15%|█▍ | 579MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 15%|█▍ | 579MB / 3.96GB, 152MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 16%|█▋ | 648MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 13%|█▎ | 632MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 13%|█▎ | 632MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 13%|█▎ | 648MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 13%|█▎ | 632MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 8%|▊ | 392MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 7%|▋ | 3.58GB / 48.0GB, 943MB/s
+
+
...st/step_002000/student.pt: 16%|█▋ | 647MB / 3.96GB [A[A
Processing Files (0 / 1) : 16%|█▋ | 647MB / 3.96GB, 324MB/s
+
+
...o_test/step_001500/aux.pt: 17%|█▋ | 667MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 17%|█▋ | 667MB / 3.96GB, 167MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 18%|█▊ | 728MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 15%|█▍ | 712MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 15%|█▍ | 712MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 15%|█▍ | 728MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 15%|█▍ | 712MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 9%|▉ | 464MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 8%|▊ | 4.06GB / 48.0GB, 1.01GB/s
+
+
...st/step_002000/student.pt: 18%|█▊ | 727MB / 3.96GB [A[A
Processing Files (0 / 1) : 18%|█▊ | 727MB / 3.96GB, 331MB/s
+
+
...o_test/step_001500/aux.pt: 19%|█▊ | 739MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 19%|█▊ | 739MB / 3.96GB, 176MB/s Successfully committed 75 at once. Increasing the limit for next batch.
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 20%|██ | 808MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 16%|█▌ | 792MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 16%|█▌ | 784MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 17%|█▋ | 808MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 16%|█▌ | 792MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 11%|█ | 544MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 9%|▉ | 4.53GB / 48.0GB, 1.08GB/s
+
+
...st/step_002000/student.pt: 20%|██ | 799MB / 3.96GB [A[A
Processing Files (0 / 1) : 20%|██ | 799MB / 3.96GB, 333MB/s
+
+
...o_test/step_001500/aux.pt: 20%|██ | 811MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 20%|██ | 811MB / 3.96GB, 184MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 22%|██▏ | 888MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 18%|█▊ | 872MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 18%|█▊ | 864MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 18%|█▊ | 888MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 18%|█▊ | 872MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 13%|█▎ | 624MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 10%|█ | 5.01GB / 48.0GB, 1.14GB/s
+
+
...st/step_002000/student.pt: 22%|██▏ | 879MB / 3.96GB [A[A
Processing Files (0 / 1) : 22%|██▏ | 879MB / 3.96GB, 338MB/s
+
+
...o_test/step_001500/aux.pt: 22%|██▏ | 883MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 22%|██▏ | 883MB / 3.96GB, 192MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 25%|██▍ | 976MB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 19%|█▉ | 952MB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 19%|█▉ | 944MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 20%|█▉ | 960MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 19%|█▉ | 952MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 14%|█▍ | 704MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 11%|█▏ | 5.49GB / 48.0GB, 1.19GB/s
+
+
...st/step_002000/student.pt: 24%|██▍ | 959MB / 3.96GB [A[A
Processing Files (0 / 1) : 24%|██▍ | 959MB / 3.96GB, 342MB/s
+
+
...o_test/step_001500/aux.pt: 24%|██▍ | 971MB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 24%|██▍ | 971MB / 3.96GB, 202MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 27%|██▋ | 1.06GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 21%|██▏ | 1.04GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 21%|██ | 1.02GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 21%|██▏ | 1.04GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 21%|██ | 1.03GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 16%|█▌ | 784MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 12%|█▏ | 5.98GB / 48.0GB, 1.24GB/s
+
+
...st/step_002000/student.pt: 26%|██▌ | 1.04GB / 3.96GB [A[A
Processing Files (0 / 1) : 26%|██▌ | 1.04GB / 3.96GB, 346MB/s
+
+
...o_test/step_001500/aux.pt: 27%|██▋ | 1.05GB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 27%|██▋ | 1.05GB / 3.96GB, 210MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 29%|██▉ | 1.15GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 23%|██▎ | 1.13GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 23%|██▎ | 1.10GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 23%|██▎ | 1.13GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 23%|██▎ | 1.12GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 18%|█▊ | 872MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 14%|█▎ | 6.50GB / 48.0GB, 1.30GB/s
+
+
...st/step_002000/student.pt: 28%|██▊ | 1.13GB / 3.96GB [A[A
Processing Files (0 / 1) : 28%|██▊ | 1.13GB / 3.96GB, 352MB/s
+
+
...o_test/step_001500/aux.pt: 29%|██▊ | 1.14GB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 29%|██▊ | 1.14GB / 3.96GB, 219MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 31%|███ | 1.23GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 25%|██▍ | 1.22GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 24%|██▍ | 1.19GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 25%|██▍ | 1.22GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 25%|██▍ | 1.20GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 20%|█▉ | 960MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 15%|█▍ | 7.02GB / 48.0GB, 1.35GB/s
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 30%|███ | 1.21GB / 3.96GB [A[A
Processing Files (0 / 1) : 30%|███ | 1.21GB / 3.96GB, 355MB/s
+
+
...o_test/step_001500/aux.pt: 31%|███ | 1.23GB / 3.96GB [A[A
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A[A[A[A
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 31%|███ | 1.23GB / 3.96GB, 227MB/s
Processing Files (105 / 105) : 100%|██████████| 243MB / 243MB, 45.0MB/s
+
New Data Upload : | | 0.00B / 0.00B, 0.00B/s
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 33%|███▎ | 1.31GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 27%|██▋ | 1.30GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 26%|██▋ | 1.29GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 27%|██▋ | 1.30GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 26%|██▌ | 1.27GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 21%|██▏ | 1.05GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 16%|█▌ | 7.52GB / 48.0GB, 1.39GB/s
+
+
...st/step_002000/student.pt: 33%|███▎ | 1.29GB / 3.96GB [A[A
Processing Files (0 / 1) : 33%|███▎ | 1.29GB / 3.96GB, 360MB/s
+
+
...o_test/step_001500/aux.pt: 33%|███▎ | 1.31GB / 3.96GB [A[A
Processing Files (0 / 1) : 33%|███▎ | 1.31GB / 3.96GB, 235MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 35%|███▌ | 1.40GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 28%|██▊ | 1.38GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 28%|██▊ | 1.37GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 28%|██▊ | 1.38GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 28%|██▊ | 1.36GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 23%|██▎ | 1.14GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 17%|█▋ | 8.03GB / 48.0GB, 1.43GB/s
+
+
...st/step_002000/student.pt: 35%|███▍ | 1.37GB / 3.96GB [A[A
Processing Files (0 / 1) : 35%|███▍ | 1.37GB / 3.96GB, 362MB/s
+
+
...o_test/step_001500/aux.pt: 35%|███▌ | 1.39GB / 3.96GB [A[A
Processing Files (0 / 1) : 35%|███▌ | 1.39GB / 3.96GB, 240MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 37%|███▋ | 1.46GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 30%|██▉ | 1.46GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 29%|██▉ | 1.44GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 30%|██▉ | 1.46GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 29%|██▉ | 1.44GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 25%|██▍ | 1.22GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 18%|█▊ | 8.47GB / 48.0GB, 1.46GB/s
+
+
...st/step_002000/student.pt: 37%|███▋ | 1.45GB / 3.96GB [A[A
Processing Files (0 / 1) : 37%|███▋ | 1.45GB / 3.96GB, 364MB/s
+
+
...o_test/step_001500/aux.pt: 37%|███▋ | 1.47GB / 3.96GB [A[A
Processing Files (0 / 1) : 37%|███▋ | 1.47GB / 3.96GB, 244MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 39%|███▉ | 1.55GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 32%|███▏ | 1.54GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 31%|███ | 1.52GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 31%|███▏ | 1.54GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 31%|███ | 1.52GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 27%|██▋ | 1.30GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 19%|█▊ | 8.98GB / 48.0GB, 1.50GB/s
+
+
...st/step_002000/student.pt: 39%|███▊ | 1.53GB / 3.96GB [A[A
Processing Files (0 / 1) : 39%|███▊ | 1.53GB / 3.96GB, 365MB/s
+
+
...o_test/step_001500/aux.pt: 39%|███▉ | 1.54GB / 3.96GB [A[A
Processing Files (0 / 1) : 39%|███▉ | 1.54GB / 3.96GB, 248MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 41%|████ | 1.63GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 33%|███▎ | 1.62GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 33%|███▎ | 1.61GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 33%|███▎ | 1.61GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 33%|███▎ | 1.60GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 28%|██▊ | 1.38GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 20%|█▉ | 9.46GB / 48.0GB, 1.52GB/s
+
+
...st/step_002000/student.pt: 41%|████ | 1.61GB / 3.96GB [A[A
Processing Files (0 / 1) : 41%|████ | 1.61GB / 3.96GB, 367MB/s
+
+
...o_test/step_001500/aux.pt: 41%|████ | 1.63GB / 3.96GB [A[A
Processing Files (0 / 1) : 41%|████ | 1.63GB / 3.96GB, 254MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 43%|████▎ | 1.71GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 35%|███▌ | 1.71GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 34%|███▍ | 1.68GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 34%|███▍ | 1.68GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 34%|███▍ | 1.67GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 30%|██▉ | 1.46GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 21%|██ | 9.91GB / 48.0GB, 1.55GB/s
+
+
...st/step_002000/student.pt: 43%|████▎ | 1.69GB / 3.96GB [A[A
Processing Files (0 / 1) : 43%|████▎ | 1.69GB / 3.96GB, 367MB/s
+
+
...o_test/step_001500/aux.pt: 43%|████▎ | 1.71GB / 3.96GB [A[A
Processing Files (0 / 1) : 43%|████▎ | 1.71GB / 3.96GB, 259MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 45%|████▌ | 1.78GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 36%|███▋ | 1.78GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 36%|███▌ | 1.75GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 36%|███▌ | 1.74GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 36%|███▌ | 1.74GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 31%|███▏ | 1.54GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 22%|██▏ | 10.3GB / 48.0GB, 1.57GB/s
+
+
...st/step_002000/student.pt: 44%|████▍ | 1.76GB / 3.96GB [A[A
Processing Files (0 / 1) : 44%|████▍ | 1.76GB / 3.96GB, 366MB/s
+
+
...o_test/step_001500/aux.pt: 45%|████▌ | 1.79GB / 3.96GB [A[A
Processing Files (0 / 1) : 45%|████▌ | 1.79GB / 3.96GB, 263MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 47%|████▋ | 1.86GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 38%|███▊ | 1.86GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 37%|███▋ | 1.82GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 37%|███▋ | 1.82GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 37%|███▋ | 1.82GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 33%|███▎ | 1.61GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 23%|██▎ | 10.8GB / 48.0GB, 1.59GB/s
+
+
...st/step_002000/student.pt: 46%|████▌ | 1.83GB / 3.96GB [A[A
Processing Files (0 / 1) : 46%|████▌ | 1.83GB / 3.96GB, 366MB/s
+
+
...o_test/step_001500/aux.pt: 47%|████▋ | 1.86GB / 3.96GB [A[A
Processing Files (0 / 1) : 47%|████▋ | 1.86GB / 3.96GB, 266MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 49%|████▉ | 1.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 40%|███▉ | 1.94GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 39%|███▉ | 1.90GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 39%|███▉ | 1.90GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 39%|███▉ | 1.90GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 35%|███▍ | 1.69GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 23%|██▎ | 11.3GB / 48.0GB, 1.61GB/s
+
+
...st/step_002000/student.pt: 48%|████▊ | 1.90GB / 3.96GB [A[A
Processing Files (0 / 1) : 48%|████▊ | 1.90GB / 3.96GB, 366MB/s
+
+
...o_test/step_001500/aux.pt: 49%|████▉ | 1.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 49%|████▉ | 1.94GB / 3.96GB, 269MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 52%|█████▏ | 2.05GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 41%|████ | 2.02GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 41%|████ | 1.98GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 41%|████ | 1.98GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 41%|████ | 1.99GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 36%|███▋ | 1.78GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 25%|██▍ | 11.8GB / 48.0GB, 1.64GB/s
+
+
...st/step_002000/student.pt: 50%|█████ | 1.99GB / 3.96GB [A[A
Processing Files (0 / 1) : 50%|█████ | 1.99GB / 3.96GB, 369MB/s
+
+
...o_test/step_001500/aux.pt: 51%|█████ | 2.03GB / 3.96GB [A[A
Processing Files (0 / 1) : 51%|█████ | 2.03GB / 3.96GB, 274MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 54%|█████▍ | 2.14GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 43%|████▎ | 2.11GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 42%|████▏ | 2.07GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 42%|████▏ | 2.07GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 43%|████▎ | 2.08GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 38%|███▊ | 1.86GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 26%|██▌ | 12.3GB / 48.0GB, 1.67GB/s
+
+
...st/step_002000/student.pt: 52%|█████▏ | 2.08GB / 3.96GB [A[A
Processing Files (0 / 1) : 52%|█████▏ | 2.08GB / 3.96GB, 371MB/s
+
+
...o_test/step_001500/aux.pt: 53%|█████▎ | 2.11GB / 3.96GB [A[A
Processing Files (0 / 1) : 53%|█████▎ | 2.11GB / 3.96GB, 277MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 56%|█████▌ | 2.22GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 45%|████▍ | 2.18GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 44%|████▍ | 2.15GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 44%|████▍ | 2.15GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 44%|████▍ | 2.15GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 40%|███▉ | 1.94GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 27%|██▋ | 12.8GB / 48.0GB, 1.68GB/s
+
+
...st/step_002000/student.pt: 54%|█████▍ | 2.16GB / 3.96GB [A[A
Processing Files (0 / 1) : 54%|█████▍ | 2.16GB / 3.96GB, 372MB/s
+
+
...o_test/step_001500/aux.pt: 55%|█████▌ | 2.19GB / 3.96GB [A[A
Processing Files (0 / 1) : 55%|█████▌ | 2.19GB / 3.96GB, 280MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 58%|█████▊ | 2.30GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 46%|████▋ | 2.26GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 46%|████▌ | 2.23GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 46%|████▌ | 2.23GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 46%|████▌ | 2.23GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 41%|████ | 2.02GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 28%|██▊ | 13.3GB / 48.0GB, 1.70GB/s
+
+
...st/step_002000/student.pt: 56%|█████▋ | 2.24GB / 3.96GB [A[A
Processing Files (0 / 1) : 56%|█████▋ | 2.24GB / 3.96GB, 373MB/s
+
+
...o_test/step_001500/aux.pt: 57%|█████▋ | 2.27GB / 3.96GB [A[A
Processing Files (0 / 1) : 57%|█████▋ | 2.27GB / 3.96GB, 283MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 60%|█████▉ | 2.38GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 48%|████▊ | 2.34GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 47%|████▋ | 2.31GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 47%|████▋ | 2.30GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 47%|████▋ | 2.31GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 43%|████▎ | 2.10GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 29%|██▊ | 13.7GB / 48.0GB, 1.72GB/s
+
+
...st/step_002000/student.pt: 58%|█████▊ | 2.32GB / 3.96GB [A[A
Processing Files (0 / 1) : 58%|█████▊ | 2.32GB / 3.96GB, 374MB/s
+
+
...o_test/step_001500/aux.pt: 59%|█████▉ | 2.35GB / 3.96GB [A[A
Processing Files (0 / 1) : 59%|█████▉ | 2.35GB / 3.96GB, 286MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 62%|██████▏ | 2.46GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 50%|████▉ | 2.42GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 49%|████▉ | 2.39GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 49%|████▉ | 2.38GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 49%|████▉ | 2.39GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 44%|████▍ | 2.18GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 30%|██▉ | 14.2GB / 48.0GB, 1.73GB/s
+
+
...st/step_002000/student.pt: 61%|██████ | 2.40GB / 3.96GB [A[A
Processing Files (0 / 1) : 61%|██████ | 2.40GB / 3.96GB, 375MB/s
+
+
...o_test/step_001500/aux.pt: 61%|██████ | 2.42GB / 3.96GB [A[A
Processing Files (0 / 1) : 61%|██████ | 2.42GB / 3.96GB, 288MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 64%|██████▍ | 2.54GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 51%|█████ | 2.50GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 51%|█████ | 2.48GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 50%|█████ | 2.46GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 51%|█████ | 2.47GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 46%|████▌ | 2.25GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 31%|███ | 14.7GB / 48.0GB, 1.75GB/s
+
+
...st/step_002000/student.pt: 62%|██████▏ | 2.47GB / 3.96GB [A[A
Processing Files (0 / 1) : 62%|██████▏ | 2.47GB / 3.96GB, 374MB/s
+
+
...o_test/step_001500/aux.pt: 63%|██████▎ | 2.49GB / 3.96GB [A[A
Processing Files (0 / 1) : 63%|██████▎ | 2.49GB / 3.96GB, 290MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 66%|██████▌ | 2.61GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 53%|█████▎ | 2.58GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 52%|█████▏ | 2.56GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 52%|█████▏ | 2.54GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 52%|█████▏ | 2.54GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 48%|████▊ | 2.33GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 32%|███▏ | 15.2GB / 48.0GB, 1.76GB/s
+
+
...st/step_002000/student.pt: 64%|██████▍ | 2.55GB / 3.96GB [A[A
Processing Files (0 / 1) : 64%|██████▍ | 2.55GB / 3.96GB, 375MB/s
+
+
...o_test/step_001500/aux.pt: 65%|██████▍ | 2.56GB / 3.96GB [A[A
Processing Files (0 / 1) : 65%|██████▍ | 2.56GB / 3.96GB, 291MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 68%|██████▊ | 2.69GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 54%|█████▍ | 2.65GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 54%|█████▎ | 2.62GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 54%|█████▎ | 2.62GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 54%|█████▎ | 2.62GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 49%|████▉ | 2.41GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 33%|███▎ | 15.6GB / 48.0GB, 1.77GB/s
+
+
...st/step_002000/student.pt: 66%|██████▋ | 2.63GB / 3.96GB [A[A
Processing Files (0 / 1) : 66%|██████▋ | 2.63GB / 3.96GB, 376MB/s
+
+
...o_test/step_001500/aux.pt: 67%|██████▋ | 2.64GB / 3.96GB [A[A
Processing Files (0 / 1) : 67%|██████▋ | 2.64GB / 3.96GB, 294MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 70%|██████▉ | 2.77GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 56%|█████▌ | 2.73GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 55%|█████▌ | 2.70GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 55%|█████▌ | 2.70GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 55%|█████▌ | 2.70GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 51%|█████ | 2.49GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 34%|███▎ | 16.1GB / 48.0GB, 1.79GB/s
+
+
...st/step_002000/student.pt: 68%|██████▊ | 2.71GB / 3.96GB [A[A
Processing Files (0 / 1) : 68%|██████▊ | 2.71GB / 3.96GB, 377MB/s
+
+
...o_test/step_001500/aux.pt: 69%|██████▉ | 2.73GB / 3.96GB [A[A
Processing Files (0 / 1) : 69%|██████▉ | 2.73GB / 3.96GB, 297MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 72%|███████▏ | 2.85GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 57%|█████▋ | 2.81GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 57%|█████▋ | 2.78GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 57%|█████▋ | 2.78GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 57%|█████▋ | 2.78GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 53%|█████▎ | 2.57GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 35%|███▍ | 16.6GB / 48.0GB, 1.80GB/s
+
+
...st/step_002000/student.pt: 70%|███████ | 2.79GB / 3.96GB [A[A
Processing Files (0 / 1) : 70%|███████ | 2.79GB / 3.96GB, 377MB/s
+
+
...o_test/step_001500/aux.pt: 71%|███████ | 2.81GB / 3.96GB [A[A
Processing Files (0 / 1) : 71%|███████ | 2.81GB / 3.96GB, 299MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 74%|███████▍ | 2.93GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 59%|█████▉ | 2.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 59%|█████▊ | 2.86GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 59%|█████▊ | 2.86GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 59%|█████▊ | 2.86GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 54%|█████▍ | 2.64GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 36%|███▌ | 17.0GB / 48.0GB, 1.81GB/s
+
+
...st/step_002000/student.pt: 72%|███████▏ | 2.86GB / 3.96GB [A[A
Processing Files (0 / 1) : 72%|███████▏ | 2.86GB / 3.96GB, 377MB/s
+
+
...o_test/step_001500/aux.pt: 73%|███████▎ | 2.89GB / 3.96GB [A[A
Processing Files (0 / 1) : 73%|███████▎ | 2.89GB / 3.96GB, 301MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 76%|███████▌ | 3.01GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 61%|██████ | 2.97GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 60%|██████ | 2.94GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 60%|██████ | 2.94GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 60%|██████ | 2.94GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 56%|█████▌ | 2.72GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 37%|███▋ | 17.5GB / 48.0GB, 1.82GB/s
+
+
...st/step_002000/student.pt: 74%|███████▍ | 2.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 74%|███████▍ | 2.94GB / 3.96GB, 377MB/s
+
+
...o_test/step_001500/aux.pt: 75%|███████▍ | 2.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 75%|███████▍ | 2.96GB / 3.96GB, 302MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 78%|███████▊ | 3.09GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 62%|██████▏ | 3.05GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 62%|██████▏ | 3.02GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 62%|██████▏ | 3.02GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 62%|██████▏ | 3.02GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 57%|█████▋ | 2.80GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 38%|███▊ | 18.0GB / 48.0GB, 1.84GB/s
+
+
...st/step_002000/student.pt: 76%|███████▋ | 3.02GB / 3.96GB [A[A
Processing Files (0 / 1) : 76%|███████▋ | 3.02GB / 3.96GB, 378MB/s
+
+
...o_test/step_001500/aux.pt: 77%|███████▋ | 3.04GB / 3.96GB [A[A
Processing Files (0 / 1) : 77%|███████▋ | 3.04GB / 3.96GB, 304MB/s Successfully committed 100 at once. Increasing the limit for next batch.
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 80%|███████▉ | 3.17GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 64%|██████▍ | 3.13GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 63%|██████▎ | 3.10GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 63%|██████▎ | 3.10GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 63%|██████▎ | 3.10GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 59%|█████▉ | 2.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 39%|███▊ | 18.5GB / 48.0GB, 1.85GB/s
+
+
...st/step_002000/student.pt: 78%|███████▊ | 3.11GB / 3.96GB [A[A
Processing Files (0 / 1) : 78%|███████▊ | 3.11GB / 3.96GB, 379MB/s
+
+
...o_test/step_001500/aux.pt: 79%|███████▉ | 3.13GB / 3.96GB [A[A
Processing Files (0 / 1) : 79%|███████▉ | 3.13GB / 3.96GB, 307MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 82%|████████▏ | 3.26GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 66%|██████▌ | 3.22GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 65%|██████▌ | 3.19GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 65%|██████▌ | 3.18GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 65%|██████▌ | 3.18GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 61%|██████ | 2.97GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 40%|███▉ | 19.0GB / 48.0GB, 1.86GB/s
+
+
...st/step_002000/student.pt: 81%|████████ | 3.20GB / 3.96GB [A[A
Processing Files (0 / 1) : 81%|████████ | 3.20GB / 3.96GB, 381MB/s
+
+
...o_test/step_001500/aux.pt: 81%|████████ | 3.22GB / 3.96GB [A[A
Processing Files (0 / 1) : 81%|████████ | 3.22GB / 3.96GB, 316MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 84%|████████▍ | 3.34GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 67%|██████▋ | 3.30GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 67%|██████▋ | 3.28GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 67%|██████▋ | 3.27GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 67%|██████▋ | 3.27GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 62%|██████▏ | 3.06GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 41%|████ | 19.5GB / 48.0GB, 1.91GB/s
+
+
...st/step_002000/student.pt: 83%|████████▎ | 3.29GB / 3.96GB [A[A
Processing Files (0 / 1) : 83%|████████▎ | 3.29GB / 3.96GB, 382MB/s
+
+
...o_test/step_001500/aux.pt: 83%|████████▎ | 3.31GB / 3.96GB [A[A
Processing Files (0 / 1) : 83%|████████▎ | 3.31GB / 3.96GB, 324MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 87%|████████▋ | 3.43GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 69%|██████▉ | 3.38GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 69%|██████▊ | 3.36GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 69%|██████▊ | 3.35GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 69%|██████▊ | 3.36GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 64%|██████▍ | 3.14GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 42%|████▏ | 20.0GB / 48.0GB, 1.96GB/s
+
+
...st/step_002000/student.pt: 85%|████████▍ | 3.37GB / 3.96GB [A[A
Processing Files (0 / 1) : 85%|████████▍ | 3.37GB / 3.96GB, 383MB/s
+
+
...o_test/step_001500/aux.pt: 85%|████████▌ | 3.39GB / 3.96GB [A[A
Processing Files (0 / 1) : 85%|████████▌ | 3.39GB / 3.96GB, 332MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 89%|████████▊ | 3.51GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 71%|███████ | 3.46GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 70%|███████ | 3.44GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 70%|███████ | 3.43GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 70%|███████ | 3.44GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 66%|██████▌ | 3.22GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 43%|████▎ | 20.5GB / 48.0GB, 2.01GB/s
+
+
...st/step_002000/student.pt: 87%|████████▋ | 3.45GB / 3.96GB [A[A
Processing Files (0 / 1) : 87%|████████▋ | 3.45GB / 3.96GB, 383MB/s
+
+
...o_test/step_001500/aux.pt: 87%|████████▋ | 3.47GB / 3.96GB [A[A
Processing Files (0 / 1) : 87%|████████▋ | 3.47GB / 3.96GB, 340MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 91%|█████████ | 3.60GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 73%|███████▎ | 3.55GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 72%|███████▏ | 3.53GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 72%|███████▏ | 3.52GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 72%|███████▏ | 3.52GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 67%|██████▋ | 3.30GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 44%|████▍ | 21.0GB / 48.0GB, 2.06GB/s
+
+
...st/step_002000/student.pt: 89%|████████▉ | 3.53GB / 3.96GB [A[A
Processing Files (0 / 1) : 89%|████████▉ | 3.53GB / 3.96GB, 383MB/s
+
+
...o_test/step_001500/aux.pt: 90%|████████▉ | 3.55GB / 3.96GB [A[A
Processing Files (0 / 1) : 90%|████████▉ | 3.55GB / 3.96GB, 349MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 93%|█████████▎| 3.68GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 74%|███████▍ | 3.64GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 74%|███████▍ | 3.61GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 74%|███████▍ | 3.61GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 74%|███████▍ | 3.61GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 69%|██████▉ | 3.38GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 45%|████▍ | 21.5GB / 48.0GB, 2.11GB/s
+
+
...st/step_002000/student.pt: 91%|█████████ | 3.61GB / 3.96GB [A[A
Processing Files (0 / 1) : 91%|█████████ | 3.61GB / 3.96GB, 385MB/s
+
+
...o_test/step_001500/aux.pt: 92%|█████████▏| 3.64GB / 3.96GB [A[A
Processing Files (0 / 1) : 92%|█████████▏| 3.64GB / 3.96GB, 357MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 95%|█████████▌| 3.77GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 76%|███████▋ | 3.73GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 76%|███████▌ | 3.70GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 76%|███████▌ | 3.70GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 76%|███████▌ | 3.70GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 71%|███████ | 3.47GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 46%|████▌ | 22.1GB / 48.0GB, 2.16GB/s
+
+
...st/step_002000/student.pt: 93%|█████████▎| 3.70GB / 3.96GB [A[A
Processing Files (0 / 1) : 93%|█████████▎| 3.70GB / 3.96GB, 386MB/s
+
+
...o_test/step_001500/aux.pt: 94%|█████████▍| 3.72GB / 3.96GB [A[A
Processing Files (0 / 1) : 94%|█████████▍| 3.72GB / 3.96GB, 365MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 97%|█████████▋| 3.84GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 78%|███████▊ | 3.81GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 77%|███████▋ | 3.78GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 77%|███████▋ | 3.78GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 77%|███████▋ | 3.78GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 73%|███████▎ | 3.56GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 47%|████▋ | 22.5GB / 48.0GB, 2.21GB/s
+
+
...st/step_002000/student.pt: 95%|█████████▌| 3.77GB / 3.96GB [A[A
Processing Files (0 / 1) : 95%|█████████▌| 3.77GB / 3.96GB, 385MB/s
+
+
...o_test/step_001500/aux.pt: 96%|█████████▌| 3.80GB / 3.96GB [A[A
Processing Files (0 / 1) : 96%|█████████▌| 3.80GB / 3.96GB, 373MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 99%|█████████▉| 3.92GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 80%|███████▉ | 3.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 79%|███████▉ | 3.86GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 79%|███████▊ | 3.85GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 79%|███████▊ | 3.85GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 74%|███████▍ | 3.63GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (4 / 10) : 48%|████▊ | 23.0GB / 48.0GB, 2.25GB/s
+
+
...st/step_002000/student.pt: 97%|█████████▋| 3.85GB / 3.96GB [A[A
Processing Files (0 / 1) : 97%|█████████▋| 3.85GB / 3.96GB, 386MB/s
+
+
...o_test/step_001500/aux.pt: 98%|█████████▊| 3.88GB / 3.96GB [A[A
Processing Files (0 / 1) : 98%|█████████▊| 3.88GB / 3.96GB, 381MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 81%|████████ | 3.96GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 81%|████████ | 3.94GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 80%|████████ | 3.93GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 80%|████████ | 3.93GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 76%|███████▌ | 3.71GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 10) : 49%|████▉ | 23.4GB / 48.0GB, 2.30GB/s
+
+
...st/step_002000/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.93GB / 3.96GB, 385MB/s
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 387MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A
+
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 82%|████████▏ | 4.03GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 82%|████████▏ | 4.02GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 82%|████████▏ | 4.01GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 82%|████████▏ | 4.00GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 2 files] : 26%|██▌ | 3.85GB / 14.7GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 12) : 50%|████▉ | 23.9GB / 48.0GB, 2.34GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 387MB/s
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 84%|████████▍ | 4.10GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 84%|████████▍ | 4.10GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 83%|████████▎ | 4.08GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 83%|████████▎ | 4.08GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 79%|███████▉ | 3.86GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 2%|▏ | 120MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 2%|▏ | 160MB / 9.78GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 51%|█████ | 24.5GB / 48.0GB, 2.40GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 86%|████████▌ | 4.18GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 85%|████████▌ | 4.18GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 85%|████████▍ | 4.15GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 85%|████████▍ | 4.15GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 80%|████████ | 3.94GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 4%|▍ | 200MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 3%|▎ | 168MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 3%|▎ | 144MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 52%|█████▏ | 25.1GB / 48.0GB, 2.43GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 87%|████████▋ | 4.26GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 87%|████████▋ | 4.25GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 87%|████████▋ | 4.23GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 87%|████████▋ | 4.23GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 82%|████████▏ | 4.02GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 5%|▌ | 264MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 5%|▍ | 232MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 4%|▍ | 216MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 53%|█████▎ | 25.7GB / 48.0GB, 2.44GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 89%|████████▊ | 4.34GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 89%|████████▊ | 4.34GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 88%|████████▊ | 4.31GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 88%|████████▊ | 4.31GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 84%|████████▍ | 4.10GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 7%|▋ | 352MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 6%|▋ | 312MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 6%|▌ | 296MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 55%|█████▍ | 26.3GB / 48.0GB, 2.46GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 90%|█████████ | 4.42GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 90%|█████████ | 4.41GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 90%|████████▉ | 4.39GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 90%|████████▉ | 4.38GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 85%|████████▌ | 4.18GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 9%|▊ | 424MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 8%|▊ | 384MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 8%|▊ | 376MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 56%|█████▌ | 26.9GB / 48.0GB, 2.47GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 92%|█████████▏| 4.50GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 92%|█████████▏| 4.49GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 91%|█████████▏| 4.47GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 91%|█████████▏| 4.46GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 87%|████████▋ | 4.26GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 10%|█ | 504MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 10%|▉ | 472MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 9%|▉ | 456MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 57%|█████▋ | 27.6GB / 48.0GB, 2.49GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 94%|█████████▍| 4.59GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 94%|█████████▎| 4.58GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 93%|█████████▎| 4.56GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 93%|█████████▎| 4.55GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 89%|████████▉ | 4.34GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 12%|█▏ | 592MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 11%|█▏ | 560MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 11%|█ | 544MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 59%|█████▉ | 28.3GB / 48.0GB, 2.52GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 96%|█████████▌| 4.67GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 96%|█████████▌| 4.67GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 95%|█████████▌| 4.66GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 95%|█████████▌| 4.65GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 91%|█████████ | 4.44GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 14%|█▍ | 680MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 13%|█▎ | 648MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 13%|█▎ | 624MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 60%|██████ | 29.0GB / 48.0GB, 2.54GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 97%|█████████▋| 4.76GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 97%|█████████▋| 4.76GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 97%|█████████▋| 4.74GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 97%|█████████▋| 4.73GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 92%|█████████▏| 4.52GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 16%|█▌ | 760MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 15%|█▌ | 736MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 15%|█▍ | 712MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 62%|██████▏ | 29.7GB / 48.0GB, 2.56GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 324MB/s
+
New Data Upload : 4%|▎ | 601kB / 17.1MB, 59.0kB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 99%|█████████▉| 4.85GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 99%|█████████▉| 4.85GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 99%|█████████▉| 4.83GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 98%|█████████▊| 4.82GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 94%|█████████▍| 4.61GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 17%|█▋ | 848MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 17%|█▋ | 824MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 16%|█▋ | 800MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (5 / 13) : 63%|██████▎ | 30.4GB / 48.0GB, 2.58GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 96%|█████████▌| 4.70GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 19%|█▉ | 936MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 18%|█▊ | 904MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 18%|█▊ | 880MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 13) : 64%|██████▍ | 30.9GB / 48.0GB, 2.59GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 1%| | 24.0MB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 1%| | 24.0MB / 3.96GB [A[A
Processing Files (0 / 1) : 1%| | 24.0MB / 3.96GB, 2.35MB/s
+
+
...st/step_001500/student.pt: 0%| | 7.98MB / 3.96GB [A[A
+
+
...st/step_001500/student.pt: 0%| | 7.98MB / 3.96GB [A[A
Processing Files (0 / 1) : 0%| | 7.98MB / 3.96GB, 782kB/s
+
+
...o_test/step_001000/aux.pt: 0%| | 7.95MB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 0%| | 7.95MB / 3.96GB [A[A
Processing Files (0 / 1) : 0%| | 7.95MB / 3.96GB, 779kB/s
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 98%|█████████▊| 4.78GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 21%|██ | 1.02GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 20%|█▉ | 976MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 10%|█ | 1.02GB / 9.78GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 14) : 65%|██████▌ | 31.3GB / 48.0GB, 2.58GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 301MB/s
+
New Data Upload : 7%|▋ | 1.20MB / 17.1MB, 118kB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 3%|▎ | 104MB / 3.96GB [A[A
Processing Files (0 / 1) : 3%|▎ | 104MB / 3.96GB, 10.2MB/s
+
+
...st/step_001500/student.pt: 2%|▏ | 95.9MB / 3.96GB [A[A
Processing Files (0 / 1) : 2%|▏ | 95.9MB / 3.96GB, 9.40MB/s
+
+
...o_test/step_001000/aux.pt: 2%|▏ | 80.0MB / 3.96GB [A[A
Processing Files (0 / 1) : 2%|▏ | 80.0MB / 3.96GB, 7.84MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 99%|█████████▉| 4.86GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 22%|██▏ | 1.09GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 21%|██▏ | 1.05GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 21%|██ | 1.03GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 3%|▎ | 144MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 14) : 66%|██████▌ | 31.7GB / 48.0GB, 2.57GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 293MB/s
+
New Data Upload : 11%|█ | 1.80MB / 17.1MB, 177kB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 4%|▍ | 176MB / 3.96GB [A[A
Processing Files (0 / 1) : 4%|▍ | 176MB / 3.96GB, 17.2MB/s
+
+
...st/step_001500/student.pt: 4%|▍ | 168MB / 3.96GB [A[A
Processing Files (0 / 1) : 4%|▍ | 168MB / 3.96GB, 16.5MB/s
+
+
...o_test/step_001000/aux.pt: 4%|▍ | 152MB / 3.96GB [A[A
Processing Files (0 / 1) : 4%|▍ | 152MB / 3.96GB, 14.9MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 24%|██▍ | 1.17GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 23%|██▎ | 1.13GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 23%|██▎ | 1.11GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 5%|▍ | 224MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (10 / 14) : 67%|██████▋ | 32.0GB / 48.0GB, 2.56GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 285MB/s
+
New Data Upload : 21%|██ | 3.61MB / 17.1MB, 354kB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 6%|▌ | 244MB / 3.96GB [A[A
Processing Files (0 / 1) : 6%|▌ | 244MB / 3.96GB, 24.0MB/s
+
+
...st/step_001500/student.pt: 6%|▋ | 248MB / 3.96GB [A[A
Processing Files (0 / 1) : 6%|▋ | 248MB / 3.96GB, 24.3MB/s
+
+
...o_test/step_001000/aux.pt: 6%|▌ | 223MB / 3.96GB [A[A
Processing Files (0 / 1) : 6%|▌ | 223MB / 3.96GB, 21.8MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 25%|██▌ | 1.24GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 25%|██▍ | 1.20GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 24%|██▍ | 1.18GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 6%|▌ | 304MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (10 / 14) : 67%|██████▋ | 32.3GB / 48.0GB, 2.53GB/s
+
+
...o_test/step_000500/aux.pt: 1%|▏ | 56.0MB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 1%|▏ | 56.0MB / 3.96GB [A[A
Processing Files (0 / 1) : 1%|▏ | 56.0MB / 3.96GB, 5.49MB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 277MB/s
+
New Data Upload : 35%|███▌ | 6.01MB / 17.1MB, 590kB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 8%|▊ | 323MB / 3.96GB [A[A
Processing Files (0 / 1) : 8%|▊ | 323MB / 3.96GB, 31.7MB/s
+
+
...st/step_001500/student.pt: 8%|▊ | 328MB / 3.96GB [A[A
Processing Files (0 / 1) : 8%|▊ | 328MB / 3.96GB, 32.2MB/s
+
+
...o_test/step_001000/aux.pt: 8%|▊ | 299MB / 3.96GB [A[A
Processing Files (0 / 1) : 8%|▊ | 299MB / 3.96GB, 29.3MB/s
+
+
...o_test/step_000500/aux.pt: 3%|▎ | 128MB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 27%|██▋ | 1.30GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 26%|██▌ | 1.27GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 26%|██▌ | 1.26GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 8%|▊ | 376MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 3%|▎ | 128MB / 3.96GB, 12.5MB/s
Processing Files (10 / 14) : 68%|██████▊ | 32.6GB / 48.0GB, 2.51GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 10%|▉ | 387MB / 3.96GB [A[A
Processing Files (0 / 1) : 10%|▉ | 387MB / 3.96GB, 37.9MB/s
+
+
...st/step_001500/student.pt: 10%|▉ | 392MB / 3.96GB [A[A
Processing Files (0 / 1) : 10%|▉ | 392MB / 3.96GB, 38.4MB/s
+
+
...o_test/step_001000/aux.pt: 9%|▉ | 367MB / 3.96GB [A[A
Processing Files (0 / 1) : 9%|▉ | 367MB / 3.96GB, 35.9MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 28%|██▊ | 1.38GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 27%|██▋ | 1.34GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 27%|██▋ | 1.33GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 9%|▉ | 440MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 5%|▌ | 199MB / 3.96GB [A[A
Processing Files (10 / 14) : 69%|██████▊ | 32.9GB / 48.0GB, 2.49GB/s
Processing Files (0 / 1) : 5%|▌ | 199MB / 3.96GB, 19.6MB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 261MB/s
+
New Data Upload : 49%|████▉ | 8.42MB / 17.1MB, 825kB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 11%|█▏ | 451MB / 3.96GB [A[A
Processing Files (0 / 1) : 11%|█▏ | 451MB / 3.96GB, 44.2MB/s
+
+
...st/step_001500/student.pt: 12%|█▏ | 464MB / 3.96GB [A[A
Processing Files (0 / 1) : 12%|█▏ | 464MB / 3.96GB, 45.5MB/s
+
+
...o_test/step_001000/aux.pt: 11%|█ | 427MB / 3.96GB [A[A
Processing Files (0 / 1) : 11%|█ | 427MB / 3.96GB, 41.9MB/s
+
+
...o_test/step_000500/aux.pt: 7%|▋ | 260MB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 30%|██▉ | 1.45GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 29%|██▉ | 1.41GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 28%|██▊ | 1.39GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 10%|█ | 512MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 7%|▋ | 260MB / 3.96GB, 25.5MB/s
Processing Files (10 / 14) : 69%|██████▉ | 33.2GB / 48.0GB, 2.46GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 253MB/s
+
New Data Upload : 74%|███████▍ | 12.6MB / 17.1MB, 1.24MB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 251MB/s
+
New Data Upload : 5%|▍ | 601kB / 13.0MB, 59.0kB/s [A
+
+
...o_test/step_002000/aux.pt: 13%|█▎ | 531MB / 3.96GB [A[A
Processing Files (0 / 1) : 13%|█▎ | 531MB / 3.96GB, 52.1MB/s
+
+
...st/step_001500/student.pt: 14%|█▎ | 536MB / 3.96GB [A[A
Processing Files (0 / 1) : 14%|█▎ | 536MB / 3.96GB, 52.5MB/s
+
+
...o_test/step_001000/aux.pt: 13%|█▎ | 507MB / 3.96GB [A[A
Processing Files (0 / 1) : 13%|█▎ | 507MB / 3.96GB, 49.7MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 31%|███ | 1.51GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 30%|███ | 1.48GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 30%|██▉ | 1.46GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 12%|█▏ | 584MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 8%|▊ | 330MB / 3.96GB [A[A
Processing Files (10 / 14) : 70%|██████▉ | 33.5GB / 48.0GB, 2.45GB/s
Processing Files (0 / 1) : 8%|▊ | 330MB / 3.96GB, 32.4MB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 246MB/s
+
New Data Upload : 99%|█████████▊| 16.8MB / 17.1MB, 1.65MB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 15%|█▌ | 603MB / 3.96GB [A[A
Processing Files (0 / 1) : 15%|█▌ | 603MB / 3.96GB, 59.1MB/s
+
+
...st/step_001500/student.pt: 16%|█▌ | 616MB / 3.96GB [A[A
Processing Files (0 / 1) : 16%|█▌ | 616MB / 3.96GB, 60.4MB/s
+
+
...o_test/step_001000/aux.pt: 15%|█▍ | 579MB / 3.96GB [A[A
Processing Files (0 / 1) : 15%|█▍ | 579MB / 3.96GB, 56.8MB/s
+
+
...o_test/step_000500/aux.pt: 10%|█ | 403MB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 32%|███▏ | 1.58GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 32%|███▏ | 1.55GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 32%|███▏ | 1.54GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 14%|█▎ | 664MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 10%|█ | 403MB / 3.96GB, 39.5MB/s
Processing Files (10 / 14) : 70%|███████ | 33.8GB / 48.0GB, 2.43GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 17%|█▋ | 675MB / 3.96GB [A[A
Processing Files (0 / 1) : 17%|█▋ | 675MB / 3.96GB, 66.2MB/s
+
+
...st/step_001500/student.pt: 17%|█▋ | 688MB / 3.96GB [A[A
Processing Files (0 / 1) : 17%|█▋ | 688MB / 3.96GB, 67.4MB/s
+
+
...o_test/step_001000/aux.pt: 16%|█▋ | 651MB / 3.96GB [A[A
Processing Files (0 / 1) : 16%|█▋ | 651MB / 3.96GB, 63.8MB/s
+
+
...o_test/step_000500/aux.pt: 12%|█▏ | 483MB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 34%|███▍ | 1.66GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 34%|███▎ | 1.64GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 33%|███▎ | 1.62GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 15%|█▌ | 736MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 12%|█▏ | 483MB / 3.96GB, 47.4MB/s
Processing Files (10 / 14) : 71%|███████ | 34.1GB / 48.0GB, 2.41GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[ASuccessfully committed 125 at once. Increasing the limit for next batch.
+
+
+
...o_test/step_002000/aux.pt: 19%|█▉ | 747MB / 3.96GB [A[A
Processing Files (0 / 1) : 19%|█▉ | 747MB / 3.96GB, 73.2MB/s
+
+
...st/step_001500/student.pt: 19%|█▉ | 760MB / 3.96GB [A[A
Processing Files (0 / 1) : 19%|█▉ | 760MB / 3.96GB, 74.5MB/s
+
+
...o_test/step_001000/aux.pt: 18%|█▊ | 731MB / 3.96GB [A[A
Processing Files (0 / 1) : 18%|█▊ | 731MB / 3.96GB, 71.7MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 36%|███▌ | 1.74GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 35%|███▌ | 1.71GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 35%|███▍ | 1.70GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 17%|█▋ | 816MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 14%|█▍ | 563MB / 3.96GB [A[A
Processing Files (10 / 14) : 72%|███████▏ | 34.4GB / 48.0GB, 2.40GB/s
Processing Files (0 / 1) : 14%|█▍ | 563MB / 3.96GB, 55.2MB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 223MB/s
+
New Data Upload : 100%|██████████| 17.1MB / 17.1MB, 1.67MB/s [A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 220MB/s
+
New Data Upload : 9%|▉ | 1.20MB / 13.0MB, 118kB/s [A
+
+
...o_test/step_002000/aux.pt: 21%|██ | 827MB / 3.96GB [A[A
Processing Files (0 / 1) : 21%|██ | 827MB / 3.96GB, 81.1MB/s
+
+
...st/step_001500/student.pt: 21%|██ | 840MB / 3.96GB [A[A
Processing Files (0 / 1) : 21%|██ | 840MB / 3.96GB, 82.4MB/s
+
+
...o_test/step_001000/aux.pt: 20%|██ | 803MB / 3.96GB [A[A
Processing Files (0 / 1) : 20%|██ | 803MB / 3.96GB, 78.7MB/s
+
+
...o_test/step_000500/aux.pt: 16%|█▌ | 627MB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 37%|███▋ | 1.82GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 37%|███▋ | 1.79GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 36%|███▋ | 1.78GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 18%|█▊ | 896MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 16%|█▌ | 627MB / 3.96GB, 61.5MB/s
Processing Files (10 / 14) : 72%|███████▏ | 34.7GB / 48.0GB, 2.39GB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 212MB/s
+
New Data Upload : 19%|█▊ | 2.41MB / 13.0MB, 236kB/s [A
+
+
...o_test/step_002000/aux.pt: 23%|██▎ | 899MB / 3.96GB [A[A
Processing Files (0 / 1) : 23%|██▎ | 899MB / 3.96GB, 88.1MB/s
+
+
...st/step_001500/student.pt: 23%|██▎ | 920MB / 3.96GB [A[A
Processing Files (0 / 1) : 23%|██▎ | 920MB / 3.96GB, 90.2MB/s
+
+
...o_test/step_001000/aux.pt: 22%|██▏ | 867MB / 3.96GB [A[A
Processing Files (0 / 1) : 22%|██▏ | 867MB / 3.96GB, 85.0MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 39%|███▊ | 1.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 38%|███▊ | 1.86GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 38%|███▊ | 1.85GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 20%|█▉ | 976MB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 18%|█▊ | 707MB / 3.96GB [A[A
Processing Files (10 / 14) : 73%|███████▎ | 35.0GB / 48.0GB, 2.37GB/s
Processing Files (0 / 1) : 18%|█▊ | 707MB / 3.96GB, 69.3MB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 205MB/s
+
New Data Upload : 28%|██▊ | 3.61MB / 13.0MB, 354kB/s [A
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
+
+
...o_test/step_002000/aux.pt: 25%|██▍ | 979MB / 3.96GB [A[A
Processing Files (0 / 1) : 25%|██▍ | 979MB / 3.96GB, 96.0MB/s
+
+
...st/step_001500/student.pt: 25%|██▌ | 992MB / 3.96GB [A[A
Processing Files (0 / 1) : 25%|██▌ | 992MB / 3.96GB, 97.3MB/s
+
+
...o_test/step_001000/aux.pt: 24%|██▍ | 947MB / 3.96GB [A[A
Processing Files (0 / 1) : 24%|██▍ | 947MB / 3.96GB, 92.8MB/s
+
+
...o_test/step_000500/aux.pt: 20%|██ | 795MB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 40%|████ | 1.97GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 40%|███▉ | 1.95GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 39%|███▉ | 1.93GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 22%|██▏ | 1.06GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 20%|██ | 795MB / 3.96GB, 77.9MB/s
Processing Files (10 / 14) : 74%|███████▎ | 35.3GB / 48.0GB, 2.36GB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 198MB/s
+
New Data Upload : 46%|████▋ | 6.01MB / 13.0MB, 590kB/s [A
+
+
...o_test/step_002000/aux.pt: 27%|██▋ | 1.06GB / 3.96GB [A[A
Processing Files (0 / 1) : 27%|██▋ | 1.06GB / 3.96GB, 104MB/s
+
+
...st/step_001500/student.pt: 27%|██▋ | 1.07GB / 3.96GB [A[A
Processing Files (0 / 1) : 27%|██▋ | 1.07GB / 3.96GB, 105MB/s
+
+
...o_test/step_001000/aux.pt: 26%|██▌ | 1.03GB / 3.96GB [A[A
Processing Files (0 / 1) : 26%|██▌ | 1.03GB / 3.96GB, 101MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 42%|████▏ | 2.06GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 42%|████▏ | 2.04GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 41%|████▏ | 2.02GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 23%|██▎ | 1.14GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 22%|██▏ | 875MB / 3.96GB [A[A
Processing Files (10 / 14) : 74%|███████▍ | 35.7GB / 48.0GB, 2.34GB/s
Processing Files (0 / 1) : 22%|██▏ | 875MB / 3.96GB, 85.8MB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 189MB/s
+
New Data Upload : 65%|██████▍ | 8.42MB / 13.0MB, 825kB/s [A
+
+
...o_test/step_002000/aux.pt: 29%|██▉ | 1.15GB / 3.96GB [A[A
Processing Files (0 / 1) : 29%|██▉ | 1.15GB / 3.96GB, 112MB/s
+
+
...st/step_001500/student.pt: 29%|██▉ | 1.17GB / 3.96GB [A[A
Processing Files (0 / 1) : 29%|██▉ | 1.17GB / 3.96GB, 115MB/s
+
+
...o_test/step_001000/aux.pt: 28%|██▊ | 1.12GB / 3.96GB [A[A
Processing Files (0 / 1) : 28%|██▊ | 1.12GB / 3.96GB, 110MB/s
+
+
...o_test/step_000500/aux.pt: 24%|██▍ | 955MB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 44%|████▍ | 2.14GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 43%|████▎ | 2.12GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 43%|████▎ | 2.11GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 25%|██▌ | 1.22GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 24%|██▍ | 955MB / 3.96GB, 93.6MB/s
Processing Files (10 / 14) : 75%|███████▌ | 36.0GB / 48.0GB, 2.32GB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 31%|███ | 1.23GB / 3.96GB [A[A
Processing Files (0 / 1) : 31%|███ | 1.23GB / 3.96GB, 120MB/s
+
+
...st/step_001500/student.pt: 32%|███▏ | 1.26GB / 3.96GB [A[A
Processing Files (0 / 1) : 32%|███▏ | 1.26GB / 3.96GB, 123MB/s
+
+
...o_test/step_001000/aux.pt: 30%|███ | 1.19GB / 3.96GB [A[A
Processing Files (0 / 1) : 30%|███ | 1.19GB / 3.96GB, 117MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 45%|████▌ | 2.22GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 45%|████▍ | 2.20GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 45%|████▍ | 2.18GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 27%|██▋ | 1.30GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 26%|██▌ | 1.03GB / 3.96GB [A[A
Processing Files (10 / 14) : 76%|███████▌ | 36.3GB / 48.0GB, 2.31GB/s
Processing Files (0 / 1) : 26%|██▌ | 1.03GB / 3.96GB, 101MB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 174MB/s
+
New Data Upload : 93%|█████████▎| 12.0MB / 13.0MB, 1.18MB/s [A
+
+
...o_test/step_002000/aux.pt: 33%|███▎ | 1.31GB / 3.96GB [A[A
Processing Files (0 / 1) : 33%|███▎ | 1.31GB / 3.96GB, 128MB/s
+
+
...st/step_001500/student.pt: 34%|███▎ | 1.34GB / 3.96GB [A[A
Processing Files (0 / 1) : 34%|███▎ | 1.34GB / 3.96GB, 131MB/s
+
+
...o_test/step_001000/aux.pt: 32%|███▏ | 1.28GB / 3.96GB [A[A
Processing Files (0 / 1) : 32%|███▏ | 1.28GB / 3.96GB, 125MB/s
+
+
...o_test/step_000500/aux.pt: 28%|██▊ | 1.11GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 47%|████▋ | 2.30GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 47%|████▋ | 2.29GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 46%|████▋ | 2.26GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 28%|██▊ | 1.39GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 28%|██▊ | 1.11GB / 3.96GB, 109MB/s
Processing Files (10 / 14) : 76%|███████▋ | 36.7GB / 48.0GB, 2.29GB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 166MB/s
+
New Data Upload : 97%|█████████▋| 12.6MB / 13.0MB, 1.24MB/s [A
+
+
...o_test/step_002000/aux.pt: 35%|███▍ | 1.39GB / 3.96GB [A[A
Processing Files (0 / 1) : 35%|███▍ | 1.39GB / 3.96GB, 136MB/s
+
+
...st/step_001500/student.pt: 36%|███▌ | 1.42GB / 3.96GB [A[A
Processing Files (0 / 1) : 36%|███▌ | 1.42GB / 3.96GB, 139MB/s
+
+
...o_test/step_001000/aux.pt: 34%|███▍ | 1.36GB / 3.96GB [A[A
Processing Files (0 / 1) : 34%|███▍ | 1.36GB / 3.96GB, 133MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 49%|████▉ | 2.38GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 48%|████▊ | 2.37GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 48%|████▊ | 2.34GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 30%|███ | 1.47GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 30%|███ | 1.19GB / 3.96GB [A[A
Processing Files (10 / 14) : 77%|███████▋ | 37.0GB / 48.0GB, 2.28GB/s
Processing Files (0 / 1) : 30%|███ | 1.19GB / 3.96GB, 117MB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 37%|███▋ | 1.47GB / 3.96GB [A[A
Processing Files (0 / 1) : 37%|███▋ | 1.47GB / 3.96GB, 144MB/s
+
+
...st/step_001500/student.pt: 38%|███▊ | 1.50GB / 3.96GB [A[A
Processing Files (0 / 1) : 38%|███▊ | 1.50GB / 3.96GB, 147MB/s
+
+
...o_test/step_001000/aux.pt: 36%|███▌ | 1.43GB / 3.96GB [A[A
Processing Files (0 / 1) : 36%|███▌ | 1.43GB / 3.96GB, 141MB/s
+
+
...o_test/step_000500/aux.pt: 32%|███▏ | 1.27GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 50%|█████ | 2.46GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 50%|█████ | 2.45GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 50%|████▉ | 2.42GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 32%|███▏ | 1.55GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 32%|███▏ | 1.27GB / 3.96GB, 124MB/s
Processing Files (10 / 14) : 78%|███████▊ | 37.3GB / 48.0GB, 2.26GB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 152MB/s
+
New Data Upload : 100%|██████████| 13.0MB / 13.0MB, 1.27MB/s [A
+
+
...o_test/step_002000/aux.pt: 39%|███▉ | 1.55GB / 3.96GB [A[A
Processing Files (0 / 1) : 39%|███▉ | 1.55GB / 3.96GB, 152MB/s
+
+
...st/step_001500/student.pt: 40%|███▉ | 1.58GB / 3.96GB [A[A
Processing Files (0 / 1) : 40%|███▉ | 1.58GB / 3.96GB, 155MB/s
+
+
...o_test/step_001000/aux.pt: 38%|███▊ | 1.51GB / 3.96GB [A[A
Processing Files (0 / 1) : 38%|███▊ | 1.51GB / 3.96GB, 149MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 52%|█████▏ | 2.54GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 52%|█████▏ | 2.54GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 51%|█████▏ | 2.51GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 33%|███▎ | 1.63GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 34%|███▍ | 1.36GB / 3.96GB [A[A
Processing Files (10 / 14) : 78%|███████▊ | 37.6GB / 48.0GB, 2.25GB/s
Processing Files (0 / 1) : 34%|███▍ | 1.36GB / 3.96GB, 133MB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 41%|████ | 1.63GB / 3.96GB [A[A
+
+
...st/step_001500/student.pt: 42%|████▏ | 1.66GB / 3.96GB [A[A
Processing Files (0 / 1) : 41%|████ | 1.63GB / 3.96GB, 160MB/s
Processing Files (0 / 1) : 42%|████▏ | 1.66GB / 3.96GB, 162MB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 40%|████ | 1.59GB / 3.96GB [A[A
Processing Files (0 / 1) : 40%|████ | 1.59GB / 3.96GB, 156MB/s
+
+
...o_test/step_000500/aux.pt: 36%|███▌ | 1.43GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 54%|█████▎ | 2.62GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 53%|█████▎ | 2.61GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 53%|█████▎ | 2.58GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 35%|███▌ | 1.71GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 36%|███▌ | 1.43GB / 3.96GB, 141MB/s
Processing Files (10 / 14) : 79%|███████▉ | 37.9GB / 48.0GB, 2.23GB/s
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 133MB/s
+
New Data Upload : 100%|██████████| 17.1MB / 17.1MB, 1.71MB/s
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 43%|████▎ | 1.70GB / 3.96GB [A[A
Processing Files (0 / 1) : 43%|████▎ | 1.70GB / 3.96GB, 167MB/s
+
+
...st/step_001500/student.pt: 44%|████▍ | 1.74GB / 3.96GB [A[A
Processing Files (0 / 1) : 44%|████▍ | 1.74GB / 3.96GB, 170MB/s
+
+
...o_test/step_001000/aux.pt: 42%|████▏ | 1.67GB / 3.96GB [A[A
Processing Files (0 / 1) : 42%|████▏ | 1.67GB / 3.96GB, 163MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 55%|█████▌ | 2.70GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 55%|█████▍ | 2.69GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 54%|█████▍ | 2.66GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 37%|███▋ | 1.79GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 38%|███▊ | 1.51GB / 3.96GB [A[A
Processing Files (10 / 14) : 80%|███████▉ | 38.3GB / 48.0GB, 2.22GB/s
Processing Files (0 / 1) : 38%|███▊ | 1.51GB / 3.96GB, 149MB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 45%|████▍ | 1.78GB / 3.96GB [A[A
Processing Files (0 / 1) : 45%|████▍ | 1.78GB / 3.96GB, 174MB/s
+
+
...st/step_001500/student.pt: 46%|████▌ | 1.81GB / 3.96GB [A[A
Processing Files (0 / 1) : 46%|████▌ | 1.81GB / 3.96GB, 177MB/s
+
+
...o_test/step_001000/aux.pt: 44%|████▍ | 1.75GB / 3.96GB [A[A
Processing Files (0 / 1) : 44%|████▍ | 1.75GB / 3.96GB, 171MB/s
+
+
...o_test/step_000500/aux.pt: 40%|████ | 1.59GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 57%|█████▋ | 2.78GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 57%|█████▋ | 2.77GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 56%|█████▌ | 2.74GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 38%|███▊ | 1.87GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 40%|████ | 1.59GB / 3.96GB, 156MB/s
Processing Files (10 / 14) : 80%|████████ | 38.6GB / 48.0GB, 2.20GB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 47%|████▋ | 1.86GB / 3.96GB [A[A
Processing Files (0 / 1) : 47%|████▋ | 1.86GB / 3.96GB, 182MB/s
+
+
...st/step_001500/student.pt: 48%|████▊ | 1.90GB / 3.96GB [A[A
Processing Files (0 / 1) : 48%|████▊ | 1.90GB / 3.96GB, 186MB/s
+
+
...o_test/step_001000/aux.pt: 46%|████▌ | 1.83GB / 3.96GB [A[A
Processing Files (0 / 1) : 46%|████▌ | 1.83GB / 3.96GB, 179MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 59%|█████▊ | 2.86GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 58%|█████▊ | 2.85GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 58%|█████▊ | 2.82GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 40%|███▉ | 1.95GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 42%|████▏ | 1.68GB / 3.96GB [A[A
Processing Files (10 / 14) : 81%|████████ | 38.9GB / 48.0GB, 2.19GB/s
Processing Files (0 / 1) : 42%|████▏ | 1.68GB / 3.96GB, 164MB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 49%|████▉ | 1.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 49%|████▉ | 1.95GB / 3.96GB, 191MB/s
+
+
...st/step_001500/student.pt: 49%|████▉ | 1.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 49%|████▉ | 1.96GB / 3.96GB, 192MB/s
+
+
...o_test/step_001000/aux.pt: 48%|████▊ | 1.91GB / 3.96GB [A[A
Processing Files (0 / 1) : 48%|████▊ | 1.91GB / 3.96GB, 187MB/s
+
+
...o_test/step_000500/aux.pt: 44%|████▍ | 1.76GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 60%|██████ | 2.94GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 60%|█████▉ | 2.93GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 59%|█████▉ | 2.90GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 42%|████▏ | 2.03GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 44%|████▍ | 1.76GB / 3.96GB, 172MB/s
Processing Files (10 / 14) : 82%|████████▏ | 39.2GB / 48.0GB, 2.17GB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 51%|█████ | 2.02GB / 3.96GB [A[A
Processing Files (0 / 1) : 51%|█████ | 2.02GB / 3.96GB, 198MB/s
+
+
...st/step_001500/student.pt: 51%|█████▏ | 2.04GB / 3.96GB [A[A
Processing Files (0 / 1) : 51%|█████▏ | 2.04GB / 3.96GB, 200MB/s
+
+
...o_test/step_001000/aux.pt: 50%|█████ | 1.99GB / 3.96GB [A[A
Processing Files (0 / 1) : 50%|█████ | 1.99GB / 3.96GB, 195MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 62%|██████▏ | 3.02GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 62%|██████▏ | 3.01GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 61%|██████ | 2.97GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 43%|████▎ | 2.11GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 46%|████▋ | 1.83GB / 3.96GB [A[A
Processing Files (10 / 14) : 82%|████████▏ | 39.5GB / 48.0GB, 2.16GB/s
Processing Files (0 / 1) : 46%|████▋ | 1.83GB / 3.96GB, 180MB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 53%|█████▎ | 2.10GB / 3.96GB [A[A
Processing Files (0 / 1) : 53%|█████▎ | 2.10GB / 3.96GB, 206MB/s
+
+
...st/step_001500/student.pt: 54%|█████▎ | 2.13GB / 3.96GB [A[A
Processing Files (0 / 1) : 54%|█████▎ | 2.13GB / 3.96GB, 209MB/s
+
+
...o_test/step_001000/aux.pt: 52%|█████▏ | 2.07GB / 3.96GB [A[A
Processing Files (0 / 1) : 52%|█████▏ | 2.07GB / 3.96GB, 203MB/s
+
+
...o_test/step_000500/aux.pt: 49%|████▊ | 1.92GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 63%|██████▎ | 3.10GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 63%|██████▎ | 3.10GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 62%|██████▏ | 3.05GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 45%|████▍ | 2.19GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 49%|████▊ | 1.92GB / 3.96GB, 189MB/s
Processing Files (10 / 14) : 83%|████████▎ | 39.9GB / 48.0GB, 2.14GB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 55%|█████▌ | 2.19GB / 3.96GB [A[A
Processing Files (0 / 1) : 55%|█████▌ | 2.19GB / 3.96GB, 214MB/s
+
+
...st/step_001500/student.pt: 55%|█████▌ | 2.20GB / 3.96GB [A[A
Processing Files (0 / 1) : 55%|█████▌ | 2.20GB / 3.96GB, 216MB/s
+
+
...o_test/step_001000/aux.pt: 54%|█████▍ | 2.16GB / 3.96GB [A[A
Processing Files (0 / 1) : 54%|█████▍ | 2.16GB / 3.96GB, 211MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 65%|██████▌ | 3.18GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 65%|██████▍ | 3.18GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 64%|██████▍ | 3.13GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 46%|████▋ | 2.27GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 51%|█████ | 2.00GB / 3.96GB [A[A
Processing Files (10 / 14) : 84%|████████▎ | 40.2GB / 48.0GB, 2.12GB/s
Processing Files (0 / 1) : 51%|█████ | 2.00GB / 3.96GB, 196MB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 57%|█████▋ | 2.26GB / 3.96GB [A[A
Processing Files (0 / 1) : 57%|█████▋ | 2.26GB / 3.96GB, 221MB/s
+
+
...st/step_001500/student.pt: 58%|█████▊ | 2.28GB / 3.96GB [A[A
Processing Files (0 / 1) : 58%|█████▊ | 2.28GB / 3.96GB, 224MB/s
+
+
...o_test/step_001000/aux.pt: 56%|█████▌ | 2.23GB / 3.96GB [A[A
Processing Files (0 / 1) : 56%|█████▌ | 2.23GB / 3.96GB, 218MB/s
+
+
...o_test/step_000500/aux.pt: 52%|█████▏ | 2.07GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 67%|██████▋ | 3.26GB / 4.89GB [A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 52%|█████▏ | 2.07GB / 3.96GB, 203MB/s
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 66%|██████▋ | 3.25GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 65%|██████▌ | 3.20GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 48%|████▊ | 2.34GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (10 / 14) : 84%|████████▍ | 40.5GB / 48.0GB, 2.10GB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_002000/aux.pt: 59%|█████▉ | 2.33GB / 3.96GB [A[A
Processing Files (0 / 1) : 59%|█████▉ | 2.33GB / 3.96GB, 229MB/s
+
+
...st/step_001500/student.pt: 60%|█████▉ | 2.36GB / 3.96GB [A[A
Processing Files (0 / 1) : 60%|█████▉ | 2.36GB / 3.96GB, 231MB/s
+
+
...o_test/step_001000/aux.pt: 58%|█████▊ | 2.31GB / 3.96GB [A[A
Processing Files (0 / 1) : 58%|█████▊ | 2.31GB / 3.96GB, 226MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 54%|█████▍ | 2.16GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 68%|██████▊ | 3.34GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 68%|██████▊ | 3.33GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 67%|██████▋ | 3.28GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 50%|████▉ | 2.42GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 54%|█████▍ | 2.16GB / 3.96GB, 211MB/s
Processing Files (10 / 14) : 85%|████████▌ | 40.8GB / 48.0GB, 2.08GB/s
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 57.8MB/s
+
New Data Upload : 100%|██████████| 13.0MB / 13.0MB, 1.30MB/s
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...o_test/step_002000/aux.pt: 61%|██████ | 2.41GB / 3.96GB [A[A
Processing Files (0 / 1) : 61%|██████ | 2.41GB / 3.96GB, 236MB/s
+
+
...st/step_001500/student.pt: 61%|██████▏ | 2.43GB / 3.96GB [A[A
Processing Files (0 / 1) : 61%|██████▏ | 2.43GB / 3.96GB, 238MB/s
+
+
...o_test/step_001000/aux.pt: 60%|██████ | 2.38GB / 3.96GB [A[A
Processing Files (0 / 1) : 60%|██████ | 2.38GB / 3.96GB, 233MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 70%|██████▉ | 3.42GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 70%|██████▉ | 3.41GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 69%|██████▊ | 3.36GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 51%|█████ | 2.50GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 56%|█████▌ | 2.23GB / 3.96GB [A[A
Processing Files (10 / 14) : 86%|████████▌ | 41.1GB / 48.0GB, 2.07GB/s
Processing Files (0 / 1) : 56%|█████▌ | 2.23GB / 3.96GB, 218MB/s
+
+
...o_test/step_002000/aux.pt: 63%|██████▎ | 2.49GB / 3.96GB [A[A
Processing Files (0 / 1) : 63%|██████▎ | 2.49GB / 3.96GB, 244MB/s
+
+
...st/step_001500/student.pt: 63%|██████▎ | 2.51GB / 3.96GB [A[A
Processing Files (0 / 1) : 63%|██████▎ | 2.51GB / 3.96GB, 246MB/s
+
+
...o_test/step_001000/aux.pt: 62%|██████▏ | 2.45GB / 3.96GB [A[A
Processing Files (0 / 1) : 62%|██████▏ | 2.45GB / 3.96GB, 240MB/s
+
+
...o_test/step_000500/aux.pt: 58%|█████▊ | 2.31GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 71%|███████▏ | 3.49GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 71%|███████ | 3.48GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 70%|███████ | 3.44GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 53%|█████▎ | 2.58GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 58%|█████▊ | 2.31GB / 3.96GB, 226MB/s
Processing Files (10 / 14) : 86%|████████▋ | 41.4GB / 48.0GB, 2.05GB/s
+
+
...o_test/step_002000/aux.pt: 65%|██████▍ | 2.57GB / 3.96GB [A[A
Processing Files (0 / 1) : 65%|██████▍ | 2.57GB / 3.96GB, 252MB/s
+
+
...st/step_001500/student.pt: 65%|██████▌ | 2.59GB / 3.96GB [A[A
Processing Files (0 / 1) : 65%|██████▌ | 2.59GB / 3.96GB, 254MB/s
+
+
...o_test/step_001000/aux.pt: 64%|██████▍ | 2.53GB / 3.96GB [A[A
Processing Files (0 / 1) : 64%|██████▍ | 2.53GB / 3.96GB, 248MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 73%|███████▎ | 3.57GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 73%|███████▎ | 3.56GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 72%|███████▏ | 3.52GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 54%|█████▍ | 2.66GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 60%|██████ | 2.39GB / 3.96GB [A[A
Processing Files (10 / 14) : 87%|████████▋ | 41.7GB / 48.0GB, 2.03GB/s
Processing Files (0 / 1) : 60%|██████ | 2.39GB / 3.96GB, 234MB/s
+
+
...o_test/step_002000/aux.pt: 67%|██████▋ | 2.65GB / 3.96GB [A[A
Processing Files (0 / 1) : 67%|██████▋ | 2.65GB / 3.96GB, 260MB/s
+
+
...st/step_001500/student.pt: 67%|██████▋ | 2.66GB / 3.96GB [A[A
Processing Files (0 / 1) : 67%|██████▋ | 2.66GB / 3.96GB, 261MB/s
+
+
...o_test/step_001000/aux.pt: 66%|██████▌ | 2.60GB / 3.96GB [A[A
Processing Files (0 / 1) : 66%|██████▌ | 2.60GB / 3.96GB, 255MB/s
+
+
...o_test/step_000500/aux.pt: 62%|██████▏ | 2.47GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 75%|███████▍ | 3.65GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 75%|███████▍ | 3.65GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 74%|███████▍ | 3.61GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 56%|█████▌ | 2.73GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 62%|██████▏ | 2.47GB / 3.96GB, 242MB/s
Processing Files (10 / 14) : 88%|████████▊ | 42.0GB / 48.0GB, 2.01GB/s
+
+
...o_test/step_002000/aux.pt: 69%|██████▉ | 2.73GB / 3.96GB [A[A
Processing Files (0 / 1) : 69%|██████▉ | 2.73GB / 3.96GB, 268MB/s
+
+
...st/step_001500/student.pt: 69%|██████▉ | 2.74GB / 3.96GB [A[A
Processing Files (0 / 1) : 69%|██████▉ | 2.74GB / 3.96GB, 269MB/s
+
+
...o_test/step_001000/aux.pt: 68%|██████▊ | 2.68GB / 3.96GB [A[A
Processing Files (0 / 1) : 68%|██████▊ | 2.68GB / 3.96GB, 263MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 76%|███████▌ | 3.73GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 76%|███████▌ | 3.73GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 75%|███████▌ | 3.68GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 57%|█████▋ | 2.81GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 64%|██████▍ | 2.55GB / 3.96GB [A[A
Processing Files (10 / 14) : 88%|████████▊ | 42.4GB / 48.0GB, 1.99GB/s
Processing Files (0 / 1) : 64%|██████▍ | 2.55GB / 3.96GB, 250MB/s
+
+
...o_test/step_002000/aux.pt: 71%|███████ | 2.80GB / 3.96GB [A[A
Processing Files (0 / 1) : 71%|███████ | 2.80GB / 3.96GB, 275MB/s
+
+
...st/step_001500/student.pt: 71%|███████ | 2.82GB / 3.96GB [A[A
Processing Files (0 / 1) : 71%|███████ | 2.82GB / 3.96GB, 276MB/s
+
+
...o_test/step_001000/aux.pt: 69%|██████▉ | 2.75GB / 3.96GB [A[A
Processing Files (0 / 1) : 69%|██████▉ | 2.75GB / 3.96GB, 270MB/s
+
+
...o_test/step_000500/aux.pt: 66%|██████▌ | 2.62GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 78%|███████▊ | 3.82GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 78%|███████▊ | 3.80GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 77%|███████▋ | 3.76GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 59%|█████▉ | 2.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 66%|██████▌ | 2.62GB / 3.96GB, 257MB/s
Processing Files (10 / 14) : 89%|████████▉ | 42.7GB / 48.0GB, 1.97GB/s
+
+
...o_test/step_002000/aux.pt: 73%|███████▎ | 2.88GB / 3.96GB [A[A
Processing Files (0 / 1) : 73%|███████▎ | 2.88GB / 3.96GB, 283MB/s
+
+
...st/step_001500/student.pt: 73%|███████▎ | 2.89GB / 3.96GB [A[A
Processing Files (0 / 1) : 73%|███████▎ | 2.89GB / 3.96GB, 283MB/s
+
+
...o_test/step_001000/aux.pt: 72%|███████▏ | 2.84GB / 3.96GB [A[A
Processing Files (0 / 1) : 72%|███████▏ | 2.84GB / 3.96GB, 279MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 80%|███████▉ | 3.90GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 79%|███████▉ | 3.87GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 79%|███████▊ | 3.84GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 61%|██████ | 2.97GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 68%|██████▊ | 2.70GB / 3.96GB [A[A
Processing Files (10 / 14) : 90%|████████▉ | 43.0GB / 48.0GB, 1.96GB/s
Processing Files (0 / 1) : 68%|██████▊ | 2.70GB / 3.96GB, 265MB/s
+
+
...o_test/step_002000/aux.pt: 75%|███████▍ | 2.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 75%|███████▍ | 2.96GB / 3.96GB, 290MB/s
+
+
...st/step_001500/student.pt: 75%|███████▌ | 2.98GB / 3.96GB [A[A
Processing Files (0 / 1) : 75%|███████▌ | 2.98GB / 3.96GB, 292MB/s
+
+
...o_test/step_001000/aux.pt: 74%|███████▎ | 2.91GB / 3.96GB [A[A
Processing Files (0 / 1) : 74%|███████▎ | 2.91GB / 3.96GB, 286MB/s
+
+
...o_test/step_000500/aux.pt: 70%|███████ | 2.78GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 81%|████████▏ | 3.98GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 81%|████████ | 3.94GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 80%|████████ | 3.92GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 62%|██████▏ | 3.04GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 70%|███████ | 2.78GB / 3.96GB, 272MB/s
Processing Files (10 / 14) : 90%|█████████ | 43.3GB / 48.0GB, 1.95GB/s
+
+
...o_test/step_002000/aux.pt: 77%|███████▋ | 3.04GB / 3.96GB [A[A
Processing Files (0 / 1) : 77%|███████▋ | 3.04GB / 3.96GB, 298MB/s
+
+
...st/step_001500/student.pt: 77%|███████▋ | 3.06GB / 3.96GB [A[A
Processing Files (0 / 1) : 77%|███████▋ | 3.06GB / 3.96GB, 300MB/s
+
+
...o_test/step_001000/aux.pt: 76%|███████▌ | 2.99GB / 3.96GB [A[A
Processing Files (0 / 1) : 76%|███████▌ | 2.99GB / 3.96GB, 294MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 72%|███████▏ | 2.85GB / 3.96GB [A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 83%|████████▎ | 4.05GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 82%|████████▏ | 4.03GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 82%|████████▏ | 4.00GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 64%|██████▎ | 3.11GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 72%|███████▏ | 2.85GB / 3.96GB, 280MB/s
Processing Files (10 / 14) : 91%|█████████ | 43.6GB / 48.0GB, 1.93GB/s
+
+
...o_test/step_002000/aux.pt: 79%|███████▉ | 3.12GB / 3.96GB [A[A
Processing Files (0 / 1) : 79%|███████▉ | 3.12GB / 3.96GB, 306MB/s
+
+
...st/step_001500/student.pt: 79%|███████▉ | 3.14GB / 3.96GB [A[A
Processing Files (0 / 1) : 79%|███████▉ | 3.14GB / 3.96GB, 307MB/s
+
+
...o_test/step_001000/aux.pt: 78%|███████▊ | 3.07GB / 3.96GB [A[A
Processing Files (0 / 1) : 78%|███████▊ | 3.07GB / 3.96GB, 301MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 85%|████████▍ | 4.14GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 84%|████████▍ | 4.10GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 83%|████████▎ | 4.08GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 65%|██████▌ | 3.20GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 74%|███████▎ | 2.92GB / 3.96GB [A[A
Processing Files (10 / 14) : 92%|█████████▏| 43.9GB / 48.0GB, 1.91GB/s
Processing Files (0 / 1) : 74%|███████▎ | 2.92GB / 3.96GB, 287MB/s
+
+
...o_test/step_002000/aux.pt: 81%|████████ | 3.20GB / 3.96GB [A[A
Processing Files (0 / 1) : 81%|████████ | 3.20GB / 3.96GB, 314MB/s
+
+
...st/step_001500/student.pt: 81%|████████ | 3.22GB / 3.96GB [A[A
Processing Files (0 / 1) : 81%|████████ | 3.22GB / 3.96GB, 315MB/s
+
+
...o_test/step_001000/aux.pt: 80%|███████▉ | 3.16GB / 3.96GB [A[A
Processing Files (0 / 1) : 80%|███████▉ | 3.16GB / 3.96GB, 309MB/s
+
+
...o_test/step_000500/aux.pt: 76%|███████▌ | 3.00GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 86%|████████▌ | 4.22GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 86%|████████▌ | 4.18GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 85%|████████▌ | 4.16GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 67%|██████▋ | 3.28GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 76%|███████▌ | 3.00GB / 3.96GB, 294MB/s
Processing Files (10 / 14) : 92%|█████████▏| 44.3GB / 48.0GB, 1.88GB/s
+
+
...o_test/step_002000/aux.pt: 83%|████████▎ | 3.28GB / 3.96GB [A[A
Processing Files (0 / 1) : 83%|████████▎ | 3.28GB / 3.96GB, 321MB/s
+
+
...st/step_001500/student.pt: 83%|████████▎ | 3.29GB / 3.96GB [A[A
Processing Files (0 / 1) : 83%|████████▎ | 3.29GB / 3.96GB, 322MB/s
+
+
...o_test/step_001000/aux.pt: 82%|████████▏ | 3.24GB / 3.96GB [A[A
Processing Files (0 / 1) : 82%|████████▏ | 3.24GB / 3.96GB, 317MB/s
+
+
...o_test/step_000500/aux.pt: 78%|███████▊ | 3.07GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 88%|████████▊ | 4.29GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 87%|████████▋ | 4.26GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 87%|████████▋ | 4.24GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 68%|██████▊ | 3.34GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 78%|███████▊ | 3.07GB / 3.96GB, 301MB/s
Processing Files (10 / 14) : 93%|█████████▎| 44.5GB / 48.0GB, 1.85GB/s
+
+
...o_test/step_002000/aux.pt: 85%|████████▍ | 3.36GB / 3.96GB [A[A
Processing Files (0 / 1) : 85%|████████▍ | 3.36GB / 3.96GB, 329MB/s
+
+
...st/step_001500/student.pt: 85%|████████▍ | 3.37GB / 3.96GB [A[A
Processing Files (0 / 1) : 85%|████████▍ | 3.37GB / 3.96GB, 330MB/s
+
+
...o_test/step_001000/aux.pt: 84%|████████▎ | 3.32GB / 3.96GB [A[A
Processing Files (0 / 1) : 84%|████████▎ | 3.32GB / 3.96GB, 325MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 89%|████████▉ | 4.37GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 89%|████████▊ | 4.34GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 88%|████████▊ | 4.32GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 70%|███████ | 3.42GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 80%|███████▉ | 3.16GB / 3.96GB [A[A
Processing Files (10 / 14) : 94%|█████████▎| 44.9GB / 48.0GB, 1.82GB/s
Processing Files (0 / 1) : 80%|███████▉ | 3.16GB / 3.96GB, 309MB/s
+
+
...o_test/step_002000/aux.pt: 86%|████████▋ | 3.43GB / 3.96GB [A[A
Processing Files (0 / 1) : 86%|████████▋ | 3.43GB / 3.96GB, 336MB/s
+
+
...st/step_001500/student.pt: 87%|████████▋ | 3.45GB / 3.96GB [A[A
Processing Files (0 / 1) : 87%|████████▋ | 3.45GB / 3.96GB, 338MB/s
+
+
...o_test/step_001000/aux.pt: 85%|████████▌ | 3.38GB / 3.96GB [A[A
Processing Files (0 / 1) : 85%|████████▌ | 3.38GB / 3.96GB, 331MB/s
+
+
...o_test/step_000500/aux.pt: 81%|████████▏ | 3.23GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 91%|█████████ | 4.44GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 90%|█████████ | 4.42GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 90%|████████▉ | 4.40GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 72%|███████▏ | 3.50GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 81%|████████▏ | 3.23GB / 3.96GB, 316MB/s
Processing Files (10 / 14) : 94%|█████████▍| 45.2GB / 48.0GB, 1.79GB/s
+
+
...o_test/step_002000/aux.pt: 88%|████████▊ | 3.50GB / 3.96GB [A[A
Processing Files (0 / 1) : 88%|████████▊ | 3.50GB / 3.96GB, 343MB/s
+
+
...st/step_001500/student.pt: 89%|████████▉ | 3.53GB / 3.96GB [A[A
Processing Files (0 / 1) : 89%|████████▉ | 3.53GB / 3.96GB, 346MB/s
+
+
...o_test/step_001000/aux.pt: 87%|████████▋ | 3.46GB / 3.96GB [A[A
Processing Files (0 / 1) : 87%|████████▋ | 3.46GB / 3.96GB, 339MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 93%|█████████▎| 4.53GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 92%|█████████▏| 4.50GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 92%|█████████▏| 4.48GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 73%|███████▎ | 3.58GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 83%|████████▎ | 3.31GB / 3.96GB [A[A
Processing Files (10 / 14) : 95%|█████████▍| 45.5GB / 48.0GB, 1.76GB/s
Processing Files (0 / 1) : 83%|████████▎ | 3.31GB / 3.96GB, 324MB/s
+
+
...o_test/step_002000/aux.pt: 90%|█████████ | 3.59GB / 3.96GB [A[A
Processing Files (0 / 1) : 90%|█████████ | 3.59GB / 3.96GB, 352MB/s
+
+
...st/step_001500/student.pt: 91%|█████████ | 3.60GB / 3.96GB [A[A
Processing Files (0 / 1) : 91%|█████████ | 3.60GB / 3.96GB, 353MB/s
+
+
...o_test/step_001000/aux.pt: 89%|████████▉ | 3.54GB / 3.96GB [A[A
Processing Files (0 / 1) : 89%|████████▉ | 3.54GB / 3.96GB, 347MB/s
+
+
...o_test/step_000500/aux.pt: 85%|████████▌ | 3.39GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 94%|█████████▍| 4.60GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 94%|█████████▎| 4.58GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 93%|█████████▎| 4.56GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 75%|███████▍ | 3.66GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 85%|████████▌ | 3.39GB / 3.96GB, 332MB/s
Processing Files (10 / 14) : 95%|█████████▌| 45.8GB / 48.0GB, 1.72GB/s
+
+
...o_test/step_002000/aux.pt: 92%|█████████▏| 3.67GB / 3.96GB [A[A
Processing Files (0 / 1) : 92%|█████████▏| 3.67GB / 3.96GB, 360MB/s
+
+
...st/step_001500/student.pt: 93%|█████████▎| 3.68GB / 3.96GB [A[A
Processing Files (0 / 1) : 93%|█████████▎| 3.68GB / 3.96GB, 361MB/s
+
+
...o_test/step_001000/aux.pt: 91%|█████████▏| 3.62GB / 3.96GB [A[A
Processing Files (0 / 1) : 91%|█████████▏| 3.62GB / 3.96GB, 355MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 96%|█████████▌| 4.69GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 95%|█████████▌| 4.66GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 95%|█████████▍| 4.63GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 76%|███████▋ | 3.74GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 87%|████████▋ | 3.47GB / 3.96GB [A[A
Processing Files (10 / 14) : 96%|█████████▌| 46.1GB / 48.0GB, 1.68GB/s
Processing Files (0 / 1) : 87%|████████▋ | 3.47GB / 3.96GB, 340MB/s
+
+
...o_test/step_002000/aux.pt: 95%|█████████▍| 3.76GB / 3.96GB [A[A
Processing Files (0 / 1) : 95%|█████████▍| 3.76GB / 3.96GB, 369MB/s
+
+
...st/step_001500/student.pt: 95%|█████████▌| 3.77GB / 3.96GB [A[A
Processing Files (0 / 1) : 95%|█████████▌| 3.77GB / 3.96GB, 369MB/s
+
+
...o_test/step_001000/aux.pt: 93%|█████████▎| 3.70GB / 3.96GB [A[A
Processing Files (0 / 1) : 93%|█████████▎| 3.70GB / 3.96GB, 363MB/s
+
+
...o_test/step_000500/aux.pt: 90%|████████▉ | 3.56GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 98%|█████████▊| 4.78GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 97%|█████████▋| 4.75GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 97%|█████████▋| 4.73GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 78%|███████▊ | 3.83GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 90%|████████▉ | 3.56GB / 3.96GB, 349MB/s
Processing Files (10 / 14) : 97%|█████████▋| 46.5GB / 48.0GB, 1.65GB/s
+
+
...o_test/step_002000/aux.pt: 97%|█████████▋| 3.85GB / 3.96GB [A[A
Processing Files (0 / 1) : 97%|█████████▋| 3.85GB / 3.96GB, 378MB/s
+
+
...st/step_001500/student.pt: 97%|█████████▋| 3.86GB / 3.96GB [A[A
Processing Files (0 / 1) : 97%|█████████▋| 3.86GB / 3.96GB, 379MB/s
+
+
...o_test/step_001000/aux.pt: 96%|█████████▌| 3.80GB / 3.96GB [A[A
Processing Files (0 / 1) : 96%|█████████▌| 3.80GB / 3.96GB, 372MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|█████████▉| 4.87GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 99%|█████████▉| 4.85GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 99%|█████████▊| 4.82GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 80%|████████ | 3.93GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 92%|█████████▏| 3.66GB / 3.96GB [A[A
Processing Files (10 / 14) : 98%|█████████▊| 46.9GB / 48.0GB, 1.62GB/s
Processing Files (0 / 1) : 92%|█████████▏| 3.66GB / 3.96GB, 359MB/s
+
+
...o_test/step_002000/aux.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 386MB/s
+
+
...st/step_001500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 388MB/s
+
+
...o_test/step_001000/aux.pt: 98%|█████████▊| 3.88GB / 3.96GB [A[A
Processing Files (0 / 1) : 98%|█████████▊| 3.88GB / 3.96GB, 381MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 82%|████████▏ | 4.00GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 95%|█████████▍| 3.75GB / 3.96GB [A[A
Processing Files (13 / 14) : 98%|█████████▊| 47.1GB / 48.0GB, 1.58GB/s
Processing Files (0 / 1) : 95%|█████████▍| 3.75GB / 3.96GB, 367MB/s
+
+
...st/step_001000/student.pt: 0%| | 7.95MB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 0%| | 7.95MB / 3.96GB [A[A
Processing Files (0 / 1) : 0%| | 7.95MB / 3.96GB, 779kB/s
+
+
...st/step_000500/student.pt: 0%| | 7.95MB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 0%| | 7.95MB / 3.96GB [A[A
Processing Files (0 / 1) : 0%| | 7.95MB / 3.96GB, 779kB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 385MB/s
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 388MB/s
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 141kB / 141kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 139kB / 139kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 201kB / 201kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 198kB / 198kB [A[A[A[A[A
+
+
+
+
+
+
..._a_teacher_25step_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 140kB / 140kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st_teacher_25step_cfg.mp4: 100%|██████████| 101kB / 101kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 100kB / 100kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...d__teacher_25step_cfg.mp4: 100%|██████████| 100kB / 100kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 141kB / 141kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 139kB / 139kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 201kB / 201kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 198kB / 198kB [A[A[A[A[A
+
+
+
+
+
+
..._a_teacher_25step_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 140kB / 140kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st_teacher_25step_cfg.mp4: 100%|██████████| 101kB / 101kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 100kB / 100kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 100%|██████████| 204kB / 204kB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 11) : 1%| | 1.43MB / 242MB, 193kB/s
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 387MB/s
+
+
...o_test/step_000500/aux.pt: 96%|█████████▋| 3.82GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 83%|████████▎ | 4.07GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 96%|█████████▋| 3.82GB / 3.96GB, 374MB/s
Processing Files (13 / 14) : 98%|█████████▊| 47.2GB / 48.0GB, 1.55GB/s
+
+
...st/step_001000/student.pt: 2%|▏ | 88.0MB / 3.96GB [A[A
Processing Files (0 / 1) : 2%|▏ | 88.0MB / 3.96GB, 8.63MB/s
+
+
...ts/dimo_test/final/aux.pt: 2%|▏ | 80.0MB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 2%|▏ | 80.0MB / 3.96GB [A[A
Processing Files (0 / 1) : 2%|▏ | 80.0MB / 3.96GB, 7.84MB/s
+
+
...imo_test/final/student.pt: 2%|▏ | 71.9MB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 2%|▏ | 71.9MB / 3.96GB [A[A
Processing Files (0 / 1) : 2%|▏ | 71.9MB / 3.96GB, 7.05MB/s
+
+
...st/step_000500/student.pt: 2%|▏ | 79.0MB / 3.96GB [A[A
Processing Files (0 / 1) : 2%|▏ | 79.0MB / 3.96GB, 7.75MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 378MB/s
+
New Data Upload : 32%|███▏ | 4.21MB / 13.0MB, 413kB/s [A
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 139kB / 139kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 201kB / 201kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 198kB / 198kB [A[A[A[A
+
+
+
+
+
..._a_teacher_25step_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 140kB / 140kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 102kB / 102kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st_teacher_25step_cfg.mp4: 100%|██████████| 101kB / 101kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...teacher_25step_no_cfg.mp4: 100%|██████████| 100kB / 100kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...d__teacher_25step_cfg.mp4: 100%|██████████| 100kB / 100kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 89 files] : 36%|███▌ | 87.0MB / 240MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (99 / 100) : 37%|███▋ | 88.3MB / 242MB, 11.6MB/s
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 380MB/s
+
New Data Upload : 32%|███▏ | 4.21MB / 13.0MB, 413kB/s [A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 85%|████████▍ | 4.15GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 98%|█████████▊| 3.89GB / 3.96GB [A[A
Processing Files (13 / 14) : 98%|█████████▊| 47.2GB / 48.0GB, 1.52GB/s
Processing Files (0 / 1) : 98%|█████████▊| 3.89GB / 3.96GB, 381MB/s
+
+
...st/step_001000/student.pt: 4%|▍ | 157MB / 3.96GB [A[A
Processing Files (0 / 1) : 4%|▍ | 157MB / 3.96GB, 15.4MB/s
+
+
...ts/dimo_test/final/aux.pt: 4%|▍ | 160MB / 3.96GB [A[A
Processing Files (0 / 1) : 4%|▍ | 160MB / 3.96GB, 15.7MB/s
+
+
...imo_test/final/student.pt: 4%|▍ | 152MB / 3.96GB [A[A
Processing Files (0 / 1) : 4%|▍ | 152MB / 3.96GB, 14.9MB/s
+
+
...st/step_000500/student.pt: 4%|▍ | 155MB / 3.96GB [A[A
Processing Files (0 / 1) : 4%|▍ | 155MB / 3.96GB, 15.2MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 67%|██████▋ | 152MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (99 / 100) : 70%|██████▉ | 168MB / 242MB, 21.6MB/s
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 87%|████████▋ | 4.24GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 387MB/s
Processing Files (13 / 14) : 99%|█████████▊| 47.3GB / 48.0GB, 1.50GB/s
+
+
...st/step_001000/student.pt: 6%|▌ | 233MB / 3.96GB [A[A
Processing Files (0 / 1) : 6%|▌ | 233MB / 3.96GB, 22.8MB/s
+
+
...ts/dimo_test/final/aux.pt: 6%|▌ | 230MB / 3.96GB [A[A
Processing Files (0 / 1) : 6%|▌ | 230MB / 3.96GB, 22.6MB/s
+
+
...imo_test/final/student.pt: 6%|▌ | 236MB / 3.96GB [A[A
Processing Files (0 / 1) : 6%|▌ | 236MB / 3.96GB, 23.1MB/s
+
+
...st/step_000500/student.pt: 6%|▌ | 220MB / 3.96GB [A[A
Processing Files (0 / 1) : 6%|▌ | 220MB / 3.96GB, 21.6MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (100 / 100) : 100%|██████████| 242MB / 242MB, 30.2MB/s
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 88%|████████▊ | 4.30GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (13 / 14) : 99%|█████████▉| 47.4GB / 48.0GB, 1.47GB/s
+
+
...st/step_001000/student.pt: 8%|▊ | 305MB / 3.96GB [A[A
Processing Files (0 / 1) : 8%|▊ | 305MB / 3.96GB, 29.9MB/s
+
+
...ts/dimo_test/final/aux.pt: 7%|▋ | 293MB / 3.96GB [A[A
Processing Files (0 / 1) : 7%|▋ | 293MB / 3.96GB, 28.7MB/s
+
+
...imo_test/final/student.pt: 7%|▋ | 295MB / 3.96GB [A[A
Processing Files (0 / 1) : 7%|▋ | 295MB / 3.96GB, 28.9MB/s
+
+
...st/step_000500/student.pt: 7%|▋ | 274MB / 3.96GB [A[A
Processing Files (0 / 1) : 7%|▋ | 274MB / 3.96GB, 26.9MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 89%|████████▉ | 4.38GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (13 / 14) : 99%|█████████▉| 47.5GB / 48.0GB, 1.45GB/s
+
+
...st/step_001000/student.pt: 9%|▉ | 357MB / 3.96GB [A[A
Processing Files (0 / 1) : 9%|▉ | 357MB / 3.96GB, 35.0MB/s
+
+
...ts/dimo_test/final/aux.pt: 9%|▉ | 362MB / 3.96GB [A[A
Processing Files (0 / 1) : 9%|▉ | 362MB / 3.96GB, 35.5MB/s
+
+
...imo_test/final/student.pt: 9%|▉ | 356MB / 3.96GB [A[A
Processing Files (0 / 1) : 9%|▉ | 356MB / 3.96GB, 34.9MB/s
+
+
...st/step_000500/student.pt: 8%|▊ | 330MB / 3.96GB [A[A
Processing Files (0 / 1) : 8%|▊ | 330MB / 3.96GB, 32.4MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 91%|█████████ | 4.44GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (13 / 14) : 99%|█████████▉| 47.5GB / 48.0GB, 1.43GB/s
+
+
...st/step_001000/student.pt: 10%|█ | 413MB / 3.96GB [A[A
Processing Files (0 / 1) : 10%|█ | 413MB / 3.96GB, 40.5MB/s
+
+
...ts/dimo_test/final/aux.pt: 11%|█ | 423MB / 3.96GB [A[A
Processing Files (0 / 1) : 11%|█ | 423MB / 3.96GB, 41.4MB/s
+
+
...imo_test/final/student.pt: 11%|█ | 418MB / 3.96GB [A[A
Processing Files (0 / 1) : 11%|█ | 418MB / 3.96GB, 41.0MB/s
+
+
...st/step_000500/student.pt: 10%|█ | 405MB / 3.96GB [A[A
Processing Files (0 / 1) : 10%|█ | 405MB / 3.96GB, 39.7MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 92%|█████████▏| 4.52GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (13 / 14) : 99%|█████████▉| 47.6GB / 48.0GB, 1.41GB/s
+
+
...st/step_001000/student.pt: 12%|█▏ | 485MB / 3.96GB [A[A
Processing Files (0 / 1) : 12%|█▏ | 485MB / 3.96GB, 47.6MB/s
+
+
...ts/dimo_test/final/aux.pt: 12%|█▏ | 495MB / 3.96GB [A[A
Processing Files (0 / 1) : 12%|█▏ | 495MB / 3.96GB, 48.5MB/s
+
+
...imo_test/final/student.pt: 12%|█▏ | 490MB / 3.96GB [A[A
Processing Files (0 / 1) : 12%|█▏ | 490MB / 3.96GB, 48.0MB/s
+
+
...st/step_000500/student.pt: 12%|█▏ | 485MB / 3.96GB [A[A
Processing Files (0 / 1) : 12%|█▏ | 485MB / 3.96GB, 47.5MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 94%|█████████▍| 4.59GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (13 / 14) : 99%|█████████▉| 47.7GB / 48.0GB, 1.39GB/s
+
+
...st/step_001000/student.pt: 14%|█▍ | 557MB / 3.96GB [A[A
Processing Files (0 / 1) : 14%|█▍ | 557MB / 3.96GB, 54.6MB/s
+
+
...ts/dimo_test/final/aux.pt: 14%|█▍ | 575MB / 3.96GB [A[A
Processing Files (0 / 1) : 14%|█▍ | 575MB / 3.96GB, 56.3MB/s
+
+
...imo_test/final/student.pt: 14%|█▍ | 570MB / 3.96GB [A[A
Processing Files (0 / 1) : 14%|█▍ | 570MB / 3.96GB, 55.9MB/s
+
+
...st/step_000500/student.pt: 14%|█▍ | 557MB / 3.96GB [A[A
Processing Files (0 / 1) : 14%|█▍ | 557MB / 3.96GB, 54.6MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 95%|█████████▌| 4.66GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (13 / 14) : 100%|█████████▉| 47.7GB / 48.0GB, 1.37GB/s
+
+
...st/step_001000/student.pt: 16%|█▌ | 621MB / 3.96GB [A[A
Processing Files (0 / 1) : 16%|█▌ | 621MB / 3.96GB, 60.9MB/s
+
+
...ts/dimo_test/final/aux.pt: 16%|█▌ | 631MB / 3.96GB [A[A
Processing Files (0 / 1) : 16%|█▌ | 631MB / 3.96GB, 61.8MB/s
+
+
...imo_test/final/student.pt: 16%|█▌ | 642MB / 3.96GB [A[A
Processing Files (0 / 1) : 16%|█▌ | 642MB / 3.96GB, 62.9MB/s
+
+
...st/step_000500/student.pt: 16%|█▌ | 628MB / 3.96GB [A[A
Processing Files (0 / 1) : 16%|█▌ | 628MB / 3.96GB, 61.6MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 322MB/s
+
New Data Upload : 46%|████▋ | 6.01MB / 13.0MB, 590kB/s [A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 324MB/s
+
New Data Upload : 46%|████▋ | 6.01MB / 13.0MB, 590kB/s [A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 97%|█████████▋| 4.74GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (13 / 14) : 100%|█████████▉| 47.8GB / 48.0GB, 1.35GB/s
+
+
...st/step_001000/student.pt: 17%|█▋ | 693MB / 3.96GB [A[A
Processing Files (0 / 1) : 17%|█▋ | 693MB / 3.96GB, 68.0MB/s
+
+
...ts/dimo_test/final/aux.pt: 18%|█▊ | 703MB / 3.96GB [A[A
Processing Files (0 / 1) : 18%|█▊ | 703MB / 3.96GB, 68.9MB/s
+
+
...imo_test/final/student.pt: 18%|█▊ | 714MB / 3.96GB [A[A
Processing Files (0 / 1) : 18%|█▊ | 714MB / 3.96GB, 70.0MB/s
+
+
...st/step_000500/student.pt: 17%|█▋ | 693MB / 3.96GB [A[A
Processing Files (0 / 1) : 17%|█▋ | 693MB / 3.96GB, 67.9MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 315MB/s
+
New Data Upload : 60%|██████ | 7.82MB / 13.0MB, 766kB/s [A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 317MB/s
+
New Data Upload : 65%|██████▍ | 8.42MB / 13.0MB, 825kB/s [A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 98%|█████████▊| 4.81GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (13 / 14) : 100%|█████████▉| 47.9GB / 48.0GB, 1.33GB/s
+
+
...st/step_001000/student.pt: 19%|█▉ | 765MB / 3.96GB [A[A
Processing Files (0 / 1) : 19%|█▉ | 765MB / 3.96GB, 75.0MB/s
+
+
...ts/dimo_test/final/aux.pt: 20%|█▉ | 783MB / 3.96GB [A[A
Processing Files (0 / 1) : 20%|█▉ | 783MB / 3.96GB, 76.7MB/s
+
+
...imo_test/final/student.pt: 20%|██ | 794MB / 3.96GB [A[A
Processing Files (0 / 1) : 20%|██ | 794MB / 3.96GB, 77.8MB/s
+
+
...st/step_000500/student.pt: 20%|█▉ | 781MB / 3.96GB [A[A
Processing Files (0 / 1) : 20%|█▉ | 781MB / 3.96GB, 76.5MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 307MB/s
+
New Data Upload : 93%|█████████▎| 12.0MB / 13.0MB, 1.18MB/s [A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (14 / 14) : 100%|██████████| 48.0GB / 48.0GB, 1.30GB/s
+
+
...st/step_001000/student.pt: 21%|██ | 837MB / 3.96GB [A[A
Processing Files (0 / 1) : 21%|██ | 837MB / 3.96GB, 82.1MB/s
+
+
...ts/dimo_test/final/aux.pt: 21%|██▏ | 847MB / 3.96GB [A[A
Processing Files (0 / 1) : 21%|██▏ | 847MB / 3.96GB, 83.0MB/s
+
+
...imo_test/final/student.pt: 22%|██▏ | 874MB / 3.96GB [A[A
Processing Files (0 / 1) : 22%|██▏ | 874MB / 3.96GB, 85.7MB/s
+
+
...st/step_000500/student.pt: 21%|██▏ | 845MB / 3.96GB [A[A
Processing Files (0 / 1) : 21%|██▏ | 845MB / 3.96GB, 82.8MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 301MB/s
+
New Data Upload : 97%|█████████▋| 12.6MB / 13.0MB, 1.24MB/s [A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 304MB/s
+
New Data Upload : 97%|█████████▋| 12.6MB / 13.0MB, 1.24MB/s [A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 318MB/s
+
New Data Upload : 5%|▍ | 601kB / 13.0MB, 58.9kB/s [A
+
+
...st/step_001000/student.pt: 23%|██▎ | 917MB / 3.96GB [A[A
Processing Files (0 / 1) : 23%|██▎ | 917MB / 3.96GB, 89.9MB/s
+
+
...ts/dimo_test/final/aux.pt: 23%|██▎ | 927MB / 3.96GB [A[A
Processing Files (0 / 1) : 23%|██▎ | 927MB / 3.96GB, 90.8MB/s
+
+
...imo_test/final/student.pt: 24%|██▍ | 954MB / 3.96GB [A[A
Processing Files (0 / 1) : 24%|██▍ | 954MB / 3.96GB, 93.5MB/s
+
+
...st/step_000500/student.pt: 23%|██▎ | 917MB / 3.96GB [A[A
Processing Files (0 / 1) : 23%|██▎ | 917MB / 3.96GB, 89.9MB/s
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 25%|██▌ | 997MB / 3.96GB [A[A
Processing Files (0 / 1) : 25%|██▌ | 997MB / 3.96GB, 97.8MB/s
+
+
...ts/dimo_test/final/aux.pt: 25%|██▌ | 1.01GB / 3.96GB [A[A
Processing Files (0 / 1) : 25%|██▌ | 1.01GB / 3.96GB, 98.7MB/s
+
+
...imo_test/final/student.pt: 26%|██▌ | 1.03GB / 3.96GB [A[A
Processing Files (0 / 1) : 26%|██▌ | 1.03GB / 3.96GB, 101MB/s
+
+
...st/step_000500/student.pt: 25%|██▌ | 996MB / 3.96GB [A[A
Processing Files (0 / 1) : 25%|██▌ | 996MB / 3.96GB, 97.7MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 285MB/s
+
New Data Upload : 100%|██████████| 13.0MB / 13.0MB, 1.27MB/s [A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 27%|██▋ | 1.08GB / 3.96GB [A[A
Processing Files (0 / 1) : 27%|██▋ | 1.08GB / 3.96GB, 106MB/s
+
+
...ts/dimo_test/final/aux.pt: 28%|██▊ | 1.09GB / 3.96GB [A[A
Processing Files (0 / 1) : 28%|██▊ | 1.09GB / 3.96GB, 107MB/s
+
+
...imo_test/final/student.pt: 28%|██▊ | 1.11GB / 3.96GB [A[A
Processing Files (0 / 1) : 28%|██▊ | 1.11GB / 3.96GB, 109MB/s
+
+
...st/step_000500/student.pt: 27%|██▋ | 1.08GB / 3.96GB [A[A
Processing Files (0 / 1) : 27%|██▋ | 1.08GB / 3.96GB, 106MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 279MB/s
+
New Data Upload : 100%|██████████| 13.0MB / 13.0MB, 1.27MB/s [A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 29%|██▉ | 1.16GB / 3.96GB [A[A
Processing Files (0 / 1) : 29%|██▉ | 1.16GB / 3.96GB, 113MB/s
+
+
...ts/dimo_test/final/aux.pt: 29%|██▉ | 1.17GB / 3.96GB [A[A
Processing Files (0 / 1) : 29%|██▉ | 1.17GB / 3.96GB, 114MB/s
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 30%|███ | 1.19GB / 3.96GB [A[A
Processing Files (0 / 1) : 30%|███ | 1.19GB / 3.96GB, 117MB/s
+
+
...st/step_000500/student.pt: 29%|██▉ | 1.16GB / 3.96GB [A[A
Processing Files (0 / 1) : 29%|██▉ | 1.16GB / 3.96GB, 113MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 263MB/s
+
New Data Upload : | | 0.00B / 0.00B, 0.00B/s
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 287MB/s
+
New Data Upload : 9%|▉ | 1.20MB / 13.0MB, 118kB/s [A
+
+
...st/step_001000/student.pt: 31%|███ | 1.24GB / 3.96GB [A[A
Processing Files (0 / 1) : 31%|███ | 1.24GB / 3.96GB, 121MB/s
+
+
...ts/dimo_test/final/aux.pt: 31%|███▏ | 1.25GB / 3.96GB [A[A
Processing Files (0 / 1) : 31%|███▏ | 1.25GB / 3.96GB, 122MB/s
+
+
...imo_test/final/student.pt: 32%|███▏ | 1.27GB / 3.96GB [A[A
Processing Files (0 / 1) : 32%|███▏ | 1.27GB / 3.96GB, 125MB/s
+
+
...st/step_000500/student.pt: 31%|███ | 1.24GB / 3.96GB [A[A
Processing Files (0 / 1) : 31%|███ | 1.24GB / 3.96GB, 121MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 278MB/s
+
New Data Upload : 19%|█▊ | 2.41MB / 13.0MB, 236kB/s [A
+
+
...st/step_001000/student.pt: 33%|███▎ | 1.32GB / 3.96GB [A[A
Processing Files (0 / 1) : 33%|███▎ | 1.32GB / 3.96GB, 129MB/s
+
+
...ts/dimo_test/final/aux.pt: 33%|███▎ | 1.33GB / 3.96GB [A[A
Processing Files (0 / 1) : 33%|███▎ | 1.33GB / 3.96GB, 130MB/s
+
+
...imo_test/final/student.pt: 34%|███▍ | 1.35GB / 3.96GB [A[A
Processing Files (0 / 1) : 34%|███▍ | 1.35GB / 3.96GB, 132MB/s
+
+
...st/step_000500/student.pt: 33%|███▎ | 1.31GB / 3.96GB [A[A
Processing Files (0 / 1) : 33%|███▎ | 1.31GB / 3.96GB, 128MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 271MB/s
+
New Data Upload : 28%|██▊ | 3.61MB / 13.0MB, 354kB/s [A
+
+
...st/step_001000/student.pt: 35%|███▌ | 1.39GB / 3.96GB [A[A
Processing Files (0 / 1) : 35%|███▌ | 1.39GB / 3.96GB, 136MB/s
+
+
...ts/dimo_test/final/aux.pt: 35%|███▌ | 1.40GB / 3.96GB [A[A
Processing Files (0 / 1) : 35%|███▌ | 1.40GB / 3.96GB, 137MB/s
+
+
...imo_test/final/student.pt: 36%|███▌ | 1.42GB / 3.96GB [A[A
Processing Files (0 / 1) : 36%|███▌ | 1.42GB / 3.96GB, 139MB/s
+
+
...st/step_000500/student.pt: 35%|███▌ | 1.39GB / 3.96GB [A[A
Processing Files (0 / 1) : 35%|███▌ | 1.39GB / 3.96GB, 136MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 37%|███▋ | 1.47GB / 3.96GB [A[A
Processing Files (0 / 1) : 37%|███▋ | 1.47GB / 3.96GB, 144MB/s
+
+
...ts/dimo_test/final/aux.pt: 37%|███▋ | 1.48GB / 3.96GB [A[A
Processing Files (0 / 1) : 37%|███▋ | 1.48GB / 3.96GB, 145MB/s
+
+
...imo_test/final/student.pt: 38%|███▊ | 1.49GB / 3.96GB [A[A
Processing Files (0 / 1) : 38%|███▊ | 1.49GB / 3.96GB, 146MB/s
+
+
...st/step_000500/student.pt: 37%|███▋ | 1.47GB / 3.96GB [A[A
Processing Files (0 / 1) : 37%|███▋ | 1.47GB / 3.96GB, 144MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 255MB/s
+
New Data Upload : 46%|████▋ | 6.01MB / 13.0MB, 589kB/s [A
+
+
...st/step_001000/student.pt: 39%|███▉ | 1.54GB / 3.96GB [A[A
Processing Files (0 / 1) : 39%|███▉ | 1.54GB / 3.96GB, 151MB/s
+
+
...ts/dimo_test/final/aux.pt: 39%|███▉ | 1.56GB / 3.96GB [A[A
Processing Files (0 / 1) : 39%|███▉ | 1.56GB / 3.96GB, 153MB/s
+
+
...imo_test/final/student.pt: 40%|███▉ | 1.58GB / 3.96GB [A[A
Processing Files (0 / 1) : 40%|███▉ | 1.58GB / 3.96GB, 155MB/s
+
+
...st/step_000500/student.pt: 39%|███▉ | 1.55GB / 3.96GB [A[A
Processing Files (0 / 1) : 39%|███▉ | 1.55GB / 3.96GB, 152MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 248MB/s
+
New Data Upload : 65%|██████▍ | 8.42MB / 13.0MB, 825kB/s [A
+
+
...st/step_001000/student.pt: 41%|████ | 1.61GB / 3.96GB [A[A
Processing Files (0 / 1) : 41%|████ | 1.61GB / 3.96GB, 158MB/s
+
+
...ts/dimo_test/final/aux.pt: 41%|████▏ | 1.64GB / 3.96GB [A[A
Processing Files (0 / 1) : 41%|████▏ | 1.64GB / 3.96GB, 161MB/s
+
+
...imo_test/final/student.pt: 42%|████▏ | 1.65GB / 3.96GB [A[A
Processing Files (0 / 1) : 42%|████▏ | 1.65GB / 3.96GB, 162MB/s
+
+
...st/step_000500/student.pt: 41%|████ | 1.63GB / 3.96GB [A[A
Processing Files (0 / 1) : 41%|████ | 1.63GB / 3.96GB, 160MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 240MB/s
+
New Data Upload : 97%|█████████▋| 12.6MB / 13.0MB, 1.24MB/s [A
+
+
...st/step_001000/student.pt: 43%|████▎ | 1.69GB / 3.96GB [A[A
Processing Files (0 / 1) : 43%|████▎ | 1.69GB / 3.96GB, 166MB/s
+
+
...ts/dimo_test/final/aux.pt: 43%|████▎ | 1.72GB / 3.96GB [A[A
Processing Files (0 / 1) : 43%|████▎ | 1.72GB / 3.96GB, 168MB/s
+
+
...imo_test/final/student.pt: 44%|████▎ | 1.73GB / 3.96GB [A[A
Processing Files (0 / 1) : 44%|████▎ | 1.73GB / 3.96GB, 170MB/s
+
+
...st/step_000500/student.pt: 43%|████▎ | 1.71GB / 3.96GB [A[A
Processing Files (0 / 1) : 43%|████▎ | 1.71GB / 3.96GB, 167MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 45%|████▍ | 1.77GB / 3.96GB [A[A
Processing Files (0 / 1) : 45%|████▍ | 1.77GB / 3.96GB, 174MB/s
+
+
...ts/dimo_test/final/aux.pt: 45%|████▌ | 1.80GB / 3.96GB [A[A
Processing Files (0 / 1) : 45%|████▌ | 1.80GB / 3.96GB, 176MB/s
+
+
...imo_test/final/student.pt: 45%|████▌ | 1.80GB / 3.96GB [A[A
Processing Files (0 / 1) : 45%|████▌ | 1.80GB / 3.96GB, 177MB/s
+
+
...st/step_000500/student.pt: 45%|████▍ | 1.78GB / 3.96GB [A[A
Processing Files (0 / 1) : 45%|████▍ | 1.78GB / 3.96GB, 175MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB [A[A
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB [A[A[A
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB [A[A[A[A
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB [A[A[A[A[A
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB [A[A[A[A[A[A
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (100 / 100) : 100%|██████████| 242MB / 242MB, 24.2MB/s
+
New Data Upload : | | 0.00B / 0.00B, 0.00B/s
+
...pytorch_model.safetensors: 100%|██████████| 225MB / 225MB
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 346kB / 346kB
+
...ist_student_1step_cfg.mp4: 100%|██████████| 145kB / 145kB
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 400kB / 400kB
+
...ist_student_1step_cfg.mp4: 100%|██████████| 114kB / 114kB
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 211kB / 211kB
+
..._f_teacher_50step_cfg.mp4: 100%|██████████| 226kB / 226kB
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 150kB / 150kB
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 298kB / 298kB
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 47%|████▋ | 1.85GB / 3.96GB [A[A
Processing Files (0 / 1) : 47%|████▋ | 1.85GB / 3.96GB, 182MB/s
+
+
...ts/dimo_test/final/aux.pt: 47%|████▋ | 1.88GB / 3.96GB [A[A
Processing Files (0 / 1) : 47%|████▋ | 1.88GB / 3.96GB, 184MB/s
+
+
...imo_test/final/student.pt: 47%|████▋ | 1.87GB / 3.96GB [A[A
Processing Files (0 / 1) : 47%|████▋ | 1.87GB / 3.96GB, 184MB/s
+
+
...st/step_000500/student.pt: 47%|████▋ | 1.86GB / 3.96GB [A[A
Processing Files (0 / 1) : 47%|████▋ | 1.86GB / 3.96GB, 182MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 198MB/s
+
New Data Upload : 100%|██████████| 13.0MB / 13.0MB, 1.30MB/s
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 49%|████▉ | 1.93GB / 3.96GB [A[A
Processing Files (0 / 1) : 49%|████▉ | 1.93GB / 3.96GB, 190MB/s
+
+
...ts/dimo_test/final/aux.pt: 49%|████▉ | 1.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 49%|████▉ | 1.96GB / 3.96GB, 192MB/s
+
+
...imo_test/final/student.pt: 49%|████▉ | 1.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 49%|████▉ | 1.95GB / 3.96GB, 192MB/s
+
+
...st/step_000500/student.pt: 49%|████▉ | 1.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 49%|████▉ | 1.94GB / 3.96GB, 190MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 209MB/s
+
New Data Upload : 100%|██████████| 13.0MB / 13.0MB, 1.27MB/s [A
+
+
...st/step_001000/student.pt: 51%|█████ | 2.01GB / 3.96GB [A[A
Processing Files (0 / 1) : 51%|█████ | 2.01GB / 3.96GB, 197MB/s
+
+
...ts/dimo_test/final/aux.pt: 51%|█████▏ | 2.04GB / 3.96GB [A[A
Processing Files (0 / 1) : 51%|█████▏ | 2.04GB / 3.96GB, 200MB/s
+
+
...imo_test/final/student.pt: 51%|█████ | 2.03GB / 3.96GB [A[A
Processing Files (0 / 1) : 51%|█████ | 2.03GB / 3.96GB, 199MB/s
+
+
...st/step_000500/student.pt: 51%|█████ | 2.02GB / 3.96GB [A[A
Processing Files (0 / 1) : 51%|█████ | 2.02GB / 3.96GB, 198MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 52%|█████▏ | 2.08GB / 3.96GB [A[A
Processing Files (0 / 1) : 52%|█████▏ | 2.08GB / 3.96GB, 204MB/s
+
+
...ts/dimo_test/final/aux.pt: 53%|█████▎ | 2.11GB / 3.96GB [A[A
Processing Files (0 / 1) : 53%|█████▎ | 2.11GB / 3.96GB, 207MB/s
+
+
...imo_test/final/student.pt: 53%|█████▎ | 2.11GB / 3.96GB [A[A
Processing Files (0 / 1) : 53%|█████▎ | 2.11GB / 3.96GB, 206MB/s
+
+
...st/step_000500/student.pt: 53%|█████▎ | 2.10GB / 3.96GB [A[A
Processing Files (0 / 1) : 53%|█████▎ | 2.10GB / 3.96GB, 206MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 54%|█████▍ | 2.16GB / 3.96GB [A[A
Processing Files (0 / 1) : 54%|█████▍ | 2.16GB / 3.96GB, 212MB/s
+
+
...ts/dimo_test/final/aux.pt: 55%|█████▌ | 2.19GB / 3.96GB [A[A
Processing Files (0 / 1) : 55%|█████▌ | 2.19GB / 3.96GB, 215MB/s
+
+
...imo_test/final/student.pt: 55%|█████▍ | 2.18GB / 3.96GB [A[A
Processing Files (0 / 1) : 55%|█████▍ | 2.18GB / 3.96GB, 214MB/s
+
+
...st/step_000500/student.pt: 55%|█████▌ | 2.18GB / 3.96GB [A[A
Processing Files (0 / 1) : 55%|█████▌ | 2.18GB / 3.96GB, 214MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 56%|█████▌ | 2.23GB / 3.96GB [A[A
Processing Files (0 / 1) : 56%|█████▌ | 2.23GB / 3.96GB, 219MB/s
+
+
...ts/dimo_test/final/aux.pt: 57%|█████▋ | 2.26GB / 3.96GB [A[A
Processing Files (0 / 1) : 57%|█████▋ | 2.26GB / 3.96GB, 222MB/s
+
+
...imo_test/final/student.pt: 57%|█████▋ | 2.25GB / 3.96GB [A[A
Processing Files (0 / 1) : 57%|█████▋ | 2.25GB / 3.96GB, 221MB/s
+
+
...st/step_000500/student.pt: 57%|█████▋ | 2.25GB / 3.96GB [A[A
Processing Files (0 / 1) : 57%|█████▋ | 2.25GB / 3.96GB, 221MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 58%|█████▊ | 2.31GB / 3.96GB [A[A
Processing Files (0 / 1) : 58%|█████▊ | 2.31GB / 3.96GB, 226MB/s
+
+
...ts/dimo_test/final/aux.pt: 59%|█████▉ | 2.33GB / 3.96GB [A[A
Processing Files (0 / 1) : 59%|█████▉ | 2.33GB / 3.96GB, 229MB/s
+
+
...imo_test/final/student.pt: 59%|█████▉ | 2.34GB / 3.96GB [A[A
Processing Files (0 / 1) : 59%|█████▉ | 2.34GB / 3.96GB, 229MB/s
+
+
...st/step_000500/student.pt: 59%|█████▊ | 2.32GB / 3.96GB [A[A
Processing Files (0 / 1) : 59%|█████▊ | 2.32GB / 3.96GB, 228MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 60%|██████ | 2.39GB / 3.96GB [A[A
Processing Files (0 / 1) : 60%|██████ | 2.39GB / 3.96GB, 234MB/s
+
+
...ts/dimo_test/final/aux.pt: 61%|██████ | 2.42GB / 3.96GB [A[A
Processing Files (0 / 1) : 61%|██████ | 2.42GB / 3.96GB, 238MB/s
+
+
...imo_test/final/student.pt: 61%|██████ | 2.41GB / 3.96GB [A[A
Processing Files (0 / 1) : 61%|██████ | 2.41GB / 3.96GB, 236MB/s
+
+
...st/step_000500/student.pt: 60%|██████ | 2.40GB / 3.96GB [A[A
Processing Files (0 / 1) : 60%|██████ | 2.40GB / 3.96GB, 235MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 62%|██████▏ | 2.46GB / 3.96GB [A[A
Processing Files (0 / 1) : 62%|██████▏ | 2.46GB / 3.96GB, 241MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 63%|██████▎ | 2.49GB / 3.96GB [A[A
Processing Files (0 / 1) : 63%|██████▎ | 2.49GB / 3.96GB, 245MB/s
+
+
...imo_test/final/student.pt: 63%|██████▎ | 2.48GB / 3.96GB [A[A
Processing Files (0 / 1) : 63%|██████▎ | 2.48GB / 3.96GB, 243MB/s
+
+
...st/step_000500/student.pt: 62%|██████▏ | 2.46GB / 3.96GB [A[A
Processing Files (0 / 1) : 62%|██████▏ | 2.46GB / 3.96GB, 241MB/s
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 131MB/s
+
New Data Upload : 100%|██████████| 13.0MB / 13.0MB, 1.30MB/s
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 64%|██████▍ | 2.53GB / 3.96GB [A[A
Processing Files (0 / 1) : 64%|██████▍ | 2.53GB / 3.96GB, 248MB/s
+
+
...ts/dimo_test/final/aux.pt: 65%|██████▍ | 2.57GB / 3.96GB [A[A
Processing Files (0 / 1) : 65%|██████▍ | 2.57GB / 3.96GB, 252MB/s
+
+
...imo_test/final/student.pt: 64%|██████▍ | 2.55GB / 3.96GB [A[A
Processing Files (0 / 1) : 64%|██████▍ | 2.55GB / 3.96GB, 250MB/s
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
+
+
...st/step_000500/student.pt: 64%|██████▍ | 2.54GB / 3.96GB [A[A
Processing Files (0 / 1) : 64%|██████▍ | 2.54GB / 3.96GB, 249MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 66%|██████▌ | 2.61GB / 3.96GB [A[A
Processing Files (0 / 1) : 66%|██████▌ | 2.61GB / 3.96GB, 256MB/s
+
+
...ts/dimo_test/final/aux.pt: 67%|██████▋ | 2.65GB / 3.96GB [A[A
Processing Files (0 / 1) : 67%|██████▋ | 2.65GB / 3.96GB, 259MB/s
+
+
...imo_test/final/student.pt: 66%|██████▋ | 2.63GB / 3.96GB [A[A
Processing Files (0 / 1) : 66%|██████▋ | 2.63GB / 3.96GB, 258MB/s
+
+
...st/step_000500/student.pt: 66%|██████▌ | 2.62GB / 3.96GB [A[A
Processing Files (0 / 1) : 66%|██████▌ | 2.62GB / 3.96GB, 257MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 1%|▏ | 71.9MB / 4.89GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 1%|▏ | 71.9MB / 4.89GB [A[A
Processing Files (0 / 1) : 1%|▏ | 71.9MB / 4.89GB, ???B/s
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 68%|██████▊ | 2.69GB / 3.96GB [A[A
Processing Files (0 / 1) : 68%|██████▊ | 2.69GB / 3.96GB, 264MB/s
+
+
...ts/dimo_test/final/aux.pt: 69%|██████▉ | 2.73GB / 3.96GB [A[A
Processing Files (0 / 1) : 69%|██████▉ | 2.73GB / 3.96GB, 267MB/s
+
+
...imo_test/final/student.pt: 68%|██████▊ | 2.71GB / 3.96GB [A[A
Processing Files (0 / 1) : 68%|██████▊ | 2.71GB / 3.96GB, 266MB/s
+
+
...st/step_000500/student.pt: 68%|██████▊ | 2.70GB / 3.96GB [A[A
Processing Files (0 / 1) : 68%|██████▊ | 2.70GB / 3.96GB, 265MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 3%|▎ | 152MB / 4.89GB [A[A
Processing Files (0 / 1) : 3%|▎ | 152MB / 4.89GB, 401MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 70%|██████▉ | 2.77GB / 3.96GB [A[A
Processing Files (0 / 1) : 70%|██████▉ | 2.77GB / 3.96GB, 272MB/s
+
+
...ts/dimo_test/final/aux.pt: 71%|███████ | 2.81GB / 3.96GB [A[A
Processing Files (0 / 1) : 71%|███████ | 2.81GB / 3.96GB, 275MB/s
+
+
...imo_test/final/student.pt: 70%|███████ | 2.79GB / 3.96GB [A[A
Processing Files (0 / 1) : 70%|███████ | 2.79GB / 3.96GB, 274MB/s
+
+
...st/step_000500/student.pt: 70%|███████ | 2.78GB / 3.96GB [A[A
Processing Files (0 / 1) : 70%|███████ | 2.78GB / 3.96GB, 273MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 5%|▍ | 232MB / 4.89GB [A[A
Processing Files (0 / 1) : 5%|▍ | 232MB / 4.89GB, 400MB/s
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 72%|███████▏ | 2.86GB / 3.96GB [A[A
Processing Files (0 / 1) : 72%|███████▏ | 2.86GB / 3.96GB, 281MB/s
+
+
...ts/dimo_test/final/aux.pt: 73%|███████▎ | 2.89GB / 3.96GB [A[A
Processing Files (0 / 1) : 73%|███████▎ | 2.89GB / 3.96GB, 284MB/s
+
+
...imo_test/final/student.pt: 72%|███████▏ | 2.87GB / 3.96GB [A[A
Processing Files (0 / 1) : 72%|███████▏ | 2.87GB / 3.96GB, 282MB/s
+
+
...st/step_000500/student.pt: 72%|███████▏ | 2.87GB / 3.96GB [A[A
Processing Files (0 / 1) : 72%|███████▏ | 2.87GB / 3.96GB, 281MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 6%|▋ | 312MB / 4.89GB [A[A
Processing Files (0 / 1) : 6%|▋ | 312MB / 4.89GB, 401MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 74%|███████▍ | 2.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 74%|███████▍ | 2.95GB / 3.96GB, 289MB/s
+
+
...ts/dimo_test/final/aux.pt: 75%|███████▌ | 2.98GB / 3.96GB [A[A
Processing Files (0 / 1) : 75%|███████▌ | 2.98GB / 3.96GB, 292MB/s
+
+
...imo_test/final/student.pt: 75%|███████▍ | 2.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 75%|███████▍ | 2.96GB / 3.96GB, 290MB/s
+
+
...st/step_000500/student.pt: 75%|███████▍ | 2.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 75%|███████▍ | 2.96GB / 3.96GB, 291MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 8%|▊ | 400MB / 4.89GB [A[A
Processing Files (0 / 1) : 8%|▊ | 400MB / 4.89GB, 410MB/s
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 77%|███████▋ | 3.04GB / 3.96GB [A[A
Processing Files (0 / 1) : 77%|███████▋ | 3.04GB / 3.96GB, 298MB/s
+
+
...ts/dimo_test/final/aux.pt: 77%|███████▋ | 3.07GB / 3.96GB [A[A
Processing Files (0 / 1) : 77%|███████▋ | 3.07GB / 3.96GB, 301MB/s
+
+
...imo_test/final/student.pt: 77%|███████▋ | 3.05GB / 3.96GB [A[A
Processing Files (0 / 1) : 77%|███████▋ | 3.05GB / 3.96GB, 299MB/s
+
+
...st/step_000500/student.pt: 77%|███████▋ | 3.05GB / 3.96GB [A[A
Processing Files (0 / 1) : 77%|███████▋ | 3.05GB / 3.96GB, 299MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 10%|▉ | 488MB / 4.89GB [A[A
Processing Files (0 / 1) : 10%|▉ | 488MB / 4.89GB, 416MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 79%|███████▊ | 3.12GB / 3.96GB [A[A
Processing Files (0 / 1) : 79%|███████▊ | 3.12GB / 3.96GB, 306MB/s
+
+
...ts/dimo_test/final/aux.pt: 79%|███████▉ | 3.15GB / 3.96GB [A[A
Processing Files (0 / 1) : 79%|███████▉ | 3.15GB / 3.96GB, 309MB/s
+
+
...imo_test/final/student.pt: 79%|███████▉ | 3.14GB / 3.96GB [A[A
Processing Files (0 / 1) : 79%|███████▉ | 3.14GB / 3.96GB, 308MB/s
+
+
...st/step_000500/student.pt: 79%|███████▉ | 3.14GB / 3.96GB [A[A
Processing Files (0 / 1) : 79%|███████▉ | 3.14GB / 3.96GB, 308MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 12%|█▏ | 576MB / 4.89GB [A[A
Processing Files (0 / 1) : 12%|█▏ | 576MB / 4.89GB, 420MB/s
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 81%|████████ | 3.21GB / 3.96GB [A[A
Processing Files (0 / 1) : 81%|████████ | 3.21GB / 3.96GB, 314MB/s
+
+
...ts/dimo_test/final/aux.pt: 82%|████████▏ | 3.24GB / 3.96GB [A[A
Processing Files (0 / 1) : 82%|████████▏ | 3.24GB / 3.96GB, 317MB/s
+
+
...imo_test/final/student.pt: 81%|████████ | 3.22GB / 3.96GB [A[A
Processing Files (0 / 1) : 81%|████████ | 3.22GB / 3.96GB, 315MB/s
+
+
...st/step_000500/student.pt: 81%|████████ | 3.22GB / 3.96GB [A[A
Processing Files (0 / 1) : 81%|████████ | 3.22GB / 3.96GB, 316MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 13%|█▎ | 656MB / 4.89GB [A[A
Processing Files (0 / 1) : 13%|█▎ | 656MB / 4.89GB, 417MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 83%|████████▎ | 3.29GB / 3.96GB [A[A
Processing Files (0 / 1) : 83%|████████▎ | 3.29GB / 3.96GB, 322MB/s
+
+
...ts/dimo_test/final/aux.pt: 84%|████████▎ | 3.31GB / 3.96GB [A[A
Processing Files (0 / 1) : 84%|████████▎ | 3.31GB / 3.96GB, 325MB/s
+
+
...imo_test/final/student.pt: 83%|████████▎ | 3.30GB / 3.96GB [A[A
Processing Files (0 / 1) : 83%|████████▎ | 3.30GB / 3.96GB, 323MB/s
+
+
...st/step_000500/student.pt: 83%|████████▎ | 3.30GB / 3.96GB [A[A
Processing Files (0 / 1) : 83%|████████▎ | 3.30GB / 3.96GB, 324MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 15%|█▌ | 736MB / 4.89GB [A[A
Processing Files (0 / 1) : 15%|█▌ | 736MB / 4.89GB, 415MB/s
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 85%|████████▍ | 3.37GB / 3.96GB [A[A
Processing Files (0 / 1) : 85%|████████▍ | 3.37GB / 3.96GB, 330MB/s
+
+
...ts/dimo_test/final/aux.pt: 86%|████████▌ | 3.39GB / 3.96GB [A[A
Processing Files (0 / 1) : 86%|████████▌ | 3.39GB / 3.96GB, 332MB/s
+
+
...imo_test/final/student.pt: 85%|████████▌ | 3.38GB / 3.96GB [A[A
Processing Files (0 / 1) : 85%|████████▌ | 3.38GB / 3.96GB, 331MB/s
+
+
...st/step_000500/student.pt: 85%|████████▌ | 3.38GB / 3.96GB [A[A
Processing Files (0 / 1) : 85%|████████▌ | 3.38GB / 3.96GB, 331MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 17%|█▋ | 816MB / 4.89GB [A[A
Processing Files (0 / 1) : 17%|█▋ | 816MB / 4.89GB, 413MB/s
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 65.7MB/s
+
New Data Upload : 100%|██████████| 13.0MB / 13.0MB, 1.30MB/s
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...st/step_001000/student.pt: 87%|████████▋ | 3.45GB / 3.96GB [A[A
Processing Files (0 / 1) : 87%|████████▋ | 3.45GB / 3.96GB, 339MB/s
+
+
...ts/dimo_test/final/aux.pt: 88%|████████▊ | 3.48GB / 3.96GB [A[A
Processing Files (0 / 1) : 88%|████████▊ | 3.48GB / 3.96GB, 341MB/s
+
+
...imo_test/final/student.pt: 87%|████████▋ | 3.47GB / 3.96GB [A[A
Processing Files (0 / 1) : 87%|████████▋ | 3.47GB / 3.96GB, 340MB/s
+
+
...st/step_000500/student.pt: 87%|████████▋ | 3.47GB / 3.96GB [A[A
Processing Files (0 / 1) : 87%|████████▋ | 3.47GB / 3.96GB, 340MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 18%|█▊ | 904MB / 4.89GB [A[A
Processing Files (0 / 1) : 18%|█▊ | 904MB / 4.89GB, 416MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 89%|████████▉ | 3.53GB / 3.96GB [A[A
Processing Files (0 / 1) : 89%|████████▉ | 3.53GB / 3.96GB, 346MB/s
+
+
...ts/dimo_test/final/aux.pt: 90%|████████▉ | 3.57GB / 3.96GB [A[A
Processing Files (0 / 1) : 90%|████████▉ | 3.57GB / 3.96GB, 350MB/s
+
+
...imo_test/final/student.pt: 90%|████████▉ | 3.55GB / 3.96GB [A[A
Processing Files (0 / 1) : 90%|████████▉ | 3.55GB / 3.96GB, 348MB/s
+
+
...st/step_000500/student.pt: 90%|████████▉ | 3.56GB / 3.96GB [A[A
Processing Files (0 / 1) : 90%|████████▉ | 3.56GB / 3.96GB, 349MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 20%|██ | 992MB / 4.89GB [A[A
Processing Files (0 / 1) : 20%|██ | 992MB / 4.89GB, 418MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 91%|█████████▏| 3.62GB / 3.96GB [A[A
Processing Files (0 / 1) : 91%|█████████▏| 3.62GB / 3.96GB, 355MB/s
+
+
...ts/dimo_test/final/aux.pt: 92%|█████████▏| 3.65GB / 3.96GB [A[A
Processing Files (0 / 1) : 92%|█████████▏| 3.65GB / 3.96GB, 357MB/s
+
+
...imo_test/final/student.pt: 92%|█████████▏| 3.64GB / 3.96GB [A[A
Processing Files (0 / 1) : 92%|█████████▏| 3.64GB / 3.96GB, 357MB/s
+
+
...st/step_000500/student.pt: 92%|█████████▏| 3.64GB / 3.96GB [A[A
Processing Files (0 / 1) : 92%|█████████▏| 3.64GB / 3.96GB, 357MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 22%|██▏ | 1.07GB / 4.89GB [A[A
Processing Files (0 / 1) : 22%|██▏ | 1.07GB / 4.89GB, 417MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 94%|█████████▎| 3.71GB / 3.96GB [A[A
Processing Files (0 / 1) : 94%|█████████▎| 3.71GB / 3.96GB, 364MB/s
+
+
...ts/dimo_test/final/aux.pt: 94%|█████████▍| 3.73GB / 3.96GB [A[A
Processing Files (0 / 1) : 94%|█████████▍| 3.73GB / 3.96GB, 366MB/s
+
+
...imo_test/final/student.pt: 94%|█████████▍| 3.72GB / 3.96GB [A[A
Processing Files (0 / 1) : 94%|█████████▍| 3.72GB / 3.96GB, 365MB/s
+
+
...st/step_000500/student.pt: 94%|█████████▍| 3.73GB / 3.96GB [A[A
Processing Files (0 / 1) : 94%|█████████▍| 3.73GB / 3.96GB, 366MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 24%|██▍ | 1.17GB / 4.89GB [A[A
Processing Files (0 / 1) : 24%|██▍ | 1.17GB / 4.89GB, 422MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 96%|█████████▌| 3.80GB / 3.96GB [A[A
Processing Files (0 / 1) : 96%|█████████▌| 3.80GB / 3.96GB, 372MB/s
+
+
...ts/dimo_test/final/aux.pt: 96%|█████████▋| 3.82GB / 3.96GB [A[A
Processing Files (0 / 1) : 96%|█████████▋| 3.82GB / 3.96GB, 375MB/s
+
+
...imo_test/final/student.pt: 96%|█████████▌| 3.81GB / 3.96GB [A[A
Processing Files (0 / 1) : 96%|█████████▌| 3.81GB / 3.96GB, 374MB/s
+
+
...st/step_000500/student.pt: 96%|█████████▋| 3.82GB / 3.96GB [A[A
Processing Files (0 / 1) : 96%|█████████▋| 3.82GB / 3.96GB, 375MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 26%|██▌ | 1.26GB / 4.89GB [A[A
Processing Files (0 / 1) : 26%|██▌ | 1.26GB / 4.89GB, 423MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 98%|█████████▊| 3.88GB / 3.96GB [A[A
Processing Files (0 / 1) : 98%|█████████▊| 3.88GB / 3.96GB, 380MB/s
+
+
...ts/dimo_test/final/aux.pt: 99%|█████████▊| 3.91GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▊| 3.91GB / 3.96GB, 383MB/s
+
+
...imo_test/final/student.pt: 98%|█████████▊| 3.90GB / 3.96GB [A[A
Processing Files (0 / 1) : 98%|█████████▊| 3.90GB / 3.96GB, 382MB/s
+
+
...st/step_000500/student.pt: 98%|█████████▊| 3.90GB / 3.96GB [A[A
Processing Files (0 / 1) : 98%|█████████▊| 3.90GB / 3.96GB, 382MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 27%|██▋ | 1.34GB / 4.89GB [A[A
Processing Files (0 / 1) : 27%|██▋ | 1.34GB / 4.89GB, 421MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 385MB/s
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 388MB/s
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 387MB/s
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.93GB / 3.96GB, 384MB/s
+
+
...la-36M-v1/Koala_36M_9.csv: 29%|██▉ | 1.42GB / 4.89GB [A[A
Processing Files (0 / 1) : 29%|██▉ | 1.42GB / 4.89GB, 423MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 381MB/s
+
New Data Upload : 30%|██▉ | 4.21MB / 14.0MB, 413kB/s [A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 31%|███ | 1.51GB / 4.89GB [A[A
Processing Files (0 / 1) : 31%|███ | 1.51GB / 4.89GB, 424MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 372MB/s
+
New Data Upload : 45%|████▍ | 4.21MB / 9.38MB, 413kB/s [A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.93GB / 3.96GB, 370MB/s
+
New Data Upload : 12%|█▏ | 4.20MB / 35.5MB, 412kB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 33%|███▎ | 1.59GB / 4.89GB [A[A
Processing Files (0 / 1) : 33%|███▎ | 1.59GB / 4.89GB, 422MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 34%|███▍ | 1.68GB / 4.89GB [A[A
Processing Files (0 / 1) : 34%|███▍ | 1.68GB / 4.89GB, 423MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 36%|███▌ | 1.77GB / 4.89GB [A[A
Processing Files (0 / 1) : 36%|███▌ | 1.77GB / 4.89GB, 424MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 38%|███▊ | 1.86GB / 4.89GB [A[A
Processing Files (0 / 1) : 38%|███▊ | 1.86GB / 4.89GB, 425MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 40%|███▉ | 1.94GB / 4.89GB [A[A
Processing Files (0 / 1) : 40%|███▉ | 1.94GB / 4.89GB, 424MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 339MB/s
+
New Data Upload : 2%|▏ | 601kB / 26.6MB, 58.9kB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 41%|████▏ | 2.02GB / 4.89GB [A[A
Processing Files (0 / 1) : 41%|████▏ | 2.02GB / 4.89GB, 424MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 43%|████▎ | 2.10GB / 4.89GB [A[A
Processing Files (0 / 1) : 43%|████▎ | 2.10GB / 4.89GB, 422MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 45%|████▍ | 2.18GB / 4.89GB [A[A
Processing Files (0 / 1) : 45%|████▍ | 2.18GB / 4.89GB, 422MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 319MB/s
+
New Data Upload : 64%|██████▍ | 6.02MB / 9.38MB, 590kB/s [A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 318MB/s
+
New Data Upload : 43%|████▎ | 6.01MB / 14.0MB, 590kB/s [A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 318MB/s
+
New Data Upload : 17%|█▋ | 6.00MB / 35.5MB, 588kB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 46%|████▋ | 2.27GB / 4.89GB [A[A
Processing Files (0 / 1) : 46%|████▋ | 2.27GB / 4.89GB, 423MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 312MB/s
+
New Data Upload : 90%|████████▉ | 8.42MB / 9.38MB, 826kB/s [A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 310MB/s
+
New Data Upload : 60%|█████▉ | 8.42MB / 14.0MB, 825kB/s [A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 310MB/s
+
New Data Upload : 24%|██▎ | 8.40MB / 35.5MB, 824kB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 48%|████▊ | 2.35GB / 4.89GB [A[A
Processing Files (0 / 1) : 48%|████▊ | 2.35GB / 4.89GB, 422MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 304MB/s
+
New Data Upload : 5%|▍ | 1.20MB / 26.6MB, 118kB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 306MB/s
+
New Data Upload : 96%|█████████▌| 9.02MB / 9.38MB, 885kB/s [A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 304MB/s
+
New Data Upload : 36%|███▌ | 12.6MB / 35.5MB, 1.24MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 50%|████▉ | 2.43GB / 4.89GB [A[A
Processing Files (0 / 1) : 50%|████▉ | 2.43GB / 4.89GB, 421MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 296MB/s
+
New Data Upload : 11%|█▏ | 3.00MB / 26.6MB, 294kB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 295MB/s
+
New Data Upload : 86%|████████▌ | 12.0MB / 14.0MB, 1.18MB/s [A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 297MB/s
+
New Data Upload : 47%|████▋ | 16.8MB / 35.5MB, 1.65MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 52%|█████▏ | 2.52GB / 4.89GB [A[A
Processing Files (0 / 1) : 52%|█████▏ | 2.52GB / 4.89GB, 422MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 290MB/s
+
New Data Upload : 100%|██████████| 9.38MB / 9.38MB, 920kB/s [A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 287MB/s
+
New Data Upload : 98%|█████████▊| 13.8MB / 14.0MB, 1.36MB/s [A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 290MB/s
+
New Data Upload : 58%|█████▊ | 20.4MB / 35.5MB, 2.00MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 53%|█████▎ | 2.60GB / 4.89GB [A[A
Processing Files (0 / 1) : 53%|█████▎ | 2.60GB / 4.89GB, 421MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A
Processing Files (0 / 1) : 99%|█████████▉| 3.94GB / 3.96GB, 281MB/s
+
New Data Upload : 23%|██▎ | 6.01MB / 26.6MB, 589kB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 282MB/s
+
New Data Upload : 68%|██████▊ | 24.0MB / 35.5MB, 2.35MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 55%|█████▍ | 2.69GB / 4.89GB [A[A
Processing Files (0 / 1) : 55%|█████▍ | 2.69GB / 4.89GB, 422MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 273MB/s
+
New Data Upload : 29%|██▉ | 7.81MB / 26.6MB, 766kB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 272MB/s
+
New Data Upload : 100%|██████████| 14.0MB / 14.0MB, 1.38MB/s [A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 275MB/s
+
New Data Upload : 81%|████████ | 28.8MB / 35.5MB, 2.82MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 56%|█████▋ | 2.76GB / 4.89GB [A[A
Processing Files (0 / 1) : 56%|█████▋ | 2.76GB / 4.89GB, 420MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 266MB/s
+
New Data Upload : 45%|████▌ | 12.0MB / 26.6MB, 1.18MB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 58%|█████▊ | 2.85GB / 4.89GB [A[A
Processing Files (0 / 1) : 58%|█████▊ | 2.85GB / 4.89GB, 421MB/s
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (14 / 14) : 100%|██████████| 48.0GB / 48.0GB, 0.00B/s
+
New Data Upload : | | 0.00B / 0.00B, 0.00B/s
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB
+
Processing Files (0 / 1) : 100%|█████████▉| 3.95GB / 3.96GB, 258MB/s
+
New Data Upload : 61%|██████ | 16.2MB / 26.6MB, 1.59MB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 260MB/s
+
New Data Upload : 86%|████████▋ | 30.6MB / 35.5MB, 3.00MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 60%|██████ | 2.94GB / 4.89GB [A[A
Processing Files (0 / 1) : 60%|██████ | 2.94GB / 4.89GB, 421MB/s
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 252MB/s
+
New Data Upload : 98%|█████████▊| 34.8MB / 35.5MB, 3.41MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 62%|██████▏ | 3.02GB / 4.89GB [A[A
Processing Files (0 / 1) : 62%|██████▏ | 3.02GB / 4.89GB, 421MB/s
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 244MB/s
+
New Data Upload : 77%|███████▋ | 20.4MB / 26.6MB, 2.00MB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 245MB/s
+
New Data Upload : 100%|█████████▉| 35.4MB / 35.5MB, 3.47MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 63%|██████▎ | 3.10GB / 4.89GB [A[A
Processing Files (0 / 1) : 63%|██████▎ | 3.10GB / 4.89GB, 421MB/s
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 237MB/s
+
New Data Upload : 90%|█████████ | 24.0MB / 26.6MB, 2.36MB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 65%|██████▌ | 3.19GB / 4.89GB [A[A
Processing Files (0 / 1) : 65%|██████▌ | 3.19GB / 4.89GB, 422MB/s
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
Processing Files (0 / 1) : 100%|█████████▉| 3.96GB / 3.96GB, 230MB/s
+
New Data Upload : 99%|█████████▉| 26.4MB / 26.6MB, 2.59MB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 67%|██████▋ | 3.27GB / 4.89GB [A[A
Processing Files (0 / 1) : 67%|██████▋ | 3.27GB / 4.89GB, 421MB/s
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 221MB/s
+
New Data Upload : 100%|██████████| 35.5MB / 35.5MB, 3.48MB/s [A
+
+
...la-36M-v1/Koala_36M_9.csv: 69%|██████▊ | 3.36GB / 4.89GB [A[A
Processing Files (0 / 1) : 69%|██████▊ | 3.36GB / 4.89GB, 422MB/s
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 71%|███████ | 3.45GB / 4.89GB [A[A
Processing Files (0 / 1) : 71%|███████ | 3.45GB / 4.89GB, 422MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 207MB/s
+
New Data Upload : 100%|██████████| 26.6MB / 26.6MB, 2.61MB/s [A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 72%|███████▏ | 3.54GB / 4.89GB [A[A
Processing Files (0 / 1) : 72%|███████▏ | 3.54GB / 4.89GB, 422MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 74%|███████▍ | 3.62GB / 4.89GB [A[A
Processing Files (0 / 1) : 74%|███████▍ | 3.62GB / 4.89GB, 423MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 76%|███████▌ | 3.71GB / 4.89GB [A[A
Processing Files (0 / 1) : 76%|███████▌ | 3.71GB / 4.89GB, 423MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 78%|███████▊ | 3.80GB / 4.89GB [A[A
Processing Files (0 / 1) : 78%|███████▊ | 3.80GB / 4.89GB, 424MB/s
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 170MB/s
+
New Data Upload : 100%|██████████| 9.38MB / 9.38MB, 938kB/s
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 79%|███████▉ | 3.88GB / 4.89GB [A[A
Processing Files (0 / 1) : 79%|███████▉ | 3.88GB / 4.89GB, 423MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 81%|████████ | 3.97GB / 4.89GB [A[A
Processing Files (0 / 1) : 81%|████████ | 3.97GB / 4.89GB, 423MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 83%|████████▎ | 4.06GB / 4.89GB [A[A
Processing Files (0 / 1) : 83%|████████▎ | 4.06GB / 4.89GB, 424MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 85%|████████▍ | 4.14GB / 4.89GB [A[A
Processing Files (0 / 1) : 85%|████████▍ | 4.14GB / 4.89GB, 424MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 87%|████████▋ | 4.23GB / 4.89GB [A[A
Processing Files (0 / 1) : 87%|████████▋ | 4.23GB / 4.89GB, 425MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 88%|████████▊ | 4.32GB / 4.89GB [A[A
Processing Files (0 / 1) : 88%|████████▊ | 4.32GB / 4.89GB, 425MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 90%|█████████ | 4.41GB / 4.89GB [A[A
Processing Files (0 / 1) : 90%|█████████ | 4.41GB / 4.89GB, 425MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 92%|█████████▏| 4.49GB / 4.89GB [A[A
Processing Files (0 / 1) : 92%|█████████▏| 4.49GB / 4.89GB, 425MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 94%|█████████▎| 4.58GB / 4.89GB [A[A
Processing Files (0 / 1) : 94%|█████████▎| 4.58GB / 4.89GB, 426MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 95%|█████████▌| 4.66GB / 4.89GB [A[A
Processing Files (0 / 1) : 95%|█████████▌| 4.66GB / 4.89GB, 426MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 97%|█████████▋| 4.74GB / 4.89GB [A[A
Processing Files (0 / 1) : 97%|█████████▋| 4.74GB / 4.89GB, 426MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 99%|█████████▊| 4.82GB / 4.89GB [A[A
Processing Files (0 / 1) : 99%|█████████▊| 4.82GB / 4.89GB, 425MB/s
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
Processing Files (1 / 1) : 100%|██████████| 4.89GB / 4.89GB, 423MB/s
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 66.7MB/s
+
New Data Upload : 100%|██████████| 14.0MB / 14.0MB, 1.40MB/s
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 59.9MB/s
+
New Data Upload : 100%|██████████| 26.6MB / 26.6MB, 2.66MB/s
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A
Processing Files (1 / 1) : 100%|██████████| 4.89GB / 4.89GB, 279MB/s
+
New Data Upload : | | 0.00B / 0.00B, 0.00B/s
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
[K[F
[K[F
[K[F
[K[F
[K[F
[K[F
[K[F
+---------- 2026-03-23 23:57:58 (0:01:00) ----------
+Files: hashed 779/779 (92.8G/92.8G) | pre-uploaded: 129/130 (88.8G/92.8G) (+100 unsure) | committed: 350/779 (28.2M/92.8G) | ignored: 0
+Workers: hashing: 0 | get upload mode: 1 | pre-uploading: 1 | committing: 1 | waiting: 13
+---------------------------------------------------
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
Processing Files (1 / 1) : 100%|██████████| 3.96GB / 3.96GB, 0.00B/s
+
New Data Upload : 100%|██████████| 35.5MB / 35.5MB, 0.00B/s
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB
+Successfully committed 200 at once. Increasing the limit for next batch.
+'[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1000)' thrown while requesting POST https://huggingface.co/api/models/BryanW/43.wm/preupload/main
+Retrying in 1s [Retry 1/5].
+Failed to get upload mode: Cannot send a request, as the client has been closed.
+
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
+
+
..._Model/URSA/ursa_265f.mp4: 100%|██████████| 794kB / 794kB [A[A
+
+
+
...d_Model/URSA/ursa_85f.mp4: 100%|██████████| 360kB / 360kB [A[A[A
+
+
+
+
..._Model/URSA/ursa_121f.mp4: 100%|██████████| 457kB / 457kB [A[A[A[A
+
+
+
+
+
..._Model/URSA/ursa_337f.mp4: 100%|██████████| 966kB / 966kB [A[A[A[A[A
+
+
+
+
+
+
..._Model/URSA/ursa_373f.mp4: 100%|██████████| 1.05MB / 1.05MB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/her/ursa_265f.mp4: 100%|██████████| 1.10MB / 1.10MB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._Model/URSA/ursa_481f.mp4: 100%|██████████| 1.28MB / 1.28MB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...duler/scheduler_model.pth: 100%|██████████| 1.54MB / 1.54MB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...del/URSA/her/ursa_85f.mp4: 100%|██████████| 503kB / 503kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
..._Model/URSA/ursa_193f.mp4: 100%|██████████| 622kB / 622kB [A[A[A[A[A[A[A[A[A[A[A
+
+
..._Model/URSA/ursa_265f.mp4: 100%|██████████| 794kB / 794kB [A[A
+
+
+
...d_Model/URSA/ursa_85f.mp4: 100%|██████████| 360kB / 360kB [A[A[A
+
+
+
+
..._Model/URSA/ursa_121f.mp4: 100%|██████████| 457kB / 457kB [A[A[A[A
+
+
+
+
+
..._Model/URSA/ursa_337f.mp4: 100%|██████████| 966kB / 966kB [A[A[A[A[A
+
+
+
+
+
+
..._Model/URSA/ursa_373f.mp4: 100%|██████████| 1.05MB / 1.05MB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/her/ursa_265f.mp4: 100%|██████████| 1.10MB / 1.10MB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
..._Model/URSA/ursa_481f.mp4: 100%|██████████| 1.28MB / 1.28MB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...duler/scheduler_model.pth: 100%|██████████| 1.54MB / 1.54MB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...del/URSA/her/ursa_85f.mp4: 100%|██████████| 503kB / 503kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 39 files] : 100%|██████████| 45.5MB / 45.5MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (49 / 49) : 100%|██████████| 53.6MB / 53.6MB, ???B/s
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
+
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB [A[A
+
+
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB [A[A[A
+
+
+
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB [A[A[A[A
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB [A[A[A[A[A
+
+
+
+
+
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (49 / 49) : 100%|██████████| 53.6MB / 53.6MB, 0.00B/s
+
New Data Upload : | | 0.00B / 0.00B, 0.00B/s
+
...assets/model_overview.png: 100%|██████████| 1.42MB / 1.42MB
+
...el/URSA/tom/ursa_229f.mp4: 100%|██████████| 476kB / 476kB
+
...l/URSA/tom/ursa_1+48f.mp4: 100%|██████████| 205kB / 205kB
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 203kB / 203kB
+
...el/URSA/tom/ursa_301f.mp4: 100%|██████████| 620kB / 620kB
+
...el/URSA/tom/ursa_121f.mp4: 100%|██████████| 300kB / 300kB
+
...el/URSA/tom/ursa_481f.mp4: 100%|██████████| 975kB / 975kB
+
...el/URSA/tom/ursa_409f.mp4: 100%|██████████| 818kB / 818kB
+
.../tokenizer/tokenizer.json: 100%|██████████| 11.4MB / 11.4MB
+
.../assets/model_preview.gif: 100%|██████████| 7.06MB / 7.06MB
+
Processing Files (0 / 0) : | | 0.00B / 0.00B
+
New Data Upload : | | 0.00B / 0.00B [A
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A[A[A
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_002000/student.pt: 3%|▎ | 120MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...d_f_student_1step_cfg.mp4: 100%|██████████| 136kB / 136kB [A[A
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 152kB / 152kB [A[A[A
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 210kB / 210kB [A[A[A[A
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 170kB / 170kB [A[A[A[A[A
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 399kB / 399kB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ist_student_1step_cfg.mp4: 100%|██████████| 111kB / 111kB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...t_student_1step_baked.mp4: 100%|██████████| 241kB / 241kB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 7 files] : 2%|▏ | 807MB / 32.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 1%| | 809MB / 92.5GB, ???B/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 6%|▌ | 219MB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 5%|▌ | 216MB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 4%|▍ | 216MB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 6%|▌ | 221MB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 5%|▌ | 216MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 6%|▌ | 221MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 4%|▍ | 175MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 5%|▌ | 199MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 2%|▏ | 1.69GB / 92.5GB, 4.37GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 8%|▊ | 303MB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 8%|▊ | 320MB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 6%|▋ | 312MB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 8%|▊ | 315MB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 8%|▊ | 304MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 8%|▊ | 315MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 7%|▋ | 259MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 7%|▋ | 283MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 3%|▎ | 2.41GB / 92.5GB, 4.01GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 9%|▉ | 362MB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 10%|█ | 416MB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 8%|▊ | 408MB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 10%|█ | 397MB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 10%|▉ | 392MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 10%|█ | 405MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 9%|▉ | 360MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 9%|▉ | 360MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 3%|▎ | 3.10GB / 92.5GB, 3.82GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 12%|█▏ | 465MB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 13%|█▎ | 504MB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 10%|█ | 512MB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 13%|█▎ | 501MB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 12%|█▏ | 488MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 12%|█▏ | 485MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 11%|█ | 445MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 11%|█▏ | 453MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 4%|▍ | 3.85GB / 92.5GB, 3.80GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 14%|█▍ | 561MB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 16%|█▌ | 624MB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 13%|█▎ | 624MB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 15%|█▌ | 605MB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 15%|█▍ | 592MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 15%|█▍ | 589MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 14%|█▍ | 549MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 14%|█▍ | 565MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 5%|▌ | 4.71GB / 92.5GB, 3.90GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 17%|█▋ | 673MB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 19%|█▊ | 736MB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 15%|█▌ | 744MB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 18%|█▊ | 725MB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 18%|█▊ | 712MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 18%|█▊ | 709MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 17%|█▋ | 669MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 17%|█▋ | 685MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 6%|▌ | 5.65GB / 92.5GB, 4.03GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 20%|█▉ | 785MB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 22%|██▏ | 856MB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 17%|█▋ | 848MB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 21%|██ | 837MB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 21%|██ | 816MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 21%|██ | 821MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 20%|█▉ | 789MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 20%|██ | 797MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 7%|▋ | 6.55GB / 92.5GB, 4.10GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 23%|██▎ | 913MB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 25%|██▍ | 984MB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 20%|█▉ | 976MB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 24%|██▍ | 957MB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 24%|██▎ | 936MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 24%|██▎ | 941MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 23%|██▎ | 901MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 23%|██▎ | 917MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 8%|▊ | 7.53GB / 92.5GB, 4.20GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 26%|██▌ | 1.03GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 28%|██▊ | 1.10GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 22%|██▏ | 1.09GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 27%|██▋ | 1.07GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 27%|██▋ | 1.06GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 27%|██▋ | 1.06GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 26%|██▌ | 1.02GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 26%|██▌ | 1.03GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 9%|▉ | 8.46GB / 92.5GB, 4.25GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 29%|██▊ | 1.14GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 30%|███ | 1.21GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 25%|██▍ | 1.20GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 30%|██▉ | 1.17GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 29%|██▉ | 1.17GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 29%|██▉ | 1.16GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 28%|██▊ | 1.12GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 29%|██▊ | 1.13GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 10%|█ | 9.31GB / 92.5GB, 4.25GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 31%|███▏ | 1.24GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 33%|███▎ | 1.32GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 27%|██▋ | 1.30GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 32%|███▏ | 1.28GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 32%|███▏ | 1.28GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 32%|███▏ | 1.28GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 31%|███ | 1.23GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 31%|███ | 1.24GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 11%|█ | 10.2GB / 92.5GB, 4.26GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 34%|███▍ | 1.35GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 36%|███▌ | 1.42GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 29%|██▉ | 1.41GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 35%|███▌ | 1.40GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 35%|███▍ | 1.38GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 35%|███▌ | 1.40GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 34%|███▎ | 1.33GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 34%|███▍ | 1.34GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 12%|█▏ | 11.0GB / 92.5GB, 4.26GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 37%|███▋ | 1.47GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 39%|███▉ | 1.54GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 31%|███ | 1.52GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 38%|███▊ | 1.52GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 38%|███▊ | 1.50GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 38%|███▊ | 1.52GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 36%|███▋ | 1.44GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 37%|███▋ | 1.46GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 13%|█▎ | 12.0GB / 92.5GB, 4.30GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 40%|████ | 1.59GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 42%|████▏ | 1.66GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 33%|███▎ | 1.63GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 41%|████ | 1.63GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 41%|████ | 1.62GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 41%|████ | 1.63GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 39%|███▉ | 1.56GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 39%|███▉ | 1.56GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 14%|█▍ | 12.9GB / 92.5GB, 4.32GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 43%|████▎ | 1.69GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 44%|████▍ | 1.75GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 35%|███▌ | 1.73GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 44%|████▎ | 1.73GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 43%|████▎ | 1.72GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 44%|████▎ | 1.72GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 42%|████▏ | 1.65GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 42%|████▏ | 1.65GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 15%|█▍ | 13.7GB / 92.5GB, 4.28GB/s
[K[F
[K[F
[K[F
[K[F
[K[F
[K[F
[K[F
+---------- 2026-03-23 23:58:58 (0:02:00) ----------
+Files: hashed 779/779 (92.8G/92.8G) | pre-uploaded: 179/179 (92.8G/92.8G) | committed: 550/779 (271.9M/92.8G) | ignored: 0
+Workers: hashing: 0 | get upload mode: 0 | pre-uploading: 0 | committing: 1 | waiting: 15
+---------------------------------------------------
+
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 45%|████▍ | 1.78GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 46%|████▋ | 1.84GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 37%|███▋ | 1.82GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 46%|████▌ | 1.82GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 46%|████▌ | 1.81GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 46%|████▌ | 1.81GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 44%|████▍ | 1.75GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 44%|████▍ | 1.75GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 16%|█▌ | 14.4GB / 92.5GB, 4.24GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 47%|████▋ | 1.87GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 49%|████▉ | 1.94GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 39%|███▉ | 1.92GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 48%|████▊ | 1.92GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 48%|████▊ | 1.91GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 48%|████▊ | 1.91GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 46%|████▋ | 1.84GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 47%|████▋ | 1.84GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 16%|█▋ | 15.1GB / 92.5GB, 4.21GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 50%|████▉ | 1.97GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 52%|█████▏ | 2.05GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 42%|████▏ | 2.03GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 51%|█████ | 2.03GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 51%|█████ | 2.02GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 51%|█████ | 2.01GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 49%|████▉ | 1.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 49%|████▉ | 1.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 17%|█▋ | 16.0GB / 92.5GB, 4.22GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 52%|█████▏ | 2.08GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 54%|█████▍ | 2.15GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 44%|████▍ | 2.14GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 54%|█████▍ | 2.14GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 54%|█████▎ | 2.13GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 54%|█████▎ | 2.12GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 52%|█████▏ | 2.06GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 52%|█████▏ | 2.05GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 18%|█▊ | 16.9GB / 92.5GB, 4.23GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 55%|█████▌ | 2.19GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 57%|█████▋ | 2.26GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 46%|████▌ | 2.26GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 57%|█████▋ | 2.25GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 57%|█████▋ | 2.24GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 56%|█████▋ | 2.24GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 55%|█████▌ | 2.18GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 55%|█████▍ | 2.16GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 19%|█▉ | 17.8GB / 92.5GB, 4.24GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 58%|█████▊ | 2.31GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 60%|██████ | 2.38GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 49%|████▊ | 2.38GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 60%|█████▉ | 2.37GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 60%|█████▉ | 2.36GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 59%|█████▉ | 2.35GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 58%|█████▊ | 2.30GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 58%|█████▊ | 2.28GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 20%|██ | 18.7GB / 92.5GB, 4.27GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 61%|██████ | 2.43GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 64%|██████▎ | 2.52GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 51%|█████ | 2.50GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 63%|██████▎ | 2.49GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 63%|██████▎ | 2.48GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 62%|██████▏ | 2.47GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 61%|██████▏ | 2.43GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 61%|██████ | 2.41GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 21%|██▏ | 19.7GB / 92.5GB, 4.30GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 64%|██████▍ | 2.55GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 66%|██████▋ | 2.63GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 54%|█████▎ | 2.62GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 66%|██████▌ | 2.60GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 65%|██████▌ | 2.59GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 65%|██████▌ | 2.59GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 64%|██████▍ | 2.54GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 64%|██████▎ | 2.52GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 22%|██▏ | 20.7GB / 92.5GB, 4.31GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 67%|██████▋ | 2.65GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 69%|██████▉ | 2.73GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 56%|█████▌ | 2.72GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 69%|██████▊ | 2.72GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 68%|██████▊ | 2.70GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 68%|██████▊ | 2.69GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 67%|██████▋ | 2.64GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 67%|██████▋ | 2.64GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 23%|██▎ | 21.5GB / 92.5GB, 4.31GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 70%|██████▉ | 2.76GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 72%|███████▏ | 2.84GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 58%|█████▊ | 2.83GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 71%|███████▏ | 2.83GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 71%|███████ | 2.81GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 71%|███████ | 2.80GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 70%|██████▉ | 2.76GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 69%|██████▉ | 2.75GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 24%|██▍ | 22.4GB / 92.5GB, 4.32GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 73%|███████▎ | 2.88GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 75%|███████▍ | 2.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 60%|██████ | 2.94GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 74%|███████▍ | 2.94GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 74%|███████▍ | 2.93GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 73%|███████▎ | 2.91GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 73%|███████▎ | 2.88GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 73%|███████▎ | 2.88GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 25%|██▌ | 23.3GB / 92.5GB, 4.33GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 75%|███████▌ | 2.99GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 77%|███████▋ | 3.06GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 62%|██████▏ | 3.05GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 77%|███████▋ | 3.05GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 77%|███████▋ | 3.04GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 76%|███████▌ | 3.02GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 76%|███████▌ | 3.00GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 75%|███████▌ | 2.99GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 26%|██▌ | 24.2GB / 92.5GB, 4.33GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 78%|███████▊ | 3.10GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 80%|████████ | 3.18GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 64%|██████▍ | 3.14GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 80%|███████▉ | 3.16GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 79%|███████▉ | 3.14GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 79%|███████▉ | 3.13GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 78%|███████▊ | 3.09GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 78%|███████▊ | 3.09GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 27%|██▋ | 25.0GB / 92.5GB, 4.32GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 81%|████████ | 3.20GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 83%|████████▎ | 3.28GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 66%|██████▋ | 3.25GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 82%|████████▏ | 3.26GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 82%|████████▏ | 3.25GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 81%|████████▏ | 3.23GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 81%|████████ | 3.20GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 81%|████████ | 3.20GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 28%|██▊ | 25.9GB / 92.5GB, 4.32GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 83%|████████▎ | 3.31GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 85%|████████▌ | 3.38GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 69%|██████▊ | 3.35GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 85%|████████▍ | 3.36GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 84%|████████▍ | 3.34GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 84%|████████▍ | 3.32GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 83%|████████▎ | 3.30GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 83%|████████▎ | 3.30GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 29%|██▉ | 26.7GB / 92.5GB, 4.31GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 86%|████████▌ | 3.41GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 88%|████████▊ | 3.49GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 71%|███████ | 3.46GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 87%|████████▋ | 3.47GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 87%|████████▋ | 3.45GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 86%|████████▋ | 3.43GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 86%|████████▌ | 3.40GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 86%|████████▌ | 3.40GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 30%|██▉ | 27.5GB / 92.5GB, 4.30GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 88%|████████▊ | 3.51GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 90%|█████████ | 3.58GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 73%|███████▎ | 3.55GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 90%|█████████ | 3.57GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 90%|████████▉ | 3.55GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 89%|████████▉ | 3.53GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 88%|████████▊ | 3.50GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 88%|████████▊ | 3.50GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 31%|███ | 28.3GB / 92.5GB, 4.29GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 91%|█████████ | 3.62GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 93%|█████████▎| 3.69GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 75%|███████▍ | 3.66GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 93%|█████████▎| 3.68GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 92%|█████████▏| 3.66GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 92%|█████████▏| 3.64GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 91%|█████████▏| 3.62GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 91%|█████████ | 3.60GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 32%|███▏ | 29.2GB / 92.5GB, 4.30GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 94%|█████████▍| 3.72GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 96%|█████████▌| 3.79GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 77%|███████▋ | 3.77GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 96%|█████████▌| 3.79GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 95%|█████████▌| 3.77GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 95%|█████████▍| 3.75GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 94%|█████████▍| 3.72GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 94%|█████████▍| 3.72GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 32%|███▏ | 30.0GB / 92.5GB, 4.30GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 97%|█████████▋| 3.83GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 98%|█████████▊| 3.90GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 79%|███████▉ | 3.88GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 98%|█████████▊| 3.90GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 98%|█████████▊| 3.88GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 97%|█████████▋| 3.85GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 97%|█████████▋| 3.84GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 97%|█████████▋| 3.83GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (9 / 17) : 33%|███▎ | 30.9GB / 92.5GB, 4.30GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 81%|████████▏ | 3.98GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 99%|█████████▉| 3.94GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 17) : 34%|███▍ | 31.6GB / 92.5GB, 4.28GB/s
+
+
...d__teacher_50step_cfg.mp4: 100%|██████████| 358kB / 358kB [A[A
+
+
+
...st_teacher_50step_cfg.mp4: 100%|██████████| 231kB / 231kB [A[A[A
+
+
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...pytorch_model.safetensors: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...la-36M-v1/Koala_36M_7.csv: 83%|████████▎ | 4.07GB / 4.89GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...st/step_001500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 5 files] : 15%|█▍ | 4.19GB / 28.4GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 22) : 35%|███▍ | 32.0GB / 92.5GB, 4.22GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_7.csv: 85%|████████▍ | 4.15GB / 4.89GB [A[A[A
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 3%|▎ | 160MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 3%|▎ | 152MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 2%|▏ | 120MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 2%|▏ | 328MB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 24) : 35%|███▌ | 32.6GB / 92.5GB, 4.18GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_7.csv: 87%|████████▋ | 4.24GB / 4.89GB [A[A[A
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 5%|▌ | 248MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 5%|▌ | 248MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 4%|▍ | 208MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 4%|▎ | 696MB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 24) : 36%|███▌ | 33.3GB / 92.5GB, 4.17GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_7.csv: 89%|████████▊ | 4.34GB / 4.89GB [A[A[A
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 7%|▋ | 336MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 7%|▋ | 336MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 6%|▋ | 312MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 5%|▌ | 1.06GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 24) : 37%|███▋ | 34.1GB / 92.5GB, 4.16GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_7.csv: 91%|█████████ | 4.44GB / 4.89GB [A[A[A
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 9%|▉ | 440MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 9%|▉ | 440MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 9%|▊ | 416MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 8%|▊ | 1.49GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 24) : 38%|███▊ | 34.9GB / 92.5GB, 4.16GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_7.csv: 93%|█████████▎| 4.54GB / 4.89GB [A[A[A
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 11%|█ | 544MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 11%|█ | 544MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 11%|█ | 528MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 10%|▉ | 1.87GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 24) : 39%|███▊ | 35.7GB / 92.5GB, 4.16GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_7.csv: 95%|█████████▌| 4.66GB / 4.89GB [A[A[A
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 14%|█▎ | 664MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 13%|█▎ | 656MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 13%|█▎ | 640MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 12%|█▏ | 2.33GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 24) : 40%|███▉ | 36.6GB / 92.5GB, 4.17GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_7.csv: 98%|█████████▊| 4.78GB / 4.89GB [A[A[A
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 16%|█▌ | 776MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 16%|█▌ | 768MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 16%|█▌ | 760MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 14%|█▍ | 2.78GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (11 / 24) : 41%|████ | 37.5GB / 92.5GB, 4.17GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...la-36M-v1/Koala_36M_7.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A
+
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 18%|█▊ | 896MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 18%|█▊ | 880MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 18%|█▊ | 880MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 16%|█▋ | 3.22GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 24) : 42%|████▏ | 38.5GB / 92.5GB, 4.18GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 20%|██ | 1.00GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 20%|██ | 992MB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 20%|█▉ | 976MB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 19%|█▉ | 928MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 14%|█▍ | 2.71GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 42%|████▏ | 39.2GB / 92.5GB, 4.17GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 22%|██▏ | 1.09GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 22%|██▏ | 1.09GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 22%|██▏ | 1.08GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 21%|██ | 1.02GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 16%|█▌ | 3.10GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 43%|████▎ | 40.0GB / 92.5GB, 4.17GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 25%|██▍ | 1.20GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 24%|██▍ | 1.19GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 24%|██▍ | 1.18GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 23%|██▎ | 1.14GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 18%|█▊ | 3.54GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 44%|████▍ | 40.8GB / 92.5GB, 4.17GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 27%|██▋ | 1.32GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 27%|██▋ | 1.32GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 27%|██▋ | 1.31GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 26%|██▌ | 1.26GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 21%|██ | 4.02GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 45%|████▌ | 41.8GB / 92.5GB, 4.18GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 29%|██▉ | 1.44GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 29%|██▉ | 1.44GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 29%|██▉ | 1.43GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 28%|██▊ | 1.38GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 23%|██▎ | 4.52GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 46%|████▋ | 42.8GB / 92.5GB, 4.20GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 32%|███▏ | 1.56GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 32%|███▏ | 1.55GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 32%|███▏ | 1.54GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 30%|███ | 1.49GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 25%|██▌ | 4.97GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 47%|████▋ | 43.7GB / 92.5GB, 4.20GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 34%|███▍ | 1.68GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 34%|███▍ | 1.67GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 34%|███▍ | 1.66GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 33%|███▎ | 1.61GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 28%|██▊ | 5.44GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 48%|████▊ | 44.6GB / 92.5GB, 4.21GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 37%|███▋ | 1.80GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 36%|███▋ | 1.78GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 36%|███▌ | 1.77GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 35%|███▌ | 1.71GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 30%|███ | 5.90GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 49%|████▉ | 45.5GB / 92.5GB, 4.23GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 39%|███▉ | 1.91GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 39%|███▉ | 1.90GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 38%|███▊ | 1.88GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 37%|███▋ | 1.82GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 32%|███▏ | 6.34GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 50%|█████ | 46.4GB / 92.5GB, 4.25GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 41%|████▏ | 2.02GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 41%|████ | 2.01GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 41%|████ | 1.98GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 40%|███▉ | 1.94GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 34%|███▍ | 6.74GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 51%|█████ | 47.3GB / 92.5GB, 4.26GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 43%|████▎ | 2.12GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 43%|████▎ | 2.12GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 43%|████▎ | 2.09GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 42%|████▏ | 2.03GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 37%|███▋ | 7.18GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 52%|█████▏ | 48.1GB / 92.5GB, 4.26GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 46%|████▌ | 2.23GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 45%|████▌ | 2.22GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 45%|████▍ | 2.20GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 44%|████▎ | 2.14GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 39%|███▉ | 7.60GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 53%|█████▎ | 49.0GB / 92.5GB, 4.25GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 48%|████▊ | 2.34GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 48%|████▊ | 2.33GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 47%|████▋ | 2.32GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 46%|████▌ | 2.25GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 41%|████ | 8.04GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 54%|█████▍ | 49.9GB / 92.5GB, 4.25GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 50%|█████ | 2.46GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 50%|████▉ | 2.44GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 50%|████▉ | 2.42GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 48%|████▊ | 2.34GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 43%|████▎ | 8.42GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 55%|█████▍ | 50.7GB / 92.5GB, 4.23GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 52%|█████▏ | 2.55GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 52%|█████▏ | 2.53GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 51%|█████▏ | 2.51GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 50%|████▉ | 2.43GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 45%|████▌ | 8.81GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 56%|█████▌ | 51.4GB / 92.5GB, 4.21GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 54%|█████▍ | 2.65GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 54%|█████▎ | 2.62GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 53%|█████▎ | 2.61GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 52%|█████▏ | 2.53GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 47%|████▋ | 9.19GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 56%|█████▋ | 52.2GB / 92.5GB, 4.20GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 56%|█████▋ | 2.75GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 55%|█████▌ | 2.71GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 55%|█████▌ | 2.71GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 54%|█████▍ | 2.63GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 49%|████▉ | 9.60GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 57%|█████▋ | 53.0GB / 92.5GB, 4.20GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 58%|█████▊ | 2.86GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 58%|█████▊ | 2.82GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 58%|█████▊ | 2.82GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 56%|█████▌ | 2.74GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 51%|█████▏ | 10.0GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 58%|█████▊ | 53.9GB / 92.5GB, 4.20GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 61%|██████ | 2.98GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 60%|██████ | 2.94GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 60%|██████ | 2.94GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 59%|█████▊ | 2.86GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 54%|█████▎ | 10.5GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 59%|█████▉ | 54.8GB / 92.5GB, 4.20GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 63%|██████▎ | 3.08GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 63%|██████▎ | 3.06GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 62%|██████▏ | 3.05GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 61%|██████ | 2.96GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 56%|█████▌ | 10.9GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 60%|██████ | 55.7GB / 92.5GB, 4.19GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 65%|██████▌ | 3.18GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 64%|██████▍ | 3.15GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 64%|██████▍ | 3.14GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 63%|██████▎ | 3.06GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 58%|█████▊ | 11.4GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 61%|██████ | 56.5GB / 92.5GB, 4.20GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 67%|██████▋ | 3.29GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 67%|██████▋ | 3.26GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 66%|██████▋ | 3.25GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 65%|██████▍ | 3.17GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 60%|██████ | 11.8GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 62%|██████▏ | 57.3GB / 92.5GB, 4.21GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 70%|██████▉ | 3.41GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 69%|██████▉ | 3.37GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 69%|██████▊ | 3.36GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 67%|██████▋ | 3.29GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 63%|██████▎ | 12.2GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 63%|██████▎ | 58.3GB / 92.5GB, 4.23GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 72%|███████▏ | 3.53GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 72%|███████▏ | 3.50GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 71%|███████▏ | 3.49GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 70%|██████▉ | 3.42GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 65%|██████▌ | 12.7GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 64%|██████▍ | 59.2GB / 92.5GB, 4.24GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 75%|███████▍ | 3.65GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 74%|███████▍ | 3.62GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 74%|███████▍ | 3.62GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 72%|███████▏ | 3.54GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 68%|██████▊ | 13.2GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 65%|██████▌ | 60.2GB / 92.5GB, 4.25GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 77%|███████▋ | 3.77GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 76%|███████▋ | 3.74GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 77%|███████▋ | 3.74GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 75%|███████▍ | 3.65GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 70%|███████ | 13.7GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 66%|██████▌ | 61.2GB / 92.5GB, 4.26GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 80%|███████▉ | 3.90GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 79%|███████▉ | 3.86GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 79%|███████▉ | 3.87GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 77%|███████▋ | 3.78GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 73%|███████▎ | 14.3GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 67%|██████▋ | 62.2GB / 92.5GB, 4.27GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 82%|████████▏ | 4.02GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 81%|████████▏ | 3.98GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 82%|████████▏ | 4.00GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 80%|████████ | 3.91GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 76%|███████▌ | 14.8GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 68%|██████▊ | 63.3GB / 92.5GB, 4.27GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 85%|████████▍ | 4.15GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 84%|████████▍ | 4.11GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 84%|████████▍ | 4.13GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 83%|████████▎ | 4.04GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 78%|███████▊ | 15.3GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 70%|██████▉ | 64.3GB / 92.5GB, 4.28GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 88%|████████▊ | 4.28GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 87%|████████▋ | 4.23GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 87%|████████▋ | 4.26GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 85%|████████▌ | 4.17GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 81%|████████ | 15.8GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 71%|███████ | 65.3GB / 92.5GB, 4.30GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 90%|█████████ | 4.42GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 89%|████████▉ | 4.37GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 90%|████████▉ | 4.39GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 88%|████████▊ | 4.30GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 84%|████████▎ | 16.3GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 72%|███████▏ | 66.4GB / 92.5GB, 4.31GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 93%|█████████▎| 4.55GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 92%|█████████▏| 4.50GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 93%|█████████▎| 4.53GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 91%|█████████ | 4.43GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 86%|████████▌ | 16.9GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 73%|███████▎ | 67.5GB / 92.5GB, 4.33GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 96%|█████████▌| 4.68GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 95%|█████████▍| 4.64GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 95%|█████████▌| 4.66GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 93%|█████████▎| 4.55GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 89%|████████▉ | 17.4GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 74%|███████▍ | 68.5GB / 92.5GB, 4.34GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 98%|█████████▊| 4.81GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 97%|█████████▋| 4.76GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 98%|█████████▊| 4.78GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 96%|█████████▌| 4.68GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 92%|█████████▏| 17.9GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (12 / 25) : 75%|███████▌ | 69.5GB / 92.5GB, 4.36GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...a-36M-v1/Koala_36M_10.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|█████████▉| 4.87GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_5.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 98%|█████████▊| 4.78GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 3 files] : 94%|█████████▍| 18.4GB / 19.6GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (14 / 25) : 76%|███████▌ | 70.4GB / 92.5GB, 4.36GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_8.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|█████████▉| 4.87GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_4.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 99%|█████████▉| 4.86GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 2 files] : 65%|██████▍ | 8.93GB / 13.7GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (17 / 26) : 77%|███████▋ | 70.8GB / 92.5GB, 4.33GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_3.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_1.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_2.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 84%|████████▍ | 4.10GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 4 files] : 2%|▏ | 320MB / 20.7GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 77%|███████▋ | 71.2GB / 92.5GB, 4.29GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 86%|████████▌ | 4.18GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 5%|▍ | 191MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 4%|▍ | 160MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 4%|▍ | 152MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 3%|▎ | 231MB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 78%|███████▊ | 71.7GB / 92.5GB, 4.26GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 87%|████████▋ | 4.26GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 6%|▋ | 251MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 6%|▌ | 227MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 6%|▌ | 227MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 4%|▍ | 372MB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 78%|███████▊ | 72.1GB / 92.5GB, 4.21GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 88%|████████▊ | 4.31GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 8%|▊ | 323MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 7%|▋ | 295MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 7%|▋ | 293MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 6%|▌ | 490MB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 78%|███████▊ | 72.5GB / 92.5GB, 4.16GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 89%|████████▉ | 4.38GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 10%|▉ | 381MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 9%|▉ | 355MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 9%|▉ | 352MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 7%|▋ | 630MB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 79%|███████▉ | 72.9GB / 92.5GB, 4.12GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 91%|█████████ | 4.44GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 11%|█ | 445MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 11%|█ | 417MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 11%|█ | 421MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 9%|▊ | 767MB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 79%|███████▉ | 73.3GB / 92.5GB, 4.08GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 92%|█████████▏| 4.52GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 13%|█▎ | 525MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 13%|█▎ | 505MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 13%|█▎ | 501MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 11%|█ | 935MB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 80%|███████▉ | 73.8GB / 92.5GB, 4.10GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 94%|█████████▍| 4.60GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 15%|█▌ | 613MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 15%|█▍ | 585MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 15%|█▍ | 581MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 12%|█▏ | 1.10GB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 80%|████████ | 74.3GB / 92.5GB, 4.09GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 96%|█████████▌| 4.68GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 17%|█▋ | 685MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 17%|█▋ | 665MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 17%|█▋ | 661MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 14%|█▍ | 1.26GB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 81%|████████ | 74.8GB / 92.5GB, 4.06GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 97%|█████████▋| 4.76GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 19%|█▉ | 765MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 19%|█▊ | 737MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 19%|█▊ | 741MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 16%|█▌ | 1.42GB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 81%|████████▏ | 75.2GB / 92.5GB, 4.03GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 99%|█████████▉| 4.84GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 21%|██▏ | 845MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 21%|██ | 817MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 21%|██ | 820MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 18%|█▊ | 1.58GB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (19 / 30) : 82%|████████▏ | 75.7GB / 92.5GB, 4.00GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_6.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 24%|██▎ | 933MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 23%|██▎ | 905MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 23%|██▎ | 909MB / 3.96GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
[+ 1 files] : 20%|█▉ | 1.74GB / 8.85GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 82%|████████▏ | 76.2GB / 92.5GB, 3.97GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 26%|██▌ | 1.02GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 25%|██▌ | 993MB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 25%|██▌ | 997MB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 20%|█▉ | 960MB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 24%|██▍ | 951MB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 83%|████████▎ | 76.6GB / 92.5GB, 3.92GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 28%|██▊ | 1.09GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 27%|██▋ | 1.07GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 27%|██▋ | 1.08GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 21%|██▏ | 1.05GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 26%|██▌ | 1.03GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 83%|████████▎ | 77.0GB / 92.5GB, 3.87GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 30%|██▉ | 1.17GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 29%|██▉ | 1.16GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 29%|██▉ | 1.16GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 23%|██▎ | 1.13GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 28%|██▊ | 1.11GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 84%|████████▎ | 77.4GB / 92.5GB, 3.82GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 32%|███▏ | 1.25GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 31%|███▏ | 1.24GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 31%|███▏ | 1.24GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 25%|██▍ | 1.22GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 30%|███ | 1.19GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 84%|████████▍ | 77.8GB / 92.5GB, 3.79GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 34%|███▍ | 1.34GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 33%|███▎ | 1.32GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 33%|███▎ | 1.32GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 26%|██▋ | 1.29GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 32%|███▏ | 1.27GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 85%|████████▍ | 78.2GB / 92.5GB, 3.75GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 35%|███▌ | 1.40GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 35%|███▌ | 1.39GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 35%|███▌ | 1.40GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 28%|██▊ | 1.37GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 34%|███▎ | 1.34GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 85%|████████▍ | 78.6GB / 92.5GB, 3.70GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 38%|███▊ | 1.49GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 37%|███▋ | 1.48GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 37%|███▋ | 1.48GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 30%|██▉ | 1.45GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 36%|███▌ | 1.42GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 85%|████████▌ | 79.0GB / 92.5GB, 3.65GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 40%|███▉ | 1.57GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 39%|███▉ | 1.56GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 39%|███▉ | 1.56GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 31%|███▏ | 1.53GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 38%|███▊ | 1.50GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 86%|████████▌ | 79.4GB / 92.5GB, 3.59GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 42%|████▏ | 1.65GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 41%|████ | 1.63GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 41%|████▏ | 1.64GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 33%|███▎ | 1.61GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 40%|███▉ | 1.58GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 86%|████████▋ | 79.8GB / 92.5GB, 3.54GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 44%|████▎ | 1.73GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 43%|████▎ | 1.71GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 43%|████▎ | 1.72GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 35%|███▍ | 1.69GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 42%|████▏ | 1.66GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 87%|████████▋ | 80.2GB / 92.5GB, 3.49GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 46%|████▌ | 1.81GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 45%|████▌ | 1.79GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 45%|████▌ | 1.80GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 36%|███▌ | 1.77GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 44%|████▎ | 1.73GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 87%|████████▋ | 80.6GB / 92.5GB, 3.44GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 48%|████▊ | 1.89GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 47%|████▋ | 1.87GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 47%|████▋ | 1.88GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 38%|███▊ | 1.85GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 46%|████▌ | 1.81GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 88%|████████▊ | 81.0GB / 92.5GB, 3.39GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 50%|████▉ | 1.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 49%|████▉ | 1.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 49%|████▉ | 1.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 39%|███▉ | 1.93GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 48%|████▊ | 1.89GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 88%|████████▊ | 81.4GB / 92.5GB, 3.34GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 52%|█████▏ | 2.05GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 51%|█████ | 2.03GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 51%|█████ | 2.03GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 41%|████ | 2.01GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 50%|████▉ | 1.97GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 88%|████████▊ | 81.8GB / 92.5GB, 3.30GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 54%|█████▍ | 2.13GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 53%|█████▎ | 2.11GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 54%|█████▎ | 2.12GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 43%|████▎ | 2.10GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 52%|█████▏ | 2.06GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 89%|████████▉ | 82.2GB / 92.5GB, 3.26GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 56%|█████▌ | 2.22GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 56%|█████▌ | 2.20GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 56%|█████▌ | 2.20GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 45%|████▍ | 2.18GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 54%|█████▍ | 2.14GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 89%|████████▉ | 82.7GB / 92.5GB, 3.21GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 58%|█████▊ | 2.31GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 58%|█████▊ | 2.28GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 58%|█████▊ | 2.29GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 46%|████▋ | 2.26GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 56%|█████▌ | 2.22GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 90%|████████▉ | 83.1GB / 92.5GB, 3.18GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 60%|██████ | 2.39GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 59%|█████▉ | 2.35GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 60%|█████▉ | 2.37GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 48%|████▊ | 2.34GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 58%|█████▊ | 2.30GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 90%|█████████ | 83.5GB / 92.5GB, 3.14GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 62%|██████▏ | 2.47GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 62%|██████▏ | 2.44GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 62%|██████▏ | 2.45GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 50%|████▉ | 2.43GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 60%|██████ | 2.39GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 91%|█████████ | 83.9GB / 92.5GB, 3.11GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 64%|██████▍ | 2.55GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 64%|██████▎ | 2.52GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 64%|██████▍ | 2.53GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 51%|█████▏ | 2.51GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 62%|██████▏ | 2.47GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 91%|█████████ | 84.3GB / 92.5GB, 3.07GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 66%|██████▋ | 2.63GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 65%|██████▌ | 2.59GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 66%|██████▌ | 2.61GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 53%|█████▎ | 2.59GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 64%|██████▍ | 2.55GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 92%|█████████▏| 84.7GB / 92.5GB, 3.02GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 68%|██████▊ | 2.71GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 68%|██████▊ | 2.68GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 68%|██████▊ | 2.68GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 55%|█████▍ | 2.67GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 66%|██████▌ | 2.62GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 92%|█████████▏| 85.1GB / 92.5GB, 2.97GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 70%|███████ | 2.79GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 70%|██████▉ | 2.76GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 70%|██████▉ | 2.76GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 56%|█████▋ | 2.75GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 68%|██████▊ | 2.70GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 92%|█████████▏| 85.5GB / 92.5GB, 2.92GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 72%|███████▏ | 2.87GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 72%|███████▏ | 2.84GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 72%|███████▏ | 2.84GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 58%|█████▊ | 2.84GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 70%|███████ | 2.78GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 93%|█████████▎| 85.9GB / 92.5GB, 2.88GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 75%|███████▍ | 2.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 74%|███████▍ | 2.93GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 74%|███████▍ | 2.93GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 60%|█████▉ | 2.92GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 72%|███████▏ | 2.87GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 93%|█████████▎| 86.3GB / 92.5GB, 2.84GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 77%|███████▋ | 3.04GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 76%|███████▌ | 3.02GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 76%|███████▌ | 3.02GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 62%|██████▏ | 3.01GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 74%|███████▍ | 2.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 94%|█████████▍| 86.7GB / 92.5GB, 2.79GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 79%|███████▉ | 3.13GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 78%|███████▊ | 3.11GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 78%|███████▊ | 3.10GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 63%|██████▎ | 3.09GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 77%|███████▋ | 3.05GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 94%|█████████▍| 87.2GB / 92.5GB, 2.74GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 81%|████████▏ | 3.23GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 81%|████████ | 3.19GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 80%|████████ | 3.19GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 65%|██████▌ | 3.18GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 79%|███████▉ | 3.14GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 95%|█████████▍| 87.6GB / 92.5GB, 2.69GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 84%|████████▎ | 3.32GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 83%|████████▎ | 3.28GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 83%|████████▎ | 3.28GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 67%|██████▋ | 3.26GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 81%|████████▏ | 3.22GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 95%|█████████▌| 88.1GB / 92.5GB, 2.63GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 86%|████████▌ | 3.40GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 85%|████████▍ | 3.37GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 85%|████████▍ | 3.36GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 69%|██████▊ | 3.35GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 84%|████████▎ | 3.31GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 96%|█████████▌| 88.5GB / 92.5GB, 2.57GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 88%|████████▊ | 3.48GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 87%|████████▋ | 3.45GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 87%|████████▋ | 3.44GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 70%|███████ | 3.44GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 86%|████████▌ | 3.39GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 96%|█████████▌| 88.9GB / 92.5GB, 2.51GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 90%|█████████ | 3.57GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 89%|████████▉ | 3.54GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 89%|████████▉ | 3.52GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 72%|███████▏ | 3.52GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 88%|████████▊ | 3.48GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 97%|█████████▋| 89.3GB / 92.5GB, 2.45GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 92%|█████████▏| 3.65GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 91%|█████████ | 3.62GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 91%|█████████ | 3.59GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 74%|███████▎ | 3.60GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 90%|████████▉ | 3.55GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 97%|█████████▋| 89.7GB / 92.5GB, 2.39GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 94%|█████████▍| 3.73GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 93%|█████████▎| 3.70GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 93%|█████████▎| 3.67GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 75%|███████▌ | 3.68GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 91%|█████████▏| 3.62GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 97%|█████████▋| 90.1GB / 92.5GB, 2.32GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 96%|█████████▌| 3.80GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 95%|█████████▌| 3.77GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 95%|█████████▍| 3.75GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 77%|███████▋ | 3.74GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 93%|█████████▎| 3.68GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 98%|█████████▊| 90.4GB / 92.5GB, 2.25GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 98%|█████████▊| 3.87GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 97%|█████████▋| 3.85GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 96%|█████████▋| 3.82GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 78%|███████▊ | 3.82GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 95%|█████████▍| 3.75GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 98%|█████████▊| 90.8GB / 92.5GB, 2.19GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 99%|█████████▉| 3.93GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 99%|█████████▉| 3.92GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 98%|█████████▊| 3.90GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 80%|███████▉ | 3.90GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 97%|█████████▋| 3.83GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 99%|█████████▊| 91.2GB / 92.5GB, 2.12GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 81%|████████▏ | 3.98GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 99%|█████████▊| 3.91GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 99%|█████████▉| 91.4GB / 92.5GB, 2.07GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 83%|████████▎ | 4.06GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 99%|█████████▉| 91.5GB / 92.5GB, 2.03GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 85%|████████▍ | 4.14GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 99%|█████████▉| 91.6GB / 92.5GB, 2.00GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 86%|████████▋ | 4.22GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 99%|█████████▉| 91.7GB / 92.5GB, 1.96GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 88%|████████▊ | 4.31GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 99%|█████████▉| 91.8GB / 92.5GB, 1.93GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 90%|█████████ | 4.41GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 99%|█████████▉| 91.9GB / 92.5GB, 1.90GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 92%|█████████▏| 4.49GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 99%|█████████▉| 92.0GB / 92.5GB, 1.87GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 94%|█████████▎| 4.58GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 100%|█████████▉| 92.1GB / 92.5GB, 1.84GB/s
+
New Data Upload : 1%| | 601kB / 60.0MB, 59.0kB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 95%|█████████▌| 4.66GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 100%|█████████▉| 92.2GB / 92.5GB, 1.80GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 97%|█████████▋| 4.75GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 100%|█████████▉| 92.2GB / 92.5GB, 1.76GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 99%|█████████▉| 4.83GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (20 / 30) : 100%|█████████▉| 92.3GB / 92.5GB, 1.72GB/s
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.68GB/s
+
New Data Upload : 1%| | 1.20MB / 122MB, 118kB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.64GB/s
+
New Data Upload : 2%|▏ | 2.41MB / 122MB, 236kB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.59GB/s
+
New Data Upload : 3%|▎ | 4.21MB / 122MB, 413kB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.55GB/s
+
New Data Upload : 5%|▍ | 6.01MB / 122MB, 589kB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.47GB/s
+
New Data Upload : 6%|▋ | 7.82MB / 122MB, 766kB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.43GB/s
+
New Data Upload : 10%|▉ | 12.0MB / 122MB, 1.18MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.39GB/s
+
New Data Upload : 13%|█▎ | 16.2MB / 122MB, 1.59MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.35GB/s
+
New Data Upload : 17%|█▋ | 21.0MB / 122MB, 2.06MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.31GB/s
+
New Data Upload : 19%|█▊ | 22.8MB / 122MB, 2.24MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.27GB/s
+
New Data Upload : 21%|██ | 25.3MB / 122MB, 2.48MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.24GB/s
+
New Data Upload : 25%|██▍ | 30.1MB / 122MB, 2.95MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.20GB/s
+
New Data Upload : 28%|██▊ | 34.9MB / 122MB, 3.42MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.16GB/s
+
New Data Upload : 32%|███▏ | 39.7MB / 122MB, 3.89MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.12GB/s
+
New Data Upload : 35%|███▌ | 43.3MB / 122MB, 4.24MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.04GB/s
+
New Data Upload : 39%|███▉ | 48.1MB / 122MB, 4.72MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 1.00GB/s
+
New Data Upload : 43%|████▎ | 52.9MB / 122MB, 5.19MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 960MB/s
+
New Data Upload : 47%|████▋ | 57.1MB / 122MB, 5.60MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (21 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 919MB/s
+
New Data Upload : 49%|████▊ | 59.5MB / 122MB, 5.84MB/s [A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 601MB/s
+
New Data Upload : 49%|████▉ | 60.0MB / 122MB, 5.89MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 560MB/s
+
New Data Upload : 52%|█████▏ | 64.2MB / 122MB, 6.30MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.4GB / 92.5GB, 472MB/s
+
New Data Upload : 54%|█████▍ | 66.6MB / 122MB, 6.53MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 431MB/s
+
New Data Upload : 57%|█████▋ | 70.2MB / 122MB, 6.89MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 349MB/s
+
New Data Upload : 60%|██████ | 73.9MB / 122MB, 7.24MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 307MB/s
+
New Data Upload : 64%|██████▍ | 78.1MB / 122MB, 7.65MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 270MB/s
+
New Data Upload : 68%|██████▊ | 82.9MB / 122MB, 8.12MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 232MB/s
+
New Data Upload : 71%|███████ | 87.1MB / 122MB, 8.54MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 200MB/s
+
New Data Upload : 73%|███████▎ | 88.9MB / 122MB, 8.71MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.95GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 163MB/s
+
New Data Upload : 75%|███████▍ | 91.3MB / 122MB, 8.95MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 127MB/s
+
New Data Upload : 78%|███████▊ | 94.9MB / 122MB, 9.30MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 103MB/s
+
New Data Upload : 81%|████████ | 99.1MB / 122MB, 9.71MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 91.8MB/s
+
New Data Upload : 84%|████████▍ | 103MB / 122MB, 10.1MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 84.4MB/s
+
New Data Upload : 88%|████████▊ | 108MB / 122MB, 10.5MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 75.9MB/s
+
New Data Upload : 89%|████████▉ | 109MB / 122MB, 10.7MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 67.5MB/s
+
New Data Upload : 91%|█████████ | 111MB / 122MB, 10.9MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 58.4MB/s
+
New Data Upload : 94%|█████████▎| 115MB / 122MB, 11.2MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 50.9MB/s
+
New Data Upload : 97%|█████████▋| 118MB / 122MB, 11.6MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (26 / 30) : 100%|█████████▉| 92.5GB / 92.5GB, 42.6MB/s
+
New Data Upload : 100%|█████████▉| 122MB / 122MB, 11.9MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|█████████▉| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (30 / 30) : 100%|██████████| 92.5GB / 92.5GB, 11.4MB/s
+
New Data Upload : 100%|██████████| 122MB / 122MB, 11.4MB/s [A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
+
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A
+
+
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A
+
+
+
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A
+
+
+
+
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A
+
+
+
+
+
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A
+
+
+
+
+
+
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB [A[A[A[A[A[A[A[A[A[A
+
+
+
+
+
+
+
+
+
+
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB [A[A[A[A[A[A[A[A[A[A[A
Processing Files (30 / 30) : 100%|██████████| 92.5GB / 92.5GB, 0.00B/s
+
New Data Upload : 100%|██████████| 122MB / 122MB, 0.00B/s
+
...st/step_002000/student.pt: 100%|██████████| 3.96GB / 3.96GB
+
...o_test/step_001000/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
...o_test/step_002000/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
...o_test/step_000500/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
...o_test/step_001500/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
...ts/dimo_test/final/aux.pt: 100%|██████████| 3.96GB / 3.96GB
+
...imo_test/final/student.pt: 100%|██████████| 3.96GB / 3.96GB
+
...st/step_001000/student.pt: 100%|██████████| 3.96GB / 3.96GB
+
...la-36M-v1/Koala_36M_9.csv: 100%|██████████| 4.89GB / 4.89GB
+
...st/step_000500/student.pt: 100%|██████████| 3.96GB / 3.96GB