Upload folder using huggingface_hub
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +30 -0
- examples/inference_sample/processed/kid_coffee/infer_scripts/case1_left.yaml +1 -1
- examples/inference_sample/processed/kid_coffee/infer_scripts/case1_right.yaml +1 -1
- examples/training_sample/.DS_Store +0 -0
- examples/training_sample/processed/00000000/clip.mp4 +3 -0
- examples/training_sample/processed/00000000/dynamic_masks.mp4 +3 -0
- examples/training_sample/processed/00000000/dynamic_prompts.json +14 -0
- examples/training_sample/processed/00000000/geometry.npz +3 -0
- examples/training_sample/processed/00000000/latents.pt +3 -0
- examples/training_sample/processed/00000000/mask_car.mp4 +0 -0
- examples/training_sample/processed/00000000/mask_person.mp4 +0 -0
- examples/training_sample/processed/00000000/mask_sky.mp4 +3 -0
- examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_1.mp4 +0 -0
- examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_9.mp4 +0 -0
- examples/training_sample/processed/00000000/train_preceding_rgb_1.mp4 +0 -0
- examples/training_sample/processed/00000000/train_preceding_rgb_9.mp4 +3 -0
- examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 +0 -0
- examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 +0 -0
- examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_1.mp4 +0 -0
- examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_9.mp4 +0 -0
- examples/training_sample/processed/00000000/train_reference_instance_00.mp4 +0 -0
- examples/training_sample/processed/00000000/train_reference_instance_01.mp4 +0 -0
- examples/training_sample/processed/00000000/train_reference_instance_02.mp4 +0 -0
- examples/training_sample/processed/00000000/train_reference_instance_03.mp4 +0 -0
- examples/training_sample/processed/00000000/train_reference_instance_04.mp4 +0 -0
- examples/training_sample/processed/00000000/train_reference_instances.json +59 -0
- examples/training_sample/processed/00000000/train_reference_rgb.mp4 +3 -0
- examples/training_sample/processed/00000000/train_reference_scene_rgb.mp4 +3 -0
- examples/training_sample/processed/00000000/train_reference_scene_rgb_orig.mp4 +3 -0
- examples/training_sample/processed/00000000/train_sample.json +171 -0
- examples/training_sample/processed/00000000/train_target_fg_rgb.txt +1 -0
- examples/training_sample/processed/00000000/train_target_proj_fg_rgb.mp4 +0 -0
- examples/training_sample/processed/00000000/train_target_rgb.mp4 +3 -0
- examples/training_sample/processed/00000000/train_target_rgb.txt +1 -0
- examples/training_sample/processed/00000000/train_target_scene_proj_fg_overlay_rgb.mp4 +3 -0
- examples/training_sample/processed/00000000/train_target_scene_proj_rgb.mp4 +3 -0
- examples/training_sample/processed/00000000/train_target_scene_proj_rgb_orig.mp4 +3 -0
- examples/training_sample/processed/00000000/train_target_scene_rgb.txt +1 -0
- examples/training_sample/processed/00000001/clip.mp4 +3 -0
- examples/training_sample/processed/00000001/dynamic_masks.mp4 +0 -0
- examples/training_sample/processed/00000001/dynamic_prompts.json +6 -0
- examples/training_sample/processed/00000001/geometry.npz +3 -0
- examples/training_sample/processed/00000001/latents.pt +3 -0
- examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_1.mp4 +0 -0
- examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_9.mp4 +0 -0
- examples/training_sample/processed/00000001/train_preceding_rgb_1.mp4 +0 -0
- examples/training_sample/processed/00000001/train_preceding_rgb_9.mp4 +0 -0
- examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_1.mp4 +0 -0
- examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 +0 -0
- examples/training_sample/processed/00000001/train_preceding_scene_proj_rgb_1.mp4 +0 -0
.gitattributes
CHANGED
|
@@ -70,3 +70,33 @@ examples/inference_sample/processed/kid_coffee/trajectory_templates/visualizatio
|
|
| 70 |
examples/inference_sample/processed/kid_coffee/trajectory_templates/visualizations/kid_coffee/trajectories_global_frustum.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 71 |
examples/inference_sample/processed/kid_coffee/trajectory_templates/visualizations/kid_coffee/trajectories_global_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 72 |
examples/inference_sample/raw/kid_coffee.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
examples/inference_sample/processed/kid_coffee/trajectory_templates/visualizations/kid_coffee/trajectories_global_frustum.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 71 |
examples/inference_sample/processed/kid_coffee/trajectory_templates/visualizations/kid_coffee/trajectories_global_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 72 |
examples/inference_sample/raw/kid_coffee.png filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
examples/training_sample/processed/00000000/clip.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
examples/training_sample/processed/00000000/dynamic_masks.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
examples/training_sample/processed/00000000/mask_sky.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
examples/training_sample/processed/00000000/train_preceding_rgb_9.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
examples/training_sample/processed/00000000/train_reference_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
examples/training_sample/processed/00000000/train_reference_scene_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
examples/training_sample/processed/00000000/train_reference_scene_rgb_orig.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
examples/training_sample/processed/00000000/train_target_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
examples/training_sample/processed/00000000/train_target_scene_proj_fg_overlay_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
examples/training_sample/processed/00000000/train_target_scene_proj_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
examples/training_sample/processed/00000000/train_target_scene_proj_rgb_orig.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
examples/training_sample/processed/00000001/clip.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
examples/training_sample/processed/00000001/train_target_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
examples/training_sample/processed/00000001/train_target_scene_proj_fg_overlay_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
examples/training_sample/processed/00000001/train_target_scene_proj_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
examples/training_sample/processed/00000001/train_target_scene_proj_rgb_orig.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
examples/training_sample/processed/00000002/clip.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
examples/training_sample/processed/00000002/dynamic_masks.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
examples/training_sample/processed/00000002/mask_person.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
examples/training_sample/processed/00000002/train_preceding_scene_proj_fg_overlay_rgb_9.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
examples/training_sample/processed/00000002/train_preceding_scene_proj_rgb_9.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
examples/training_sample/processed/00000002/train_target_proj_fg_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
examples/training_sample/processed/00000002/train_target_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
examples/training_sample/processed/00000002/train_target_scene_proj_fg_overlay_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
examples/training_sample/processed/00000002/train_target_scene_proj_rgb.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
examples/training_sample/processed/00000002/train_target_scene_proj_rgb_orig.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
examples/training_sample/processed_lmdb/shard_000.lmdb/data.mdb filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
examples/training_sample/raw/MIRA/4230740.0.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
examples/training_sample/raw/RealEstate10K/1259726fc1f8e966.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
examples/training_sample/raw/SpatiaVID_HQ/0a2d11ab-57d7-516d-b873-c29555a41796.mp4 filter=lfs diff=lfs merge=lfs -text
|
examples/inference_sample/processed/kid_coffee/infer_scripts/case1_left.yaml
CHANGED
|
@@ -9,7 +9,7 @@ iter_input:
|
|
| 9 |
from a perpendicular angle. The red-and-white striped canopy and colorful pennant
|
| 10 |
banner are seen edge-on, with the coffee and books signage now aligned vertically
|
| 11 |
in the frame.
|
| 12 |
-
fg_text: ''
|
| 13 |
'1':
|
| 14 |
scene_text: The scene now presents a side profile of the stall, with the red-and-white
|
| 15 |
striped canopy and colorful pennant banner visible overhead. The white tablecloth
|
|
|
|
| 9 |
from a perpendicular angle. The red-and-white striped canopy and colorful pennant
|
| 10 |
banner are seen edge-on, with the coffee and books signage now aligned vertically
|
| 11 |
in the frame.
|
| 12 |
+
fg_text: 'On the right, a lovely corgi dog sits on a wooden bench under the wall, staying still and resting on the bench.'
|
| 13 |
'1':
|
| 14 |
scene_text: The scene now presents a side profile of the stall, with the red-and-white
|
| 15 |
striped canopy and colorful pennant banner visible overhead. The white tablecloth
|
examples/inference_sample/processed/kid_coffee/infer_scripts/case1_right.yaml
CHANGED
|
@@ -10,7 +10,7 @@ iter_input:
|
|
| 10 |
The white tablecloth covers the counter, with chalkboard signs for "COFFEE"
|
| 11 |
and "BOOKS" now oriented sideways, and stacks of books, a jar of pencils, and
|
| 12 |
potted flowers still arranged on the right side.
|
| 13 |
-
fg_text: ''
|
| 14 |
'1':
|
| 15 |
scene_text: The scene is viewed from a side angle, showing the full length of
|
| 16 |
the stall’s white-clothed counter with chalkboard signs for “COFFEE” and “BOOKS,”
|
|
|
|
| 10 |
The white tablecloth covers the counter, with chalkboard signs for "COFFEE"
|
| 11 |
and "BOOKS" now oriented sideways, and stacks of books, a jar of pencils, and
|
| 12 |
potted flowers still arranged on the right side.
|
| 13 |
+
fg_text: 'On the right, a lovely corgi dog sits on a wooden bench under the wall, staying still and resting on the bench.'
|
| 14 |
'1':
|
| 15 |
scene_text: The scene is viewed from a side angle, showing the full length of
|
| 16 |
the stall’s white-clothed counter with chalkboard signs for “COFFEE” and “BOOKS,”
|
examples/training_sample/.DS_Store
ADDED
|
Binary file (10.2 kB). View file
|
|
|
examples/training_sample/processed/00000000/clip.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7933df8527f851b0dd18600242720f9edf8f0f54114f40317860d7e163c6e943
|
| 3 |
+
size 3029650
|
examples/training_sample/processed/00000000/dynamic_masks.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:135723756707066e4b1b7edecf0f55f3474496ffb120d52dc71871dbb8a389ea
|
| 3 |
+
size 339662
|
examples/training_sample/processed/00000000/dynamic_prompts.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"raw": "1) person\n2) car\n3) sky",
|
| 3 |
+
"entities": [
|
| 4 |
+
"person",
|
| 5 |
+
"car",
|
| 6 |
+
"sky"
|
| 7 |
+
],
|
| 8 |
+
"prompts": [
|
| 9 |
+
"person",
|
| 10 |
+
"car",
|
| 11 |
+
"sky"
|
| 12 |
+
],
|
| 13 |
+
"status": "ok"
|
| 14 |
+
}
|
examples/training_sample/processed/00000000/geometry.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ff89760739ce6a0ffbcdfbdb1f1883ed5971e10c9dd82973fcf6c8c8206e657
|
| 3 |
+
size 165477281
|
examples/training_sample/processed/00000000/latents.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30c6d9d6301d71c9c4717ef468d447a2e2ba18f5510af051458f382f93f40e6a
|
| 3 |
+
size 27363108
|
examples/training_sample/processed/00000000/mask_car.mp4
ADDED
|
Binary file (44.7 kB). View file
|
|
|
examples/training_sample/processed/00000000/mask_person.mp4
ADDED
|
Binary file (30 kB). View file
|
|
|
examples/training_sample/processed/00000000/mask_sky.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:600e63d9faf910365cf5f71290869bfa05f646f644415696da0a1a476f4674ed
|
| 3 |
+
size 283869
|
examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_1.mp4
ADDED
|
Binary file (2.23 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_preceding_proj_fg_rgb_9.mp4
ADDED
|
Binary file (11.9 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_preceding_rgb_1.mp4
ADDED
|
Binary file (23.7 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_preceding_rgb_9.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a806dc60b82aa5f24a74cf7584fa33a4febfd733c39ca623e29a56a5a78716bd
|
| 3 |
+
size 121834
|
examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_1.mp4
ADDED
|
Binary file (14.4 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_preceding_scene_proj_fg_overlay_rgb_9.mp4
ADDED
|
Binary file (80.9 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_1.mp4
ADDED
|
Binary file (14.3 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_preceding_scene_proj_rgb_9.mp4
ADDED
|
Binary file (73.4 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_reference_instance_00.mp4
ADDED
|
Binary file (14.3 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_reference_instance_01.mp4
ADDED
|
Binary file (13.1 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_reference_instance_02.mp4
ADDED
|
Binary file (9.62 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_reference_instance_03.mp4
ADDED
|
Binary file (7.87 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_reference_instance_04.mp4
ADDED
|
Binary file (6.24 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_reference_instances.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"target_video": "train_target_rgb.mp4",
|
| 3 |
+
"prompts": [
|
| 4 |
+
"person",
|
| 5 |
+
"car"
|
| 6 |
+
],
|
| 7 |
+
"instances": [
|
| 8 |
+
{
|
| 9 |
+
"rank": 0,
|
| 10 |
+
"obj_id": 9,
|
| 11 |
+
"max_area_ratio": 0.030376101762820514,
|
| 12 |
+
"frame_indices": [
|
| 13 |
+
42,
|
| 14 |
+
53,
|
| 15 |
+
60
|
| 16 |
+
]
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"rank": 1,
|
| 20 |
+
"obj_id": 8,
|
| 21 |
+
"max_area_ratio": 0.005681590544871795,
|
| 22 |
+
"frame_indices": [
|
| 23 |
+
1,
|
| 24 |
+
42,
|
| 25 |
+
62
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"rank": 2,
|
| 30 |
+
"obj_id": 7,
|
| 31 |
+
"max_area_ratio": 0.002423878205128205,
|
| 32 |
+
"frame_indices": [
|
| 33 |
+
11,
|
| 34 |
+
29,
|
| 35 |
+
54
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"rank": 3,
|
| 40 |
+
"obj_id": 6,
|
| 41 |
+
"max_area_ratio": 0.0023337339743589743,
|
| 42 |
+
"frame_indices": [
|
| 43 |
+
60,
|
| 44 |
+
62,
|
| 45 |
+
63
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"rank": 4,
|
| 50 |
+
"obj_id": 5,
|
| 51 |
+
"max_area_ratio": 0.001970653044871795,
|
| 52 |
+
"frame_indices": [
|
| 53 |
+
55,
|
| 54 |
+
59,
|
| 55 |
+
60
|
| 56 |
+
]
|
| 57 |
+
}
|
| 58 |
+
]
|
| 59 |
+
}
|
examples/training_sample/processed/00000000/train_reference_rgb.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9198ec28b716cdd629680b654c2db416f2056136a6500df4f5db614381825cde
|
| 3 |
+
size 144828
|
examples/training_sample/processed/00000000/train_reference_scene_rgb.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43933ec73381a1adee7be5ce33194b3568922f0c496ae3657bc155d0b14cbb41
|
| 3 |
+
size 138324
|
examples/training_sample/processed/00000000/train_reference_scene_rgb_orig.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5228a8f111598e17ac0fc46376070444536b3ea7541aac5051ff919c40470d5
|
| 3 |
+
size 143767
|
examples/training_sample/processed/00000000/train_sample.json
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"t0": 26,
|
| 3 |
+
"P9_idx": [
|
| 4 |
+
17,
|
| 5 |
+
18,
|
| 6 |
+
19,
|
| 7 |
+
20,
|
| 8 |
+
21,
|
| 9 |
+
22,
|
| 10 |
+
23,
|
| 11 |
+
24,
|
| 12 |
+
25
|
| 13 |
+
],
|
| 14 |
+
"P1_idx": [
|
| 15 |
+
25
|
| 16 |
+
],
|
| 17 |
+
"T_idx": [
|
| 18 |
+
26,
|
| 19 |
+
27,
|
| 20 |
+
28,
|
| 21 |
+
29,
|
| 22 |
+
30,
|
| 23 |
+
31,
|
| 24 |
+
32,
|
| 25 |
+
33,
|
| 26 |
+
34,
|
| 27 |
+
35,
|
| 28 |
+
36,
|
| 29 |
+
37,
|
| 30 |
+
38,
|
| 31 |
+
39,
|
| 32 |
+
40,
|
| 33 |
+
41,
|
| 34 |
+
42,
|
| 35 |
+
43,
|
| 36 |
+
44,
|
| 37 |
+
45,
|
| 38 |
+
46,
|
| 39 |
+
47,
|
| 40 |
+
48,
|
| 41 |
+
49,
|
| 42 |
+
50,
|
| 43 |
+
51,
|
| 44 |
+
52,
|
| 45 |
+
53,
|
| 46 |
+
54,
|
| 47 |
+
55,
|
| 48 |
+
56,
|
| 49 |
+
57,
|
| 50 |
+
58,
|
| 51 |
+
59,
|
| 52 |
+
60,
|
| 53 |
+
61,
|
| 54 |
+
62,
|
| 55 |
+
63,
|
| 56 |
+
64,
|
| 57 |
+
65,
|
| 58 |
+
66,
|
| 59 |
+
67,
|
| 60 |
+
68,
|
| 61 |
+
69,
|
| 62 |
+
70,
|
| 63 |
+
71,
|
| 64 |
+
72,
|
| 65 |
+
73,
|
| 66 |
+
74,
|
| 67 |
+
75,
|
| 68 |
+
76,
|
| 69 |
+
77,
|
| 70 |
+
78,
|
| 71 |
+
79,
|
| 72 |
+
80,
|
| 73 |
+
81,
|
| 74 |
+
82,
|
| 75 |
+
83,
|
| 76 |
+
84,
|
| 77 |
+
85,
|
| 78 |
+
86,
|
| 79 |
+
87,
|
| 80 |
+
88,
|
| 81 |
+
89,
|
| 82 |
+
90
|
| 83 |
+
],
|
| 84 |
+
"C_idx": [
|
| 85 |
+
0,
|
| 86 |
+
1,
|
| 87 |
+
2,
|
| 88 |
+
3,
|
| 89 |
+
4,
|
| 90 |
+
5,
|
| 91 |
+
6,
|
| 92 |
+
7,
|
| 93 |
+
8,
|
| 94 |
+
9,
|
| 95 |
+
10,
|
| 96 |
+
11,
|
| 97 |
+
12,
|
| 98 |
+
13,
|
| 99 |
+
14,
|
| 100 |
+
15,
|
| 101 |
+
92,
|
| 102 |
+
93,
|
| 103 |
+
94,
|
| 104 |
+
95,
|
| 105 |
+
96,
|
| 106 |
+
97,
|
| 107 |
+
98,
|
| 108 |
+
99,
|
| 109 |
+
100,
|
| 110 |
+
101,
|
| 111 |
+
102,
|
| 112 |
+
103,
|
| 113 |
+
104,
|
| 114 |
+
105,
|
| 115 |
+
106,
|
| 116 |
+
107,
|
| 117 |
+
108,
|
| 118 |
+
109,
|
| 119 |
+
110,
|
| 120 |
+
111,
|
| 121 |
+
112,
|
| 122 |
+
113,
|
| 123 |
+
114,
|
| 124 |
+
115,
|
| 125 |
+
116,
|
| 126 |
+
117,
|
| 127 |
+
118,
|
| 128 |
+
119,
|
| 129 |
+
120
|
| 130 |
+
],
|
| 131 |
+
"scene_idx": 115,
|
| 132 |
+
"R_idx": [
|
| 133 |
+
92,
|
| 134 |
+
14,
|
| 135 |
+
12,
|
| 136 |
+
94,
|
| 137 |
+
10,
|
| 138 |
+
96,
|
| 139 |
+
98
|
| 140 |
+
],
|
| 141 |
+
"R_iou": [
|
| 142 |
+
0.07454549403766372,
|
| 143 |
+
0.0694113967938869,
|
| 144 |
+
0.06155005990764799,
|
| 145 |
+
0.06073820098027936,
|
| 146 |
+
0.06025912646675358,
|
| 147 |
+
0.05896005194930133,
|
| 148 |
+
0.05594802497313094
|
| 149 |
+
],
|
| 150 |
+
"R_stats": {
|
| 151 |
+
"threshold": 0.04,
|
| 152 |
+
"max_refs": 7,
|
| 153 |
+
"stride": 2,
|
| 154 |
+
"voxel_size": 0.01,
|
| 155 |
+
"num_targets": 65,
|
| 156 |
+
"num_candidates": 23,
|
| 157 |
+
"best_iou": 0.07454549403766372,
|
| 158 |
+
"avg_iou": 0.040257401322028034
|
| 159 |
+
},
|
| 160 |
+
"projection_channels": [
|
| 161 |
+
"rgb"
|
| 162 |
+
],
|
| 163 |
+
"output_size": [
|
| 164 |
+
480,
|
| 165 |
+
832
|
| 166 |
+
],
|
| 167 |
+
"add_fg_to_projection": true,
|
| 168 |
+
"has_fg": true,
|
| 169 |
+
"src_aug_applied": false,
|
| 170 |
+
"naming": "figure"
|
| 171 |
+
}
|
examples/training_sample/processed/00000000/train_target_fg_rgb.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
A white van drives forward, followed by an orange and white bus. A large truck passes on the right. A yellow taxi speeds past the bus.
|
examples/training_sample/processed/00000000/train_target_proj_fg_rgb.mp4
ADDED
|
Binary file (34.3 kB). View file
|
|
|
examples/training_sample/processed/00000000/train_target_rgb.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:629b6e0fb5de1a0a2b0ede81d5ce8a48c044559b8f2b35bbd740ac09257757a7
|
| 3 |
+
size 808291
|
examples/training_sample/processed/00000000/train_target_rgb.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
The vehicle glides forward along a wide urban avenue, flanked by bare trees and modern buildings under an overcast sky. Traffic flows steadily, including a prominent orange-and-white bus and a large truck passing by. Pedestrians stroll along sidewalks beside metal railings, while streetlights and traffic signals punctuate the scene. The camera moves smoothly ahead, capturing the rhythm of city life with muted tones and quiet motion.
|
examples/training_sample/processed/00000000/train_target_scene_proj_fg_overlay_rgb.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01f26e2972c6b80172049df7774a6cfc6e74fdf14766439947e5139a63d8ee47
|
| 3 |
+
size 832132
|
examples/training_sample/processed/00000000/train_target_scene_proj_rgb.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08b45fbdfecea9614eb673ebc91a794728fe281cc848d53d462a109a8e36764
|
| 3 |
+
size 820568
|
examples/training_sample/processed/00000000/train_target_scene_proj_rgb_orig.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08b45fbdfecea9614eb673ebc91a794728fe281cc848d53d462a109a8e36764
|
| 3 |
+
size 820568
|
examples/training_sample/processed/00000000/train_target_scene_rgb.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Urban street flanked by bare trees and modern buildings under overcast skies. Concrete sidewalks and metal railings line the road. Streetlights and traffic signals punctuate the scene. Buildings display varied architectural styles. The atmosphere is muted, with diffused lighting enhancing the city’s quiet, subdued ambiance. Camera moves steadily forward.
|
examples/training_sample/processed/00000001/clip.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:192530efced1bb9553bff5bb56af57874d412e82a77f321ad22ccc4ba4cc923c
|
| 3 |
+
size 1694156
|
examples/training_sample/processed/00000001/dynamic_masks.mp4
ADDED
|
Binary file (6.14 kB). View file
|
|
|
examples/training_sample/processed/00000001/dynamic_prompts.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"raw": "Nothing",
|
| 3 |
+
"entities": [],
|
| 4 |
+
"prompts": [],
|
| 5 |
+
"status": "nothing"
|
| 6 |
+
}
|
examples/training_sample/processed/00000001/geometry.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25ca3cd74738693835fee9a4b51afcfadf6e0ad8bd26b599d2d48e3113efc1f7
|
| 3 |
+
size 162947301
|
examples/training_sample/processed/00000001/latents.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09aec048eccdf9ebd9252776729df42ff301af7fb22709502ac372ef5c16420e
|
| 3 |
+
size 17376130
|
examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_1.mp4
ADDED
|
Binary file (1.63 kB). View file
|
|
|
examples/training_sample/processed/00000001/train_preceding_proj_fg_rgb_9.mp4
ADDED
|
Binary file (1.97 kB). View file
|
|
|
examples/training_sample/processed/00000001/train_preceding_rgb_1.mp4
ADDED
|
Binary file (21.4 kB). View file
|
|
|
examples/training_sample/processed/00000001/train_preceding_rgb_9.mp4
ADDED
|
Binary file (74.6 kB). View file
|
|
|
examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_1.mp4
ADDED
|
Binary file (12.1 kB). View file
|
|
|
examples/training_sample/processed/00000001/train_preceding_scene_proj_fg_overlay_rgb_9.mp4
ADDED
|
Binary file (59.6 kB). View file
|
|
|
examples/training_sample/processed/00000001/train_preceding_scene_proj_rgb_1.mp4
ADDED
|
Binary file (12.1 kB). View file
|
|
|