sam-motamed committed on
Commit
d533e7a
·
verified ·
1 Parent(s): 29b096c

Upload 19 files

Browse files
.gitattributes CHANGED
@@ -53,3 +53,13 @@ sample/toast-shmello/first_frame.jpg filter=lfs diff=lfs merge=lfs -text
53
  sample/crush-can/first_frame.jpg filter=lfs diff=lfs merge=lfs -text
54
  sample/bowling/first_frame.jpg filter=lfs diff=lfs merge=lfs -text
55
  sample/BigBen/input_video.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
53
  sample/crush-can/first_frame.jpg filter=lfs diff=lfs merge=lfs -text
54
  sample/bowling/first_frame.jpg filter=lfs diff=lfs merge=lfs -text
55
  sample/BigBen/input_video.mp4 filter=lfs diff=lfs merge=lfs -text
56
+ sample/ducky-float/first_frame.jpg filter=lfs diff=lfs merge=lfs -text
57
+ sample/ducky-float/input_video.mp4 filter=lfs diff=lfs merge=lfs -text
58
+ sample/ducky-float/trimask_quadmask.mp4 filter=lfs diff=lfs merge=lfs -text
59
+ sample/spinner/input_video.mp4 filter=lfs diff=lfs merge=lfs -text
60
+ sample/spinner/trimask_quadmask.mp4 filter=lfs diff=lfs merge=lfs -text
61
+ sample/trampoline/input_video.mp4 filter=lfs diff=lfs merge=lfs -text
62
+ sample/trampoline/masks_output/sam3_masks/black_mask.mp4 filter=lfs diff=lfs merge=lfs -text
63
+ sample/trampoline/masks_output/sam3_masks/grey_mask.mp4 filter=lfs diff=lfs merge=lfs -text
64
+ sample/trampoline/masks_output/sam3_masks/input_video.mp4 filter=lfs diff=lfs merge=lfs -text
65
+ sample/trampoline/trimask_quadmask.mp4 filter=lfs diff=lfs merge=lfs -text
sample/ducky-float/.DS_Store ADDED
Binary file (6.15 kB). View file
 
sample/ducky-float/first_frame.jpg ADDED

Git LFS Details

  • SHA256: b0ae8ef102a728f31196128ba1cf301dc9fc43f0f73bade49fee7b0ebfcec34e
  • Pointer size: 131 Bytes
  • Size of remote file: 141 kB
sample/ducky-float/input_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07acee2c2d67a7ac912aa044aeff5af6edf68189681b5cd7c0ab9969e697d171
3
+ size 1327905
sample/ducky-float/prompt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "bg": "A video of water with a floating object."
3
+ }
sample/ducky-float/segmentation_info.json ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_frames": 62,
3
+ "frame_width": 1296,
4
+ "frame_height": 720,
5
+ "fps": 12.0,
6
+ "num_points": 23,
7
+ "points_by_frame": {
8
+ "7": [
9
+ [
10
+ 268,
11
+ 16
12
+ ],
13
+ [
14
+ 353,
15
+ 16
16
+ ],
17
+ [
18
+ 740,
19
+ 14
20
+ ],
21
+ [
22
+ 900,
23
+ 14
24
+ ]
25
+ ],
26
+ "18": [
27
+ [
28
+ 931,
29
+ 121
30
+ ],
31
+ [
32
+ 688,
33
+ 121
34
+ ],
35
+ [
36
+ 511,
37
+ 121
38
+ ],
39
+ [
40
+ 283,
41
+ 110
42
+ ],
43
+ [
44
+ 549,
45
+ 6
46
+ ],
47
+ [
48
+ 657,
49
+ 17
50
+ ],
51
+ [
52
+ 771,
53
+ 27
54
+ ],
55
+ [
56
+ 461,
57
+ 66
58
+ ]
59
+ ],
60
+ "36": [
61
+ [
62
+ 821,
63
+ 46
64
+ ],
65
+ [
66
+ 636,
67
+ 46
68
+ ],
69
+ [
70
+ 445,
71
+ 50
72
+ ],
73
+ [
74
+ 401,
75
+ 50
76
+ ],
77
+ [
78
+ 476,
79
+ 186
80
+ ],
81
+ [
82
+ 733,
83
+ 187
84
+ ],
85
+ [
86
+ 730,
87
+ 333
88
+ ],
89
+ [
90
+ 531,
91
+ 349
92
+ ],
93
+ [
94
+ 364,
95
+ 332
96
+ ],
97
+ [
98
+ 264,
99
+ 317
100
+ ],
101
+ [
102
+ 941,
103
+ 322
104
+ ]
105
+ ]
106
+ },
107
+ "video_path": "teaser4/ducky-float.mp4",
108
+ "instruction": "",
109
+ "primary_points_by_frame": {
110
+ "7": [
111
+ [
112
+ 268,
113
+ 16
114
+ ],
115
+ [
116
+ 353,
117
+ 16
118
+ ],
119
+ [
120
+ 740,
121
+ 14
122
+ ],
123
+ [
124
+ 900,
125
+ 14
126
+ ]
127
+ ],
128
+ "18": [
129
+ [
130
+ 931,
131
+ 121
132
+ ],
133
+ [
134
+ 688,
135
+ 121
136
+ ],
137
+ [
138
+ 511,
139
+ 121
140
+ ],
141
+ [
142
+ 283,
143
+ 110
144
+ ],
145
+ [
146
+ 549,
147
+ 6
148
+ ],
149
+ [
150
+ 657,
151
+ 17
152
+ ],
153
+ [
154
+ 771,
155
+ 27
156
+ ],
157
+ [
158
+ 461,
159
+ 66
160
+ ]
161
+ ],
162
+ "36": [
163
+ [
164
+ 821,
165
+ 46
166
+ ],
167
+ [
168
+ 636,
169
+ 46
170
+ ],
171
+ [
172
+ 445,
173
+ 50
174
+ ],
175
+ [
176
+ 401,
177
+ 50
178
+ ],
179
+ [
180
+ 476,
181
+ 186
182
+ ],
183
+ [
184
+ 733,
185
+ 187
186
+ ],
187
+ [
188
+ 730,
189
+ 333
190
+ ],
191
+ [
192
+ 531,
193
+ 349
194
+ ],
195
+ [
196
+ 364,
197
+ 332
198
+ ],
199
+ [
200
+ 264,
201
+ 317
202
+ ],
203
+ [
204
+ 941,
205
+ 322
206
+ ]
207
+ ]
208
+ },
209
+ "primary_frames": [
210
+ 7,
211
+ 18,
212
+ 36
213
+ ],
214
+ "first_appears_frame": 7
215
+ }
sample/ducky-float/trimask_quadmask.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5754998a3cafd74d9136f392240b2a8932a7d2dff452a8c392800191043becf0
3
+ size 366065
sample/spinner/input_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc5e7d25aaa1df31ace8c4e7aaf8bd19bbe0e7db9756e2d05360f6ef966120ec
3
+ size 4652019
sample/spinner/prompt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "bg": "a video of spinners spinning"
3
+ }
sample/spinner/trimask_quadmask.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a432d080d4b3e306e5a1d25257074732ae2036473ac2402251b31e49c1c78e9
3
+ size 1745346
sample/trampoline/input_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82d889212e4d34d1348db7f1f85977e19c8a5bd6d56663f9eb7155aeb2bf700
3
+ size 3690934
sample/trampoline/masks_output/sam3_masks/black_mask.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc08ea77db7442e41f4488dab89d713d58a2bea000f8f2abae77eca93e668a4
3
+ size 2716337
sample/trampoline/masks_output/sam3_masks/debug_grey_mask_frame1.png ADDED
sample/trampoline/masks_output/sam3_masks/find.png ADDED
sample/trampoline/masks_output/sam3_masks/find_objects.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_objects": 9,
3
+ "scaling_factor": 50,
4
+ "note": "Pixel values in find.png are scaled by 50x for visibility. Object 1 = value 50, Object 2 = value 100, etc.",
5
+ "objects": [
6
+ {
7
+ "id": 1,
8
+ "noun": "trampoline",
9
+ "description": "The main rectangular trampoline with a blue padded frame and white mesh bed in the foreground.",
10
+ "pixel_count": 389175
11
+ },
12
+ {
13
+ "id": 2,
14
+ "noun": "mat",
15
+ "description": "A blue rectangular mat lying flat on the trampoline frame on the left side.",
16
+ "pixel_count": 1660139
17
+ },
18
+ {
19
+ "id": 3,
20
+ "noun": "mat",
21
+ "description": "A large, thick blue mat standing vertically against the wall in the background.",
22
+ "pixel_count": 1660139
23
+ },
24
+ {
25
+ "id": 4,
26
+ "noun": "mat",
27
+ "description": "Another large, thick blue mat standing vertically against the wall on the left.",
28
+ "pixel_count": 1660139
29
+ },
30
+ {
31
+ "id": 5,
32
+ "noun": "mat",
33
+ "description": "A folded red and yellow mat placed on the right side of the trampoline area.",
34
+ "pixel_count": 1660139
35
+ },
36
+ {
37
+ "id": 6,
38
+ "noun": "mat",
39
+ "description": "A stack of dark green/grey mats piled up in the background.",
40
+ "pixel_count": 1660139
41
+ },
42
+ {
43
+ "id": 7,
44
+ "noun": "door",
45
+ "description": "A wooden door visible on the left side of the room.",
46
+ "pixel_count": 36410
47
+ },
48
+ {
49
+ "id": 8,
50
+ "noun": "mat",
51
+ "description": "A blue mat placed on top of the trampoline frame in the background.",
52
+ "pixel_count": 1660139
53
+ },
54
+ {
55
+ "id": 9,
56
+ "noun": "mat",
57
+ "description": "A red mat visible on the floor to the left.",
58
+ "pixel_count": 1660139
59
+ }
60
+ ]
61
+ }
sample/trampoline/masks_output/sam3_masks/grey_mask.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aaa8f346241e27c05f3bdab6a11a43dcbf6b18f5578e536035e6d83783d5a9d
3
+ size 2392634
sample/trampoline/masks_output/sam3_masks/input_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82d889212e4d34d1348db7f1f85977e19c8a5bd6d56663f9eb7155aeb2bf700
3
+ size 3690934
sample/trampoline/masks_output/sam3_masks/vlm_analysis.json ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "primary_subject": {
3
+ "noun": "person",
4
+ "aliases": [
5
+ "man",
6
+ "gymnast",
7
+ "athlete"
8
+ ],
9
+ "position": [
10
+ [
11
+ 1,
12
+ 3
13
+ ],
14
+ [
15
+ 1,
16
+ 4
17
+ ],
18
+ [
19
+ 2,
20
+ 2
21
+ ],
22
+ [
23
+ 2,
24
+ 3
25
+ ],
26
+ [
27
+ 2,
28
+ 4
29
+ ],
30
+ [
31
+ 3,
32
+ 3
33
+ ],
34
+ [
35
+ 3,
36
+ 4
37
+ ],
38
+ [
39
+ 4,
40
+ 3
41
+ ],
42
+ [
43
+ 4,
44
+ 4
45
+ ],
46
+ [
47
+ 5,
48
+ 3
49
+ ],
50
+ [
51
+ 5,
52
+ 4
53
+ ],
54
+ [
55
+ 5,
56
+ 5
57
+ ]
58
+ ],
59
+ "reasoning": "The person's head starts in row 1 (cols 3-4), torso and arms occupy rows 2-3 (cols 2-4), and legs/feet extend down through rows 4-5 (cols 3-5)."
60
+ },
61
+ "integral_belongings": [],
62
+ "affected_subjects": [
63
+ {
64
+ "noun": "shadow",
65
+ "why": "cast by the person on the trampoline surface and surrounding mats",
66
+ "current_position": [
67
+ [
68
+ 5,
69
+ 3
70
+ ],
71
+ [
72
+ 5,
73
+ 4
74
+ ],
75
+ [
76
+ 5,
77
+ 5
78
+ ],
79
+ [
80
+ 6,
81
+ 4
82
+ ],
83
+ [
84
+ 6,
85
+ 5
86
+ ],
87
+ [
88
+ 6,
89
+ 6
90
+ ]
91
+ ],
92
+ "current_reasoning": "Shadows are visible directly under the feet on the blue padding at [5,3], [5,4], and [5,5], and a projected body shadow falls on the white trampoline mesh in row 6, columns 4-6.",
93
+ "contact_position": [
94
+ [
95
+ 5,
96
+ 3
97
+ ],
98
+ [
99
+ 5,
100
+ 4
101
+ ],
102
+ [
103
+ 5,
104
+ 5
105
+ ]
106
+ ],
107
+ "contact_reasoning": "The feet are in physical contact with the blue padding at [5,3], [5,4], and [5,5], which is where the contact shadow originates.",
108
+ "will_move": false,
109
+ "movement_path": [],
110
+ "final_position": [],
111
+ "movement_reasoning": "The shadow is a visual artifact caused by the person blocking light; removing the person removes the shadow."
112
+ },
113
+ {
114
+ "noun": "trampoline bed",
115
+ "why": "depressed and rebounding due to the person's weight and impact",
116
+ "current_position": [
117
+ [
118
+ 6,
119
+ 4
120
+ ],
121
+ [
122
+ 6,
123
+ 5
124
+ ],
125
+ [
126
+ 6,
127
+ 6
128
+ ],
129
+ [
130
+ 6,
131
+ 7
132
+ ],
133
+ [
134
+ 6,
135
+ 8
136
+ ],
137
+ [
138
+ 6,
139
+ 9
140
+ ],
141
+ [
142
+ 7,
143
+ 4
144
+ ],
145
+ [
146
+ 7,
147
+ 5
148
+ ],
149
+ [
150
+ 7,
151
+ 6
152
+ ],
153
+ [
154
+ 7,
155
+ 7
156
+ ],
157
+ [
158
+ 7,
159
+ 8
160
+ ],
161
+ [
162
+ 7,
163
+ 9
164
+ ]
165
+ ],
166
+ "current_reasoning": "The white mesh surface of the trampoline spans rows 6-7 and columns 4-9. It is currently reacting to the gymnast's movement.",
167
+ "contact_position": [],
168
+ "contact_reasoning": "In this specific frame, the person's feet are positioned on the blue padding frame (rows 5, cols 3-5), not directly touching the white mesh bed surface, though the bed is reacting to the motion.",
169
+ "will_move": true,
170
+ "movement_path": [
171
+ [
172
+ 6,
173
+ 4
174
+ ],
175
+ [
176
+ 6,
177
+ 5
178
+ ],
179
+ [
180
+ 6,
181
+ 6
182
+ ],
183
+ [
184
+ 6,
185
+ 7
186
+ ],
187
+ [
188
+ 6,
189
+ 8
190
+ ],
191
+ [
192
+ 6,
193
+ 9
194
+ ],
195
+ [
196
+ 7,
197
+ 4
198
+ ],
199
+ [
200
+ 7,
201
+ 5
202
+ ],
203
+ [
204
+ 7,
205
+ 6
206
+ ],
207
+ [
208
+ 7,
209
+ 7
210
+ ],
211
+ [
212
+ 7,
213
+ 8
214
+ ],
215
+ [
216
+ 7,
217
+ 9
218
+ ]
219
+ ],
220
+ "final_position": [
221
+ [
222
+ 6,
223
+ 4
224
+ ],
225
+ [
226
+ 6,
227
+ 5
228
+ ],
229
+ [
230
+ 6,
231
+ 6
232
+ ],
233
+ [
234
+ 6,
235
+ 7
236
+ ],
237
+ [
238
+ 6,
239
+ 8
240
+ ],
241
+ [
242
+ 6,
243
+ 9
244
+ ],
245
+ [
246
+ 7,
247
+ 4
248
+ ],
249
+ [
250
+ 7,
251
+ 5
252
+ ],
253
+ [
254
+ 7,
255
+ 6
256
+ ],
257
+ [
258
+ 7,
259
+ 7
260
+ ],
261
+ [
262
+ 7,
263
+ 8
264
+ ],
265
+ [
266
+ 7,
267
+ 9
268
+ ]
269
+ ],
270
+ "movement_reasoning": "The trampoline bed will cease vibrating/oscillating and settle into a perfectly flat, static position once the influence of the person is removed."
271
+ }
272
+ ],
273
+ "scene_description": "A gymnasium scene featuring a trampoline with a white mesh bed and blue padding, surrounded by other gymnastic equipment. Without the person, the trampoline bed would appear static and flat.",
274
+ "camera_motion_note": "The camera is static throughout the shot.",
275
+ "confidence": 0.95,
276
+ "grid_rows": 8,
277
+ "grid_cols": 14
278
+ }
sample/trampoline/prompt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "bg": "A gymnasium scene featuring a trampoline with a white mesh bed and blue padding, surrounded by other gymnastic equipment. Without the person, the trampoline bed would appear static and flat."
3
+ }
sample/trampoline/trimask_quadmask.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e6d5293398af384dd9e90d4cb34b5e6d054ed98fc573ae0ac1260cf0ba83157
3
+ size 2714264