Image-to-Video
Diffusers
Safetensors
SsharvienKumar committed on
Commit
5e081ca
·
verified ·
1 Parent(s): 1bce491

Upload 28 files

Browse files
Files changed (28) hide show
  1. checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/controlnet/config.json +77 -0
  2. checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/controlnet/diffusion_pytorch_model.safetensors +3 -0
  3. checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/optimizer.bin +3 -0
  4. checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/random_states_0.pkl +3 -0
  5. checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/scheduler.bin +3 -0
  6. checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/controlnet/config.json +76 -0
  7. checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/controlnet/diffusion_pytorch_model.safetensors +3 -0
  8. checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/optimizer.bin +3 -0
  9. checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/random_states_0.pkl +3 -0
  10. checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/scheduler.bin +3 -0
  11. checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/optimizer.bin +3 -0
  12. checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/random_states_0.pkl +3 -0
  13. checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/scheduler.bin +3 -0
  14. checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/unet/config.json +79 -0
  15. checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/unet/diffusion_pytorch_model.safetensors +3 -0
  16. checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/optimizer.bin +3 -0
  17. checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/random_states_0.pkl +3 -0
  18. checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/scheduler.bin +3 -0
  19. checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/unet/config.json +79 -0
  20. checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/unet/diffusion_pytorch_model.safetensors +3 -0
  21. checkpoints/Cataract-1K/graphencoder_masked/best_val_loss.pth +3 -0
  22. checkpoints/Cataract-1K/graphencoder_segclip/best_val_loss.pth +3 -0
  23. checkpoints/Cataract-1K/vae_vid_diffusion/vae/config.json +37 -0
  24. checkpoints/Cataract-1K/vae_vid_diffusion/vae/diffusion_pytorch_model.safetensors +3 -0
  25. checkpoints/Cataract-1K/vqgan_image/checkpoint.ckpt +3 -0
  26. checkpoints/Cataract-1K/vqgan_image/config.yaml +57 -0
  27. checkpoints/Cataract-1K/vqgan_segmentation/checkpoint.ckpt +3 -0
  28. checkpoints/Cataract-1K/vqgan_segmentation/config.yaml +52 -0
checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/controlnet/config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "ControlNetModel",
3
+ "_diffusers_version": "0.21.2",
4
+ "_name_or_path": "./checkpoints/Cataract-1k/video_diffusion/surgsimbridge_training_img_graph_vid_cataract1k-2026-04-15T07-35-15/checkpoints/checkpoint-145000",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": [
10
+ 5,
11
+ 10,
12
+ 20,
13
+ 20
14
+ ],
15
+ "attention_type": "default",
16
+ "augment_temporal_attention": true,
17
+ "block_out_channels": [
18
+ 320,
19
+ 640,
20
+ 1280,
21
+ 1280
22
+ ],
23
+ "class_embed_type": "identity",
24
+ "class_embeddings_concat": true,
25
+ "conditioning_channels": 3,
26
+ "conditioning_embedding_out_channels": [
27
+ 16,
28
+ 32,
29
+ 96,
30
+ 256
31
+ ],
32
+ "conv_in_kernel": 3,
33
+ "cross_attention_dim": 1024,
34
+ "cross_attention_norm": null,
35
+ "down_block_types": [
36
+ "CrossAttnDownBlock2D",
37
+ "CrossAttnDownBlock2D",
38
+ "CrossAttnDownBlock2D",
39
+ "DownBlock2D"
40
+ ],
41
+ "downsample_padding": 1,
42
+ "dropout": 0.0,
43
+ "dual_cross_attention": false,
44
+ "encoder_hid_dim": null,
45
+ "encoder_hid_dim_type": null,
46
+ "first_frame_condition_mode": "concat",
47
+ "flip_sin_to_cos": true,
48
+ "freq_shift": 0,
49
+ "global_pool_conditions": false,
50
+ "in_channels": 4,
51
+ "layers_per_block": 2,
52
+ "mid_block_only_cross_attention": null,
53
+ "mid_block_scale_factor": 1,
54
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
55
+ "n_frames": 16,
56
+ "n_temp_heads": 8,
57
+ "norm_eps": 1e-05,
58
+ "norm_num_groups": 32,
59
+ "num_attention_heads": null,
60
+ "num_class_embeds": null,
61
+ "only_cross_attention": false,
62
+ "projection_class_embeddings_input_dim": null,
63
+ "resnet_out_scale_factor": 1.0,
64
+ "resnet_skip_time_act": false,
65
+ "resnet_time_scale_shift": "default",
66
+ "temp_pos_embedding": "rotary",
67
+ "time_cond_proj_dim": null,
68
+ "time_embedding_act_fn": null,
69
+ "time_embedding_dim": 512,
70
+ "time_embedding_type": "positional",
71
+ "timestep_post_act": null,
72
+ "transformer_layers_per_block": 1,
73
+ "upcast_attention": false,
74
+ "use_frame_stride_condition": false,
75
+ "use_linear_projection": true,
76
+ "use_temporal": true
77
+ }
checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/controlnet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820df30af2eb1f58e7482527ba44e6f032b207aa8158d2316bcd556c976ddc28
3
+ size 2102262840
checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8354bc5f78800d0550eb25175805b996820198d470e8dd8d94b1f66af130d61b
3
+ size 4165435906
checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:496c5491d5471c8b259125aac6d453c89b669f87e41cb199b7cdac71d09de86f
3
+ size 15060
checkpoints/Cataract-1K/controlnet_img_graph_vid/checkpoint/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaa9a799183c7caa950febe94a4dab7ab200474b49179a367833ffed6b68d3c1
3
+ size 1000
checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/controlnet/config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "ControlNetModel",
3
+ "_diffusers_version": "0.21.2",
4
+ "act_fn": "silu",
5
+ "addition_embed_type": null,
6
+ "addition_embed_type_num_heads": 64,
7
+ "addition_time_embed_dim": null,
8
+ "attention_head_dim": [
9
+ 5,
10
+ 10,
11
+ 20,
12
+ 20
13
+ ],
14
+ "attention_type": "default",
15
+ "augment_temporal_attention": true,
16
+ "block_out_channels": [
17
+ 320,
18
+ 640,
19
+ 1280,
20
+ 1280
21
+ ],
22
+ "class_embed_type": "identity",
23
+ "class_embeddings_concat": true,
24
+ "conditioning_channels": 3,
25
+ "conditioning_embedding_out_channels": [
26
+ 16,
27
+ 32,
28
+ 96,
29
+ 256
30
+ ],
31
+ "conv_in_kernel": 3,
32
+ "cross_attention_dim": 1024,
33
+ "cross_attention_norm": null,
34
+ "down_block_types": [
35
+ "CrossAttnDownBlock2D",
36
+ "CrossAttnDownBlock2D",
37
+ "CrossAttnDownBlock2D",
38
+ "DownBlock2D"
39
+ ],
40
+ "downsample_padding": 1,
41
+ "dropout": 0.0,
42
+ "dual_cross_attention": false,
43
+ "encoder_hid_dim": null,
44
+ "encoder_hid_dim_type": null,
45
+ "first_frame_condition_mode": "none",
46
+ "flip_sin_to_cos": true,
47
+ "freq_shift": 0,
48
+ "global_pool_conditions": false,
49
+ "in_channels": 4,
50
+ "layers_per_block": 2,
51
+ "mid_block_only_cross_attention": null,
52
+ "mid_block_scale_factor": 1,
53
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
54
+ "n_frames": 16,
55
+ "n_temp_heads": 8,
56
+ "norm_eps": 1e-05,
57
+ "norm_num_groups": 32,
58
+ "num_attention_heads": null,
59
+ "num_class_embeds": null,
60
+ "only_cross_attention": false,
61
+ "projection_class_embeddings_input_dim": null,
62
+ "resnet_out_scale_factor": 1.0,
63
+ "resnet_skip_time_act": false,
64
+ "resnet_time_scale_shift": "default",
65
+ "temp_pos_embedding": "rotary",
66
+ "time_cond_proj_dim": null,
67
+ "time_embedding_act_fn": null,
68
+ "time_embedding_dim": 512,
69
+ "time_embedding_type": "positional",
70
+ "timestep_post_act": null,
71
+ "transformer_layers_per_block": 1,
72
+ "upcast_attention": false,
73
+ "use_frame_stride_condition": false,
74
+ "use_linear_projection": true,
75
+ "use_temporal": true
76
+ }
checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/controlnet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:859a5b41e7b2044c643f6677da64ed4dd7b1220e2de20c6a29b4b89cabab4ef1
3
+ size 2102262840
checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8045adfdaa09aa28b1fa7c556aaa02714d3e0244a78e401e74206b9b5b9c6e69
3
+ size 4165427650
checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ebb76978b0d6edffa23a13921f66f7f593a0b46ede1ede4cc8dc2e7fd7b3e11
3
+ size 15060
checkpoints/Cataract-1K/controlnet_ximg_graph_vid/checkpoint/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:532bd73f2b1d01dc0536daabcec58f40e379e94a680763a6e8bc6c9fc3e6d1c8
3
+ size 1000
checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:415b91ff1fe392da4416455500da6eb8d67edefdaf13408c838df9e03dbdb79e
3
+ size 9847488907
checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c063c9a457d2f978404daef7c46ab0170e4a81320bd443c6daf6eb1b2dc483d3
3
+ size 15124
checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94f607683c090632e8d779f519774dd0a2133d20ff4299b9edecbd7b1661901e
3
+ size 1000
checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/unet/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "VideoLDMUNet3DConditionModel",
3
+ "_diffusers_version": "0.21.2",
4
+ "_name_or_path": "/gris/gris-f/homestud/ssivakum/SurgSimBridge_Gen/checkpoints/Cataract-1k/video_diffusion/surgsimbridge_training_img_graph_xvid_cataract1k-2026-03-26T20-30-27/checkpoints/checkpoint-245000",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": [
10
+ 5,
11
+ 10,
12
+ 20,
13
+ 20
14
+ ],
15
+ "attention_type": "default",
16
+ "augment_temporal_attention": true,
17
+ "block_out_channels": [
18
+ 320,
19
+ 640,
20
+ 1280,
21
+ 1280
22
+ ],
23
+ "center_input_sample": false,
24
+ "class_embed_type": "identity",
25
+ "class_embeddings_concat": true,
26
+ "conv_in_kernel": 3,
27
+ "conv_out_kernel": 3,
28
+ "cross_attention_dim": 1024,
29
+ "cross_attention_norm": null,
30
+ "down_block_types": [
31
+ "CrossAttnDownBlock2D",
32
+ "CrossAttnDownBlock2D",
33
+ "CrossAttnDownBlock2D",
34
+ "DownBlock2D"
35
+ ],
36
+ "downsample_padding": 1,
37
+ "dropout": 0.0,
38
+ "dual_cross_attention": false,
39
+ "encoder_hid_dim": null,
40
+ "encoder_hid_dim_type": null,
41
+ "first_frame_condition_mode": "concat",
42
+ "flip_sin_to_cos": true,
43
+ "freq_shift": 0,
44
+ "in_channels": 4,
45
+ "layers_per_block": 2,
46
+ "mid_block_only_cross_attention": null,
47
+ "mid_block_scale_factor": 1,
48
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
49
+ "n_frames": 16,
50
+ "n_temp_heads": 8,
51
+ "norm_eps": 1e-05,
52
+ "norm_num_groups": 32,
53
+ "num_attention_heads": null,
54
+ "num_class_embeds": null,
55
+ "only_cross_attention": false,
56
+ "out_channels": 4,
57
+ "projection_class_embeddings_input_dim": null,
58
+ "resnet_out_scale_factor": 1.0,
59
+ "resnet_skip_time_act": false,
60
+ "resnet_time_scale_shift": "default",
61
+ "sample_size": 64,
62
+ "temp_pos_embedding": "rotary",
63
+ "time_cond_proj_dim": null,
64
+ "time_embedding_act_fn": null,
65
+ "time_embedding_dim": 512,
66
+ "time_embedding_type": "positional",
67
+ "timestep_post_act": null,
68
+ "transformer_layers_per_block": 1,
69
+ "up_block_types": [
70
+ "UpBlock2D",
71
+ "CrossAttnUpBlock2D",
72
+ "CrossAttnUpBlock2D",
73
+ "CrossAttnUpBlock2D"
74
+ ],
75
+ "upcast_attention": false,
76
+ "use_frame_stride_condition": false,
77
+ "use_linear_projection": true,
78
+ "use_temporal": true
79
+ }
checkpoints/Cataract-1K/diffusion_img_graph_xvid/checkpoint/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e5fd513ed200886c036a1da211a097784942a73340d9ddde34645aade9b64d8
3
+ size 4964732628
checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37d6c917c05ab185f9d47db0e4fbb96b1c5ce2d47e0cf3ef4f2c01f427f9e1b4
3
+ size 9847488907
checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ce6f7a255ed67162de7f4c95beb6c09c9276622d1c43ff280d3c1c1120500cc
3
+ size 15060
checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f84e5f00e0f88087868886d1779ea0db653ba13f6a46103160bf4e63f9d73045
3
+ size 1000
checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/unet/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "VideoLDMUNet3DConditionModel",
3
+ "_diffusers_version": "0.21.2",
4
+ "_name_or_path": "/gris/gris-f/homestud/ssivakum/SurgSimBridge_Gen/checkpoints/Cataract-1k/video_diffusion/surgsimbridge_training_ximg_graph_xvid_cataract1k-2026-03-29T17-21-00/checkpoints/checkpoint-240000",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": [
10
+ 5,
11
+ 10,
12
+ 20,
13
+ 20
14
+ ],
15
+ "attention_type": "default",
16
+ "augment_temporal_attention": true,
17
+ "block_out_channels": [
18
+ 320,
19
+ 640,
20
+ 1280,
21
+ 1280
22
+ ],
23
+ "center_input_sample": false,
24
+ "class_embed_type": "identity",
25
+ "class_embeddings_concat": true,
26
+ "conv_in_kernel": 3,
27
+ "conv_out_kernel": 3,
28
+ "cross_attention_dim": 1024,
29
+ "cross_attention_norm": null,
30
+ "down_block_types": [
31
+ "CrossAttnDownBlock2D",
32
+ "CrossAttnDownBlock2D",
33
+ "CrossAttnDownBlock2D",
34
+ "DownBlock2D"
35
+ ],
36
+ "downsample_padding": 1,
37
+ "dropout": 0.0,
38
+ "dual_cross_attention": false,
39
+ "encoder_hid_dim": null,
40
+ "encoder_hid_dim_type": null,
41
+ "first_frame_condition_mode": "none",
42
+ "flip_sin_to_cos": true,
43
+ "freq_shift": 0,
44
+ "in_channels": 4,
45
+ "layers_per_block": 2,
46
+ "mid_block_only_cross_attention": null,
47
+ "mid_block_scale_factor": 1,
48
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
49
+ "n_frames": 16,
50
+ "n_temp_heads": 8,
51
+ "norm_eps": 1e-05,
52
+ "norm_num_groups": 32,
53
+ "num_attention_heads": null,
54
+ "num_class_embeds": null,
55
+ "only_cross_attention": false,
56
+ "out_channels": 4,
57
+ "projection_class_embeddings_input_dim": null,
58
+ "resnet_out_scale_factor": 1.0,
59
+ "resnet_skip_time_act": false,
60
+ "resnet_time_scale_shift": "default",
61
+ "sample_size": 64,
62
+ "temp_pos_embedding": "rotary",
63
+ "time_cond_proj_dim": null,
64
+ "time_embedding_act_fn": null,
65
+ "time_embedding_dim": 512,
66
+ "time_embedding_type": "positional",
67
+ "timestep_post_act": null,
68
+ "transformer_layers_per_block": 1,
69
+ "up_block_types": [
70
+ "UpBlock2D",
71
+ "CrossAttnUpBlock2D",
72
+ "CrossAttnUpBlock2D",
73
+ "CrossAttnUpBlock2D"
74
+ ],
75
+ "upcast_attention": false,
76
+ "use_frame_stride_condition": false,
77
+ "use_linear_projection": true,
78
+ "use_temporal": true
79
+ }
checkpoints/Cataract-1K/diffusion_ximg_graph_xvid/checkpoint/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7b626c753583df2dcbcab69355cb4769876782d990e97b169168195ad66e2e8
3
+ size 4964732628
checkpoints/Cataract-1K/graphencoder_masked/best_val_loss.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60850527fd42a58b8422c757abfd72198e4b7602d757b74ba14480fc26fa2f86
3
+ size 350312386
checkpoints/Cataract-1K/graphencoder_segclip/best_val_loss.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e107df17abb7ae93f6e9f0e7c2192b1e0026de83e75c64f8b14d6bf9332e6c
3
+ size 277281262
checkpoints/Cataract-1K/vae_vid_diffusion/vae/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.31.0",
4
+ "act_fn": "silu",
5
+ "block_out_channels": [
6
+ 128,
7
+ 256,
8
+ 512,
9
+ 512
10
+ ],
11
+ "down_block_types": [
12
+ "DownEncoderBlock2D",
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D"
16
+ ],
17
+ "force_upcast": true,
18
+ "in_channels": 3,
19
+ "latent_channels": 4,
20
+ "latents_mean": null,
21
+ "latents_std": null,
22
+ "layers_per_block": 2,
23
+ "mid_block_add_attention": true,
24
+ "norm_num_groups": 32,
25
+ "out_channels": 3,
26
+ "sample_size": 512,
27
+ "scaling_factor": 0.18215,
28
+ "shift_factor": null,
29
+ "up_block_types": [
30
+ "UpDecoderBlock2D",
31
+ "UpDecoderBlock2D",
32
+ "UpDecoderBlock2D",
33
+ "UpDecoderBlock2D"
34
+ ],
35
+ "use_post_quant_conv": true,
36
+ "use_quant_conv": true
37
+ }
checkpoints/Cataract-1K/vae_vid_diffusion/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c092d4ddabf78277ce148cf7cbaf7f115119ef79b352a1a44d86f697f7052d51
3
+ size 334643268
checkpoints/Cataract-1K/vqgan_image/checkpoint.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b02b93c343804b00d63ea69003490a27a7d3bb61825fbfa374c1a8ba14ff1ed
3
+ size 878907278
checkpoints/Cataract-1K/vqgan_image/config.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 4.5e-06
3
+ target: surgsimbridge.taming.taming.models.vqgan.VQModel
4
+ params:
5
+ embed_dim: 1
6
+ n_embed: 16384
7
+ image_key: image
8
+ ddconfig:
9
+ double_z: false
10
+ z_channels: 1
11
+ resolution: 128
12
+ in_channels: 3
13
+ out_ch: 3
14
+ ch: 128
15
+ ch_mult:
16
+ - 1
17
+ - 2
18
+ - 2
19
+ - 4
20
+ num_res_blocks: 2
21
+ attn_resolutions:
22
+ - 16
23
+ dropout: 0.0
24
+ lossconfig:
25
+ target: surgsimbridge.taming.taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
26
+ params:
27
+ disc_conditional: false
28
+ disc_in_channels: 3
29
+ disc_num_layers: 2
30
+ disc_start: 1
31
+ disc_weight: 0.6
32
+ codebook_weight: 1.0
33
+ data:
34
+ target: main.DataModuleFromConfig
35
+ params:
36
+ batch_size: 16
37
+ num_workers: 16
38
+ train:
39
+ target: surgsimbridge.taming.taming.data.surgicaldataset.CataractTrain
40
+ params:
41
+ size: 128
42
+ num_label: 14
43
+ augment: true
44
+ txt_file:
45
+ - /gris/scratch-gris-filesrv/sharvien/SurgSimBridge/Cataract-1K/splits/train.txt
46
+ data_root:
47
+ - /gris/scratch-gris-filesrv/sharvien/SurgSimBridge/Cataract-1K
48
+ validation:
49
+ target: surgsimbridge.taming.taming.data.surgicaldataset.CataractValidation
50
+ params:
51
+ size: 128
52
+ num_label: 14
53
+ augment: false
54
+ txt_file:
55
+ - /gris/scratch-gris-filesrv/sharvien/SurgSimBridge/Cataract-1K/splits/val.txt
56
+ data_root:
57
+ - /gris/scratch-gris-filesrv/sharvien/SurgSimBridge/Cataract-1K
checkpoints/Cataract-1K/vqgan_segmentation/checkpoint.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df08b568767b66d718678837320f822184a98d3fd4d1194250cc43b34d77bd17
3
+ size 812256860
checkpoints/Cataract-1K/vqgan_segmentation/config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 4.5e-06
3
+ target: surgsimbridge.taming.taming.models.vqgan.VQSegmentationModel
4
+ params:
5
+ embed_dim: 1
6
+ n_embed: 8192
7
+ image_key: segmentation
8
+ ddconfig:
9
+ double_z: false
10
+ z_channels: 1
11
+ resolution: 128
12
+ in_channels: 14
13
+ out_ch: 14
14
+ ch: 128
15
+ ch_mult:
16
+ - 1
17
+ - 2
18
+ - 2
19
+ - 4
20
+ num_res_blocks: 2
21
+ attn_resolutions:
22
+ - 16
23
+ dropout: 0.0
24
+ lossconfig:
25
+ target: surgsimbridge.taming.taming.modules.losses.segmentation.BCELossWithQuant
26
+ params:
27
+ codebook_weight: 1.0
28
+ data:
29
+ target: main.DataModuleFromConfig
30
+ params:
31
+ batch_size: 16
32
+ num_workers: 16
33
+ train:
34
+ target: surgsimbridge.taming.taming.data.surgicaldataset.CataractTrain
35
+ params:
36
+ size: 128
37
+ num_label: 14
38
+ augment: true
39
+ txt_file:
40
+ - /gris/scratch-gris-filesrv/sharvien/SurgSimBridge/Cataract-1K/splits/train.txt
41
+ data_root:
42
+ - /gris/scratch-gris-filesrv/sharvien/SurgSimBridge/Cataract-1K
43
+ validation:
44
+ target: surgsimbridge.taming.taming.data.surgicaldataset.CataractValidation
45
+ params:
46
+ size: 128
47
+ num_label: 14
48
+ augment: false
49
+ txt_file:
50
+ - /gris/scratch-gris-filesrv/sharvien/SurgSimBridge/Cataract-1K/splits/val.txt
51
+ data_root:
52
+ - /gris/scratch-gris-filesrv/sharvien/SurgSimBridge/Cataract-1K