Image-to-Video
Safetensors
Wan2.2
English
Chinese
diffsynth
scope
world-model
video-generation
action-conditioned
game-world-model
first-person-shooter
diffusion
transformer
Instructions to use zizhaotong/SCOPE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Wan2.2
How to use zizhaotong/SCOPE with Wan2.2:
```
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
```
- Notebooks
- Google Colab
- Kaggle
```json
{
  "architectures": ["WanModel"],
  "model_type": "scope",
  "base_model": "Wan2.2-TI2V-5B",
  "torch_dtype": "bfloat16",
  "enable_action": true,
  "action_config": {
    "mouse_dim_in": 4,
    "keyboard_dim_in": 6,
    "dim": 3072,
    "num_heads": 24,
    "vae_time_compression_ratio": 4,
    "windows_size": 4
  },
  "model_config": {
    "has_image_input": false,
    "patch_size": [1, 2, 2],
    "in_dim": 48,
    "dim": 3072,
    "ffn_dim": 14336,
    "freq_dim": 256,
    "text_dim": 4096,
    "out_dim": 48,
    "num_heads": 24,
    "num_layers": 30,
    "eps": 1e-06,
    "seperated_timestep": true,
    "require_clip_embedding": false,
    "require_vae_embedding": false,
    "fuse_vae_embedding_in_latents": true
  },
  "action_buttons": [
    "right_trigger",
    "left_trigger",
    "south",
    "right_thumb",
    "west",
    "north"
  ],
  "num_parameters": 1575
}
```