Upload folder using huggingface_hub
Browse files- .gitattributes +3 -0
- README.md +31 -3
- chinese-hubert-base/README.md +48 -0
- chinese-hubert-base/config.json +71 -0
- chinese-hubert-base/preprocessor_config.json +9 -0
- chinese-hubert-base/pytorch_model.bin +3 -0
- eval_model/best_model.pt +3 -0
- face_vqvae/mat_final.npy +3 -0
- face_vqvae/mat_final_R_I.npy +3 -0
- face_vqvae/pytorch_model_face_fad2cl_260116_codesize2048_codelength512.bin +3 -0
- hubert_kmeans/model.mdl +3 -0
- llm/added_tokens.json +0 -0
- llm/config.json +28 -0
- llm/generation_config.json +14 -0
- llm/merges.txt +0 -0
- llm/model.safetensors +3 -0
- llm/special_tokens_map.json +20 -0
- llm/tokenizer.json +3 -0
- llm/tokenizer_config.json +3 -0
- llm/vocab.json +0 -0
- mask_transformer/config.json +22 -0
- mask_transformer/model.safetensors +3 -0
- mask_transformer/optimizer.pt +3 -0
- mask_transformer/rng_state_0.pth +3 -0
- mask_transformer/rng_state_1.pth +3 -0
- mask_transformer/rng_state_2.pth +3 -0
- mask_transformer/rng_state_3.pth +3 -0
- mask_transformer/scheduler.pt +3 -0
- mask_transformer/trainer_state.json +0 -0
- mask_transformer/training_args.bin +3 -0
- rvqvae/model/epoch_30.pth +3 -0
- rvqvae/opt.txt +58 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
hubert_kmeans/model.mdl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
llm/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
llm/tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,3 +1,31 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SentiAvatar Model Checkpoints
|
| 2 |
+
|
| 3 |
+
请从以下位置下载模型权重,并放置到此目录下:
|
| 4 |
+
|
| 5 |
+
## 目录结构
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
checkpoints/
|
| 9 |
+
├── llm/ # Qwen2-0.5B SFT (Motion Token Planner)
|
| 10 |
+
│ ├── config.json
|
| 11 |
+
│ ├── model.safetensors
|
| 12 |
+
│ ├── tokenizer.json
|
| 13 |
+
│ └── ...
|
| 14 |
+
├── mask_transformer/ # Audio-Motion Mask Transformer
|
| 15 |
+
│ ├── config.json
|
| 16 |
+
│ └── model.safetensors
|
| 17 |
+
├── rvqvae/ # Residual VQ-VAE
|
| 18 |
+
│ ├── opt.txt # 模型配置
|
| 19 |
+
│ └── model/
|
| 20 |
+
│ └── epoch_30.pth # 模型权重
|
| 21 |
+
├── face_vqvae/ # Face VQVAE
|
| 22 |
+
│ ├── pytorch_model_face_fad2cl_260116_codesize2048_codelength512.bin
|
| 23 |
+
│ ├── mat_final.npy
|
| 24 |
+
│ └── mat_final_R_I.npy
|
| 25 |
+
├── chinese-hubert-base/ # Chinese HuBERT
|
| 26 |
+
│ ├── config.json
|
| 27 |
+
│ ├── preprocessor_config.json
|
| 28 |
+
│ └── pytorch_model.bin
|
| 29 |
+
└── eval_model/ # ChronAccRet 评测模型
|
| 30 |
+
└── best_model.pt
|
| 31 |
+
```
|
chinese-hubert-base/README.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
---
|
| 4 |
+
Pretrained on the 10k-hour WenetSpeech L subset. More details are available in [TencentGameMate/chinese_speech_pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain).
|
| 5 |
+
|
| 6 |
+
This model does not have a tokenizer as it was pretrained on audio alone.
|
| 7 |
+
In order to use this model for speech recognition, a tokenizer should be created and the model should be fine-tuned on labeled text data.
|
| 8 |
+
|
| 9 |
+
python package:
|
| 10 |
+
transformers==4.16.2
|
| 11 |
+
|
| 12 |
+
```python
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
import torch
|
| 16 |
+
import torch.nn.functional as F
|
| 17 |
+
import soundfile as sf
|
| 18 |
+
|
| 19 |
+
from transformers import (
|
| 20 |
+
Wav2Vec2FeatureExtractor,
|
| 21 |
+
HubertModel,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
model_path=""
|
| 26 |
+
wav_path=""
|
| 27 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 28 |
+
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
|
| 29 |
+
model = HubertModel.from_pretrained(model_path)
|
| 30 |
+
|
| 31 |
+
# for pretrain: Wav2Vec2ForPreTraining
|
| 32 |
+
# model = Wav2Vec2ForPreTraining.from_pretrained(model_path)
|
| 33 |
+
|
| 34 |
+
model = model.to(device)
|
| 35 |
+
model = model.half()
|
| 36 |
+
model.eval()
|
| 37 |
+
|
| 38 |
+
wav, sr = sf.read(wav_path)
|
| 39 |
+
input_values = feature_extractor(wav, return_tensors="pt").input_values
|
| 40 |
+
input_values = input_values.half()
|
| 41 |
+
input_values = input_values.to(device)
|
| 42 |
+
|
| 43 |
+
with torch.no_grad():
|
| 44 |
+
outputs = model(input_values)
|
| 45 |
+
last_hidden_state = outputs.last_hidden_state
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
```
|
chinese-hubert-base/config.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation_dropout": 0.1,
|
| 3 |
+
"apply_spec_augment": true,
|
| 4 |
+
"architectures": [
|
| 5 |
+
"HubertModel"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"classifier_proj_size": 256,
|
| 10 |
+
"conv_bias": false,
|
| 11 |
+
"conv_dim": [
|
| 12 |
+
512,
|
| 13 |
+
512,
|
| 14 |
+
512,
|
| 15 |
+
512,
|
| 16 |
+
512,
|
| 17 |
+
512,
|
| 18 |
+
512
|
| 19 |
+
],
|
| 20 |
+
"conv_kernel": [
|
| 21 |
+
10,
|
| 22 |
+
3,
|
| 23 |
+
3,
|
| 24 |
+
3,
|
| 25 |
+
3,
|
| 26 |
+
2,
|
| 27 |
+
2
|
| 28 |
+
],
|
| 29 |
+
"conv_stride": [
|
| 30 |
+
5,
|
| 31 |
+
2,
|
| 32 |
+
2,
|
| 33 |
+
2,
|
| 34 |
+
2,
|
| 35 |
+
2,
|
| 36 |
+
2
|
| 37 |
+
],
|
| 38 |
+
"ctc_loss_reduction": "sum",
|
| 39 |
+
"ctc_zero_infinity": false,
|
| 40 |
+
"do_stable_layer_norm": false,
|
| 41 |
+
"eos_token_id": 2,
|
| 42 |
+
"feat_extract_activation": "gelu",
|
| 43 |
+
"feat_extract_norm": "group",
|
| 44 |
+
"feat_proj_dropout": 0.0,
|
| 45 |
+
"feat_proj_layer_norm": true,
|
| 46 |
+
"final_dropout": 0.1,
|
| 47 |
+
"hidden_act": "gelu",
|
| 48 |
+
"hidden_dropout": 0.1,
|
| 49 |
+
"hidden_size": 768,
|
| 50 |
+
"initializer_range": 0.02,
|
| 51 |
+
"intermediate_size": 3072,
|
| 52 |
+
"layer_norm_eps": 1e-05,
|
| 53 |
+
"layerdrop": 0.1,
|
| 54 |
+
"mask_feature_length": 10,
|
| 55 |
+
"mask_feature_min_masks": 0,
|
| 56 |
+
"mask_feature_prob": 0.0,
|
| 57 |
+
"mask_time_length": 10,
|
| 58 |
+
"mask_time_min_masks": 2,
|
| 59 |
+
"mask_time_prob": 0.05,
|
| 60 |
+
"model_type": "hubert",
|
| 61 |
+
"num_attention_heads": 12,
|
| 62 |
+
"num_conv_pos_embedding_groups": 16,
|
| 63 |
+
"num_conv_pos_embeddings": 128,
|
| 64 |
+
"num_feat_extract_layers": 7,
|
| 65 |
+
"num_hidden_layers": 12,
|
| 66 |
+
"pad_token_id": 0,
|
| 67 |
+
"torch_dtype": "float32",
|
| 68 |
+
"transformers_version": "4.20.0.dev0",
|
| 69 |
+
"use_weighted_layer_sum": false,
|
| 70 |
+
"vocab_size": 32
|
| 71 |
+
}
|
chinese-hubert-base/preprocessor_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_normalize": true,
|
| 3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
| 4 |
+
"feature_size": 1,
|
| 5 |
+
"padding_side": "right",
|
| 6 |
+
"padding_value": 0,
|
| 7 |
+
"return_attention_mask": false,
|
| 8 |
+
"sampling_rate": 16000
|
| 9 |
+
}
|
chinese-hubert-base/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fefccd26c2794a583b80f6f7210c721873cb7ebae2c1cde3baf9b27855e24d8
|
| 3 |
+
size 377552987
|
eval_model/best_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:828fe6b931e1ca8cc8f092836290f998507be286f1ac5149ed503b49d65ddb01
|
| 3 |
+
size 454859165
|
face_vqvae/mat_final.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f055de09c64182696499a26c2d6109349c627195bcd40c6adc3dd27f3922b34b
|
| 3 |
+
size 21140
|
face_vqvae/mat_final_R_I.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67befad9e98e372995b5c5d6883bf98a4f6d993f09e139857d12fe16c7257242
|
| 3 |
+
size 21140
|
face_vqvae/pytorch_model_face_fad2cl_260116_codesize2048_codelength512.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1353b1f67308a4ebe6a5c81a0c8a255963b806125717c0d5eb32165767c974f0
|
| 3 |
+
size 51968811
|
hubert_kmeans/model.mdl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1faf1a70098f1853427347520475a802de9aaf7dfb955c3af2cd83b6ca3857cd
|
| 3 |
+
size 1538989
|
llm/added_tokens.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
llm/config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen2ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 151643,
|
| 7 |
+
"eos_token_id": 151645,
|
| 8 |
+
"hidden_act": "silu",
|
| 9 |
+
"hidden_size": 896,
|
| 10 |
+
"initializer_range": 0.02,
|
| 11 |
+
"intermediate_size": 4864,
|
| 12 |
+
"max_position_embeddings": 32768,
|
| 13 |
+
"max_window_layers": 24,
|
| 14 |
+
"model_type": "qwen2",
|
| 15 |
+
"num_attention_heads": 14,
|
| 16 |
+
"num_hidden_layers": 24,
|
| 17 |
+
"num_key_value_heads": 2,
|
| 18 |
+
"rms_norm_eps": 1e-06,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 1000000.0,
|
| 21 |
+
"sliding_window": 32768,
|
| 22 |
+
"tie_word_embeddings": true,
|
| 23 |
+
"torch_dtype": "bfloat16",
|
| 24 |
+
"transformers_version": "4.50.0",
|
| 25 |
+
"use_cache": false,
|
| 26 |
+
"use_sliding_window": false,
|
| 27 |
+
"vocab_size": 225250
|
| 28 |
+
}
|
llm/generation_config.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"repetition_penalty": 1.1,
|
| 10 |
+
"temperature": 0.7,
|
| 11 |
+
"top_k": 20,
|
| 12 |
+
"top_p": 0.8,
|
| 13 |
+
"transformers_version": "4.50.0"
|
| 14 |
+
}
|
llm/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
llm/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:148ea91b5a9c20a6e388e4145b24228b178174c5d04cf2a24c8ee45d2a8426c7
|
| 3 |
+
size 1119476616
|
llm/special_tokens_map.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>"
|
| 5 |
+
],
|
| 6 |
+
"eos_token": {
|
| 7 |
+
"content": "<|im_end|>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false
|
| 12 |
+
},
|
| 13 |
+
"pad_token": {
|
| 14 |
+
"content": "<|endoftext|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false
|
| 19 |
+
}
|
| 20 |
+
}
|
llm/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77ea10323ffa96cd0baf4ad882dc2bced4d304f9fafd10ee05b4950d98c1179b
|
| 3 |
+
size 25286251
|
llm/tokenizer_config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d454d38e6758b1e98515e8a7f5460aed3b0af55156b7dd1557cd0185114dd544
|
| 3 |
+
size 13133512
|
llm/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mask_transformer/config.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"AudioMotionTransformer"
|
| 4 |
+
],
|
| 5 |
+
"audio_feat_dim": 768,
|
| 6 |
+
"codebook_size": 512,
|
| 7 |
+
"cond_drop_prob": 0.2,
|
| 8 |
+
"dropout": 0.2,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_size": 512,
|
| 12 |
+
"intermediate_size": 1536,
|
| 13 |
+
"max_position_embeddings": 512,
|
| 14 |
+
"model_type": "audio_motion_transformer",
|
| 15 |
+
"num_frames": 5,
|
| 16 |
+
"num_heads": 16,
|
| 17 |
+
"num_layers": 8,
|
| 18 |
+
"num_tokens_per_frame": 4,
|
| 19 |
+
"rms_norm_eps": 1e-06,
|
| 20 |
+
"transformers_version": "4.57.1",
|
| 21 |
+
"vocab_size": 2049
|
| 22 |
+
}
|
mask_transformer/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8ed946d32423cc7bf393f9967f7d3fbd11894b5f0b67ff55dbffdb5471358bb
|
| 3 |
+
size 96170728
|
mask_transformer/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6113ea4928940205ec665c711dc80be83466909f2cbb7dcdc9a05479a1d5b970
|
| 3 |
+
size 192411979
|
mask_transformer/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba6cdd9db0560aaf782233459fbc22e7b29251377516e638dc99072fc9275b68
|
| 3 |
+
size 15429
|
mask_transformer/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f166488621222660ee59b49e91252a3d8629c4c55a1695f1ee697de614582c8c
|
| 3 |
+
size 15429
|
mask_transformer/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abac5d9e6b4349d5e1569132ca1055d02315f27bf7dbc7383a633a787ac0de72
|
| 3 |
+
size 15429
|
mask_transformer/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5ea0c1e69201fb66e3dede3dfb473772b1a42c59a22368456e5dfc002b652e3
|
| 3 |
+
size 15429
|
mask_transformer/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2a68cc89bc205077640e629149b54fb06ae537dfdeb010a8c11a0804c012cda
|
| 3 |
+
size 1465
|
mask_transformer/trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mask_transformer/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:845ce5cf164cb40b2109e8c7665fb79c6e8ebf8e4aadeab38af2e24879078735
|
| 3 |
+
size 5777
|
rvqvae/model/epoch_30.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92e3acf2b8a34a8705b24a8005b891d18b590d78fb2d37a11debe29389530a43
|
| 3 |
+
size 790198921
|
rvqvae/opt.txt
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
------------ Options -------------
|
| 2 |
+
batch_size: 256
|
| 3 |
+
body_dim: 153
|
| 4 |
+
body_joints_num: 24
|
| 5 |
+
body_parts: ['body', 'left', 'right', 'positions']
|
| 6 |
+
checkpoints_dir: ./checkpoints
|
| 7 |
+
code_dim: 512
|
| 8 |
+
commit: 0.02
|
| 9 |
+
data_root: /disk1/chuhao/dataset/mocap/mocap_susu_gen_demo/quat63nodes_v4_fix_pos
|
| 10 |
+
dataset_name: quat63nodes_v2_0120
|
| 11 |
+
debug: False
|
| 12 |
+
depth: 3
|
| 13 |
+
dilation_growth_rate: 3
|
| 14 |
+
down_t: 1
|
| 15 |
+
eval_every_e: 1
|
| 16 |
+
feat_bias: 5
|
| 17 |
+
fps: 20
|
| 18 |
+
gamma: 0.05
|
| 19 |
+
gpu_id: 0
|
| 20 |
+
is_continue: False
|
| 21 |
+
left_dim: 120
|
| 22 |
+
left_joints_num: 20
|
| 23 |
+
local_rank: 0
|
| 24 |
+
log_dir: ./log/vq
|
| 25 |
+
log_every: 10
|
| 26 |
+
loss_vel: 50.0
|
| 27 |
+
lr: 0.0001
|
| 28 |
+
max_epoch: 100
|
| 29 |
+
milestones: [50000, 1000000]
|
| 30 |
+
mu: 0.99
|
| 31 |
+
name: gqzV4
|
| 32 |
+
nb_code: 512
|
| 33 |
+
num_quantizers: 4
|
| 34 |
+
num_workers: 4
|
| 35 |
+
quantize_dropout_cutoff_index: 1
|
| 36 |
+
quantize_dropout_prob: 0.8
|
| 37 |
+
recons_loss: l1_smooth
|
| 38 |
+
right_dim: 120
|
| 39 |
+
right_joints_num: 20
|
| 40 |
+
save_every_e: 2
|
| 41 |
+
save_latest: 500
|
| 42 |
+
seed: 3407
|
| 43 |
+
shared_codebook: False
|
| 44 |
+
start_positions_epoch: 0
|
| 45 |
+
stride_t: 2
|
| 46 |
+
total_joints_num: 63
|
| 47 |
+
use_whole_encoder: False
|
| 48 |
+
vq_act: relu
|
| 49 |
+
vq_cnn_depth: 3
|
| 50 |
+
vq_norm: None
|
| 51 |
+
warm_up_iter: 2000
|
| 52 |
+
weight_decay: 0.0
|
| 53 |
+
weight_rec: 5.0
|
| 54 |
+
which_epoch: latest
|
| 55 |
+
whole_dim: 393
|
| 56 |
+
width: 512
|
| 57 |
+
window_size: 64
|
| 58 |
+
-------------- End ----------------
|