| { |
| "_name_or_path": "./checkpoints/vicuna-v1-3-13b", |
| "architectures": [ |
| "HandsOnVLMForCausalLM" |
| ], |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "freeze_mm_mlp_adapter": false, |
| "fuse_input_mode": "origin", |
| "hand_token_id": 32100, |
| "hidden_act": "silu", |
| "hidden_size": 5120, |
| "hoi_lambda": 1.0, |
| "image_aspect_ratio": "square", |
| "image_grid_pinpoints": null, |
| "initializer_range": 0.02, |
| "input_type": "video", |
| "intermediate_size": 13824, |
| "lambda_last_hand": 0.001, |
| "lambda_obj": 0.1, |
| "lambda_obj_kl": 0.001, |
| "lambda_traj": 1, |
| "lambda_traj_kl": 0.001, |
| "max_position_embeddings": 2048, |
| "mm_hidden_size": 1024, |
| "mm_use_im_patch_token": false, |
| "mm_use_im_start_end": false, |
| "mm_vision_select_feature": "patch", |
| "mm_vision_select_layer": -2, |
| "mm_vision_tower": "openai/clip-vit-large-patch14", |
| "model_type": "handsonvlm", |
| "num_attention_heads": 40, |
| "num_frames": 100, |
| "num_hidden_layers": 40, |
| "num_key_value_heads": 40, |
| "num_time_tokens": 100, |
| "pad_token_id": 0, |
| "pretraining_tp": 1, |
| "rms_norm_eps": 1e-06, |
| "rope_scaling": null, |
| "tie_word_embeddings": false, |
| "time_token_ids": [ |
| 32000, |
| 32001, |
| 32002, |
| 32003, |
| 32004, |
| 32005, |
| 32006, |
| 32007, |
| 32008, |
| 32009, |
| 32010, |
| 32011, |
| 32012, |
| 32013, |
| 32014, |
| 32015, |
| 32016, |
| 32017, |
| 32018, |
| 32019, |
| 32020, |
| 32021, |
| 32022, |
| 32023, |
| 32024, |
| 32025, |
| 32026, |
| 32027, |
| 32028, |
| 32029, |
| 32030, |
| 32031, |
| 32032, |
| 32033, |
| 32034, |
| 32035, |
| 32036, |
| 32037, |
| 32038, |
| 32039, |
| 32040, |
| 32041, |
| 32042, |
| 32043, |
| 32044, |
| 32045, |
| 32046, |
| 32047, |
| 32048, |
| 32049, |
| 32050, |
| 32051, |
| 32052, |
| 32053, |
| 32054, |
| 32055, |
| 32056, |
| 32057, |
| 32058, |
| 32059, |
| 32060, |
| 32061, |
| 32062, |
| 32063, |
| 32064, |
| 32065, |
| 32066, |
| 32067, |
| 32068, |
| 32069, |
| 32070, |
| 32071, |
| 32072, |
| 32073, |
| 32074, |
| 32075, |
| 32076, |
| 32077, |
| 32078, |
| 32079, |
| 32080, |
| 32081, |
| 32082, |
| 32083, |
| 32084, |
| 32085, |
| 32086, |
| 32087, |
| 32088, |
| 32089, |
| 32090, |
| 32091, |
| 32092, |
| 32093, |
| 32094, |
| 32095, |
| 32096, |
| 32097, |
| 32098, |
| 32099 |
| ], |
| "torch_dtype": "bfloat16", |
| "traj_decoder": "CVAE", |
| "transformers_version": "4.31.0", |
| "tune_mm_mlp_adapter": false, |
| "use_cache": false, |
| "use_mm_proj": true, |
| "video_compress_mode": "temporal_spatial_pool", |
| "vocab_size": 32101 |
| } |
|
|