| import transformers |
|
|
| from typing import Dict, Optional, Sequence, List |
| from dataclasses import dataclass, field |
|
|
@dataclass
class ModelArguments:
    """Arguments controlling model selection, the vision tower, and adapters."""

    # Base language model checkpoint and conversation-template version.
    model_name_or_path: Optional[str] = "facebook/opt-125m"
    version: Optional[str] = "v0"
    freeze_backbone: bool = False
    tune_mm_mlp_adapter: bool = False

    # Vision tower / multimodal projector settings (mm_* flags; exact
    # semantics live in the model code — confirm there).
    vision_tower: Optional[str] = None
    mm_vision_select_layer: Optional[int] = -1
    pretrain_mm_mlp_adapter: Optional[str] = None
    mm_projector_type: Optional[str] = 'linear'
    mm_use_start_end: bool = False
    mm_use_patch_token: bool = True
    mm_patch_merge_type: Optional[str] = 'flat'
    mm_vision_select_feature: Optional[str] = "patch"
    # NOTE(review): kept as a string literal of a list — presumably parsed
    # downstream (e.g. via ast.literal_eval); verify against the consumer.
    image_grid_pinpoints: Optional[str] = "[(448, 448)]"

    # ViT / Q-Former options.
    img_size: int = 224
    drop_path_rate: float = 0.
    vit_precision: Optional[str] = "fp16"
    vit_model_path: Optional[str] = None
    qformer_model_path: Optional[str] = None
    num_query_token: int = 32

    # Optional external adapter module.
    adapter_module_name: Optional[str] = None
    adapter_module_path: Optional[str] = None
|
|
@dataclass
class DataArguments:
    """Arguments controlling dataset loading and preprocessing."""

    # Path to the training dataset config file.
    dataset_config: str = field(default="",
                                metadata={"help": "Training dataset config path"})

    # presumably defers per-example preprocessing until access — confirm
    # against the dataset implementation.
    lazy_preprocess: bool = False
    is_multimodal: bool = False

    # How images are fitted to the model input resolution.
    image_aspect_ratio: str = 'square'

    # Video sampling: number of segments and frame-sampling strategy.
    num_segments: int = 10
    sample_strategy: str = 'fps0.5'

    # Fix: was annotated as bare `dict` with a None default; the annotation
    # is corrected to Optional[dict] (default behavior unchanged).
    external_args: Optional[dict] = None
    num_token_per_image: Optional[int] = field(default=32)
|
|
|
|
@dataclass
class TrainingArguments(transformers.TrainingArguments):
    """Extends HF TrainingArguments with freeze, quantization and LoRA knobs."""

    cache_dir: Optional[str] = None
    optim: str = "adamw_torch"
    remove_unused_columns: bool = False

    # Freeze switches for multimodal sub-modules.
    freeze_mm_mlp_adapter: bool = False
    freeze_qformer: bool = True
    freeze_adapter: bool = False

    mpt_attn_impl: Optional[str] = "triton"
    model_max_length: int = field(
        default=512,
        metadata={
            "help":
            "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )

    # Quantization options (bitsandbytes/QLoRA-style flags — confirm usage
    # at the model-loading site).
    double_quant: bool = field(
        default=True,
        metadata={"help": "Compress the quantization statistics through double quantization."}
    )
    quant_type: str = field(
        default="nf4",
        metadata={"help": "Quantization data type to use. Should be one of `fp4` or `nf4`."}
    )
    bits: int = field(
        default=16,
        metadata={"help": "How many bits to use."}
    )

    # LoRA configuration.
    lora_enable: bool = False
    lora_r: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    lora_weight_path: str = ""
    lora_bias: str = "none"

    # Optional per-module learning-rate overrides.
    mm_projector_lr: Optional[float] = None
    lora_lr: Optional[float] = None
    group_by_modality_length: bool = False