# Per-architecture lookup table, keyed by a model-type string.
#
# Every entry has the same two fields:
#   "config_names": maps a generic field name (context_length, vocab_size,
#                   width, heads, layers, layer_attr, token_embeddings_attr)
#                   to the architecture-specific name, stored as a string.
#   "pooler":       name of the pooling strategy, stored as a string.
#
# NOTE(review): the mapped values look like Hugging Face `transformers`
# config / module attribute names -- confirm against the code that consumes
# this table.
arch_dict = {
    "roberta": {
        "config_names": {
            "context_length": "max_position_embeddings",
            "vocab_size": "vocab_size",
            "width": "hidden_size",
            "heads": "num_attention_heads",
            "layers": "num_hidden_layers",
            "layer_attr": "layer",
            "token_embeddings_attr": "embeddings"
        },
        "pooler": "mean_pooler",
    },
    # Same mapping as "roberta"; kept as a separate literal so the two entries
    # never share (and cannot accidentally co-mutate) a dict object.
    "xlm-roberta": {
        "config_names": {
            "context_length": "max_position_embeddings",
            "vocab_size": "vocab_size",
            "width": "hidden_size",
            "heads": "num_attention_heads",
            "layers": "num_hidden_layers",
            "layer_attr": "layer",
            "token_embeddings_attr": "embeddings"
        },
        "pooler": "mean_pooler",
    },
    "mt5": {
        "config_names": {
            # NOTE(review): unlike the RoBERTa entries, context_length maps to
            # an empty string here -- presumably this architecture exposes no
            # fixed maximum-length attribute; confirm how consumers treat "".
            "context_length": "",
            "vocab_size": "vocab_size",
            "width": "d_model",
            "heads": "num_heads",
            "layers": "num_layers",
            "layer_attr": "block",
            "token_embeddings_attr": "embed_tokens"
        },
        "pooler": "mean_pooler",
    },
    # Same mapping as "mt5"; kept as a separate literal for the same reason as
    # the "roberta" / "xlm-roberta" pair.
    "t5": {
        "config_names": {
            # NOTE(review): context_length is deliberately left as "" to match
            # the "mt5" entry -- confirm how consumers treat "".
            "context_length": "",
            "vocab_size": "vocab_size",
            "width": "d_model",
            "heads": "num_heads",
            "layers": "num_layers",
            "layer_attr": "block",
            "token_embeddings_attr": "embed_tokens"
        },
        "pooler": "mean_pooler",
    },
}