| --- |
| license: mit |
| language: |
| - en |
| library_name: transformers |
| tags: |
| - vision |
| - speech |
| - image-text-text |
| - audio-text-text |
| - Multi-Modal |
| pipeline_tag: automatic-speech-recognition |
| --- |
| |
|
|
|
|
| # ADD VISION |
|
|
| ```python |
| |
| |
| |
| print('Add Vision...') |
| # Add the vision head. |
| # Assumes VisionEncoderDecoderModel is already imported and LM_MODEL already |
| # exists; neither is defined in this snippet. |
| # Combine a pre-trained encoder and a pre-trained decoder to form a Seq2Seq model. |
| |
| |
| |
| Vmodel = VisionEncoderDecoderModel.from_encoder_decoder_pretrained( |
| "google/vit-base-patch16-224-in21k", "LeroyDyer/Mixtral_AI_Tiny" |
| ) |
| # NOTE(review): these two names hold Vmodel.encoder and Vmodel.decoder; the |
| # "ImageProcessor" / "ImageTokenizer" naming may be misleading - confirm. |
| _Encoder_ImageProcessor = Vmodel.encoder |
| _Decoder_ImageTokenizer = Vmodel.decoder |
| _VisionEncoderDecoderModel = Vmodel |
| # Attach the combined vision model to LM_MODEL (no pad tokens are set here). |
| LM_MODEL.VisionEncoderDecoder = _VisionEncoderDecoderModel |
| # Attach the encoder and decoder as separate attributes as well. |
| LM_MODEL.Encoder_ImageProcessor = _Encoder_ImageProcessor |
| LM_MODEL.Decoder_ImageTokenizer = _Decoder_ImageTokenizer |
| # Bare expression with no effect in a script; presumably left over from a |
| # notebook cell, where it would display the object. |
| LM_MODEL |
| |
| |
| ``` |
|
|
| # ADD AUDIO |
|
|
| ```python |
| |
| |
| |
| print('Add Audio...') |
| #Add Head |
| # Combine pre-trained encoder and pre-trained decoder to form a Seq2Seq model |
| _AudioFeatureExtractor = AutoFeatureExtractor.from_pretrained("openai/whisper-small") |
| _AudioTokenizer = AutoTokenizer.from_pretrained("openai/whisper-small") |
| _SpeechEncoderDecoder = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained("openai/whisper-small","openai/whisper-small") |
| |
| # Add Pad tokems |
| _SpeechEncoderDecoder.config.decoder_start_token_id = _AudioTokenizer.cls_token_id |
| _SpeechEncoderDecoder.config.pad_token_id = _AudioTokenizer.pad_token_id |
| LM_MODEL.SpeechEncoderDecoder = _SpeechEncoderDecoder |
| # Add Sub Components |
| LM_MODEL.Decoder_AudioTokenizer = _AudioTokenizer |
| LM_MODEL.Encoder_AudioFeatureExtractor = _AudioFeatureExtractor |
| LM_MODEL |
| |
| ``` |
|
|
| # SAVE |
| ```python |
| # Final step: sync the embeddings with the tokenizer and write everything to disk. |
| # Assumes LM_MODEL, tokenizer and torch are defined/imported earlier; none are |
| # defined in this snippet. |
| print('Final stages:...') |
| print('Add tokenizer...') |
| # Resize the token embeddings to the tokenizer's length, then keep a |
| # reference to the tokenizer on the model object. |
| LM_MODEL.resize_token_embeddings(len(tokenizer)) |
| LM_MODEL.tokenizer = tokenizer |
| print('Save model...') |
| # Cast the model to float16 before saving it. |
| LM_MODEL.to(torch.float16) |
| LM_MODEL.save_pretrained("Mixtral_AI_MiniModalTron") |
| print('Save tokenizer...') |
| # Save the tokenizer under the same path as the model. |
| tokenizer.save_pretrained("Mixtral_AI_MiniModalTron") |
| |
| ``` |