{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Instantiating Transformers models and configurations\n",
    "\n",
    "This notebook shows how to:\n",
    "\n",
    "- load pretrained models with `AutoModel`,\n",
    "- load configurations with `AutoConfig` or a checkpoint-specific config class,\n",
    "- build a randomly initialized model from a configuration,\n",
    "- save a model and reload it from disk.\n",
    "\n",
    "Run it top to bottom (Restart Kernel → Run All); later cells reuse names defined in earlier ones."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import (\n",
    "    AutoConfig,\n",
    "    AutoModel,\n",
    "    BartConfig,\n",
    "    BertConfig,\n",
    "    BertModel,\n",
    "    GPT2Config,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Checkpoints used throughout the notebook.\n",
    "BERT_CHECKPOINT = 'bert-base-cased'\n",
    "GPT2_CHECKPOINT = 'gpt2'\n",
    "BART_CHECKPOINT = 'facebook/bart-base'\n",
    "\n",
    "# Local directory the BERT model is saved to and reloaded from.\n",
    "SAVE_DIR = 'my-bert-model'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading pretrained models with `AutoModel`\n",
    "\n",
    "`AutoModel.from_pretrained` builds a model from a checkpoint name. Printing the type of each result shows which concrete model class was created for that checkpoint."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bert_model = AutoModel.from_pretrained(BERT_CHECKPOINT)\n",
    "print(type(bert_model))\n",
    "\n",
    "gpt_model = AutoModel.from_pretrained(GPT2_CHECKPOINT)\n",
    "print(type(gpt_model))\n",
    "\n",
    "bart_model = AutoModel.from_pretrained(BART_CHECKPOINT)\n",
    "print(type(bart_model))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading configurations with `AutoConfig`\n",
    "\n",
    "`AutoConfig.from_pretrained` does the same for configurations: it takes a checkpoint name and returns the configuration object for it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bert_config_by_autoconfig = AutoConfig.from_pretrained(BERT_CHECKPOINT)\n",
    "print(type(bert_config_by_autoconfig))\n",
    "\n",
    "gpt_config = AutoConfig.from_pretrained(GPT2_CHECKPOINT)\n",
    "print(type(gpt_config))\n",
    "\n",
    "bart_config = AutoConfig.from_pretrained(BART_CHECKPOINT)\n",
    "print(type(bart_config))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using the checkpoint-specific configuration class\n",
    "\n",
    "You can also use the specific class corresponding to a checkpoint to get the same result as above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bert_config_by_selfconfig = BertConfig.from_pretrained(BERT_CHECKPOINT)\n",
    "print(type(bert_config_by_selfconfig))\n",
    "\n",
    "gpt_config_byselfconfig = GPT2Config.from_pretrained(GPT2_CHECKPOINT)\n",
    "print(type(gpt_config_byselfconfig))\n",
    "\n",
    "bart_config_byselfconfig = BartConfig.from_pretrained(BART_CHECKPOINT)\n",
    "print(type(bart_config_byselfconfig))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspecting a configuration\n",
    "\n",
    "The configuration of a model is a blueprint that has all the information necessary to create the model architecture. For instance, the BERT model associated with the `bert-base-cased` checkpoint has 12 layers and 768 hidden units in each layer (`num_hidden_layers` and `hidden_size` in the output below)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BertConfig {\n",
      " \"architectures\": [\n",
      " \"BertForMaskedLM\"\n",
      " ],\n",
      " \"attention_probs_dropout_prob\": 0.1,\n",
      " \"classifier_dropout\": null,\n",
      " \"gradient_checkpointing\": false,\n",
      " \"hidden_act\": \"gelu\",\n",
      " \"hidden_dropout_prob\": 0.1,\n",
      " \"hidden_size\": 768,\n",
      " \"initializer_range\": 0.02,\n",
      " \"intermediate_size\": 3072,\n",
      " \"layer_norm_eps\": 1e-12,\n",
      " \"max_position_embeddings\": 512,\n",
      " \"model_type\": \"bert\",\n",
      " \"num_attention_heads\": 12,\n",
      " \"num_hidden_layers\": 12,\n",
      " \"pad_token_id\": 0,\n",
      " \"position_embedding_type\": \"absolute\",\n",
      " \"transformers_version\": \"4.34.1\",\n",
      " \"type_vocab_size\": 2,\n",
      " \"use_cache\": true,\n",
      " \"vocab_size\": 28996\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "bert_config = BertConfig.from_pretrained(BERT_CHECKPOINT)\n",
    "print(bert_config)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Building a randomly initialized model from a configuration\n",
    "\n",
    "Once we have the configuration of a model, we can create a model that has the same architecture as our checkpoint but is randomly initialized. We can then train it from scratch like any PyTorch or TensorFlow model.\n",
    "\n",
    "We can also change any part of the configuration by using keyword arguments."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same architecture as bert-base-cased, built from the configuration loaded above.\n",
    "# Note: this rebinds `bert_model`, which previously held the pretrained model.\n",
    "bert_model = BertModel(bert_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Using only 10 layers instead of 12. Separate names keep the 12-layer\n",
    "# `bert_config` / `bert_model` available to the cells below.\n",
    "bert_config_10_layers = BertConfig.from_pretrained(BERT_CHECKPOINT, num_hidden_layers=10)\n",
    "bert_model_10_layers = BertModel(bert_config_10_layers)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Saving a model\n",
    "\n",
    "Saving a model after it's been trained or fine-tuned is very easy: just use the `save_pretrained` method. No training is performed in this notebook, so the model saved here is the 12-layer `bert_model` built from `bert_config` above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training code goes here\n",
    "\n",
    "bert_model.save_pretrained(SAVE_DIR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reloading a saved model\n",
    "\n",
    "Pass the directory used with `save_pretrained` to `from_pretrained` to load the model back."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bert_model = BertModel.from_pretrained(SAVE_DIR)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}