{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "4debe50db88e48fa9a083d691aad2bd5": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_21745f14645b4786a85261621411be2f", "IPY_MODEL_6206fd713e3545bbbcd02952c23711f3", "IPY_MODEL_27ebe53cd42445c9943c11ea627da7d9" ], "layout": "IPY_MODEL_1af147c6ea114527957c9e2a2234e68e" } }, "21745f14645b4786a85261621411be2f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_010bc2703d564b39b65c42a9a773ebe6", "placeholder": "​", "style": "IPY_MODEL_61de1507580c493782eab17a91b71b6b", "value": "tokenizer_config.json: 100%" } }, "6206fd713e3545bbbcd02952c23711f3": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_76ca4e2a2a624d0981adf03e15493651", "max": 48, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_dd78effe9fdc4365befb3c446ea9b5f7", "value": 48 } }, "27ebe53cd42445c9943c11ea627da7d9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0fbd8ff0d95e4df1bb1331599f8f653b", "placeholder": "​", "style": "IPY_MODEL_e529c1703d61450b8534d2a0a2123645", "value": " 48.0/48.0 [00:00<00:00, 706B/s]" } }, "1af147c6ea114527957c9e2a2234e68e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "010bc2703d564b39b65c42a9a773ebe6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "61de1507580c493782eab17a91b71b6b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "76ca4e2a2a624d0981adf03e15493651": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dd78effe9fdc4365befb3c446ea9b5f7": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "0fbd8ff0d95e4df1bb1331599f8f653b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e529c1703d61450b8534d2a0a2123645": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f5825285fb4743e6a62aad8745d13371": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_f6e41d5dac874fdcb7d631969646ce33", "IPY_MODEL_7e97c7a956984d30b7a5d9432208b2c6", "IPY_MODEL_9d94f4a9ee904a8f9e069b105571cc36" ], "layout": "IPY_MODEL_908871f8d1bb4bedb135fd55cbbbd0ab" } }, "f6e41d5dac874fdcb7d631969646ce33": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_84c50c244b7a4a9886b70b0ee266d053", "placeholder": "​", "style": "IPY_MODEL_7802d3cce31944e983f814a34e5e3319", "value": "vocab.txt: 100%" } }, "7e97c7a956984d30b7a5d9432208b2c6": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ede6611a3b244f9a957dd5d5010e1b26", "max": 231508, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_bd03f4735d5e40e8817a2f019902c104", "value": 231508 } }, "9d94f4a9ee904a8f9e069b105571cc36": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1c739bbc97ae4a67bb3722863200e1b1", "placeholder": "​", "style": "IPY_MODEL_7b787aca59a14b0b8ae33f66c0f99d78", "value": " 232k/232k [00:00<00:00, 7.94MB/s]" } }, "908871f8d1bb4bedb135fd55cbbbd0ab": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "84c50c244b7a4a9886b70b0ee266d053": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7802d3cce31944e983f814a34e5e3319": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ede6611a3b244f9a957dd5d5010e1b26": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bd03f4735d5e40e8817a2f019902c104": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "1c739bbc97ae4a67bb3722863200e1b1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7b787aca59a14b0b8ae33f66c0f99d78": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1e2cdb9ebda24fd38995a2f5b8d9605c": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_69a259273b4340d48ad5a7b382918596", "IPY_MODEL_f3b21ba904fa4bc387b632051accc067", "IPY_MODEL_880b1be1091440a780a49f632382f6a9" ], "layout": "IPY_MODEL_2893974093f64d49811ab2590888bed3" } }, "69a259273b4340d48ad5a7b382918596": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e8f8658399e64e14b8b644fa25156e42", "placeholder": "​", "style": "IPY_MODEL_680cd806532b40b2894682ec9915bbaf", "value": "tokenizer.json: 100%" } }, "f3b21ba904fa4bc387b632051accc067": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5ccccb8c8491468ead3da3618a8b43d2", "max": 466062, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_cf44e869239a4e4e86eff7eaa04e23b2", "value": 466062 } }, "880b1be1091440a780a49f632382f6a9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_87060f8fdf7e4cd1a72e7866d87d3824", "placeholder": "​", "style": "IPY_MODEL_f91f43e7cd33413b8effe56fd285bf50", "value": " 466k/466k [00:00<00:00, 741kB/s]" } }, "2893974093f64d49811ab2590888bed3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e8f8658399e64e14b8b644fa25156e42": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "680cd806532b40b2894682ec9915bbaf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5ccccb8c8491468ead3da3618a8b43d2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cf44e869239a4e4e86eff7eaa04e23b2": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "87060f8fdf7e4cd1a72e7866d87d3824": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f91f43e7cd33413b8effe56fd285bf50": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "24642e27df804bc6a1ef3ff9e0480b8d": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_6b6fa065b6454c538a1e8a9178ad1ba3", "IPY_MODEL_814babcb10244851a7e1ab157ff76768", "IPY_MODEL_e3dfe0725f77407bb4e63bd289b5ad0e" ], "layout": "IPY_MODEL_6a2722be3b504eb7906e9f487a107441" } }, "6b6fa065b6454c538a1e8a9178ad1ba3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bf8cce2d8c07425abc4ba50e27ef79c6", "placeholder": "​", "style": "IPY_MODEL_0750e23cddd64a699d56af4c6cdd59e7", "value": "model.safetensors: 100%" } }, "814babcb10244851a7e1ab157ff76768": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_741d6b125d5b4d179c7deeefdeb1cdbb", "max": 440449768, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_a54383977c4147d4a6ea713d282f5625", "value": 440449768 } }, "e3dfe0725f77407bb4e63bd289b5ad0e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f9f67b577746465b8125dda609933c0c", "placeholder": "​", "style": "IPY_MODEL_978fa96cbc724e2a95b1cdb6c986a75d", "value": " 440M/440M [00:00<00:00, 210MB/s]" } }, "6a2722be3b504eb7906e9f487a107441": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bf8cce2d8c07425abc4ba50e27ef79c6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0750e23cddd64a699d56af4c6cdd59e7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "741d6b125d5b4d179c7deeefdeb1cdbb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a54383977c4147d4a6ea713d282f5625": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f9f67b577746465b8125dda609933c0c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "978fa96cbc724e2a95b1cdb6c986a75d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "## Penjelasan BERT\n", "BERT (Bidirectional Encoder Representations from Transformers) adalah model bahasa yang sangat populer dalam pemrosesan bahasa alami (NLP). BERT menggunakan transformer yang membaca teks secara bidirectional, berbeda dengan model tradisional yang hanya membaca dari kiri ke kanan atau sebaliknya.\n", "\n", "### Pemahaman Dasar BERT\n", "* Arsitektur: BERT terdiri dari beberapa lapisan encoder dari transformer. Inputnya berupa urutan token (kata) yang dikonversi menjadi embedding.\n", "* Pre-training: BERT dilatih dalam dua tahap: Masked Language Modeling (MLM) dan Next Sentence Prediction (NSP).\n", "* Fine-tuning: BERT dapat disesuaikan untuk tugas spesifik seperti klasifikasi teks, penerjemahan, atau analisis sentimen." ], "metadata": { "id": "zlg9u3KJ_mHW" } }, { "cell_type": "code", "source": [ "import torch\n", "from torch.utils.data import DataLoader, Dataset\n", "from transformers import BertTokenizer, BertForSequenceClassification, AdamW\n", "from tqdm import tqdm\n", "\n", "# Dataset sederhana\n", "class SimpleDataset(Dataset):\n", " def __init__(self, texts, labels, tokenizer, max_len):\n", " self.texts = texts\n", " self.labels = labels\n", " self.tokenizer = tokenizer\n", " self.max_len = max_len\n", "\n", " def __len__(self):\n", " return len(self.texts)\n", "\n", " def __getitem__(self, idx):\n", " text = self.texts[idx]\n", " label = self.labels[idx]\n", " encoding = self.tokenizer.encode_plus(\n", " text,\n", " add_special_tokens=True,\n", " max_length=self.max_len,\n", " return_token_type_ids=False,\n", " padding='max_length',\n", " return_attention_mask=True,\n", " return_tensors='pt',\n", " truncation=True\n", " )\n", " return {\n", " 'input_ids': encoding['input_ids'].flatten(),\n", " 'attention_mask': encoding['attention_mask'].flatten(),\n", " 'labels': torch.tensor(label, dtype=torch.long)\n", " }\n", "\n", "# Contoh data kecil\n", "texts = [\n", " \"I love this movie!\",\n", " \"This film was terrible.\",\n", " \"Amazing acting and storyline.\",\n", " \"The plot was boring.\",\n", " \"Great visuals and sound!\",\n", " \"I hate this movie.\",\n", " \"Fantastic experience!\",\n", " \"It was a waste of time.\"\n", "]\n", "labels = [1, 0, 1, 0, 1, 0, 1, 0] # 1: Positif, 0: Negatif\n", "\n", "# Load tokenizer dan model BERT\n", "tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')\n", "model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)\n", "\n", "# Membuat dataset dan DataLoader\n", "dataset = SimpleDataset(texts, labels, tokenizer, max_len=16)\n", "dataloader = DataLoader(dataset, batch_size=2, shuffle=True)\n", "\n", "# Memindahkan model ke GPU jika tersedia\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "model.to(device)\n", "\n", "# Optimizer\n", "optimizer = AdamW(model.parameters(), lr=2e-5)\n", "\n", "# Training loop sederhana\n", "model.train()\n", "for epoch in range(3): # Melatih selama 3 epoch\n", " print(f\"Epoch {epoch+1}\")\n", "\n", " for batch in tqdm(dataloader):\n", " # Memindahkan batch ke GPU jika tersedia\n", " input_ids = batch['input_ids'].to(device)\n", " attention_mask = batch['attention_mask'].to(device)\n", " labels = batch['labels'].to(device)\n", "\n", " # Forward pass\n", " outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n", " loss = outputs.loss\n", "\n", " # Backward pass\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", " print(f\"Loss: {loss.item()}\")\n", "\n", "# Evaluasi sederhana\n", "model.eval()\n", "with torch.no_grad():\n", " for i, text in enumerate(texts):\n", " encoding = tokenizer.encode_plus(\n", " text,\n", " add_special_tokens=True,\n", " max_length=16,\n", " return_token_type_ids=False,\n", " padding='max_length',\n", " return_attention_mask=True,\n", " return_tensors='pt',\n", " truncation=True\n", " )\n", " input_ids = encoding['input_ids'].to(device)\n", " attention_mask = encoding['attention_mask'].to(device)\n", "\n", " output = model(input_ids, attention_mask=attention_mask)\n", " prediction = torch.argmax(output.logits, dim=-1).item()\n", "\n", " label = \"Positive\" if prediction == 1 else \"Negative\"\n", " print(f\"Text: {text}\\nPredicted Label: {label}\\n\")\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "4debe50db88e48fa9a083d691aad2bd5", "21745f14645b4786a85261621411be2f", "6206fd713e3545bbbcd02952c23711f3", "27ebe53cd42445c9943c11ea627da7d9", "1af147c6ea114527957c9e2a2234e68e", "010bc2703d564b39b65c42a9a773ebe6", "61de1507580c493782eab17a91b71b6b", "76ca4e2a2a624d0981adf03e15493651", "dd78effe9fdc4365befb3c446ea9b5f7", "0fbd8ff0d95e4df1bb1331599f8f653b", "e529c1703d61450b8534d2a0a2123645", "f5825285fb4743e6a62aad8745d13371", "f6e41d5dac874fdcb7d631969646ce33", "7e97c7a956984d30b7a5d9432208b2c6", "9d94f4a9ee904a8f9e069b105571cc36", "908871f8d1bb4bedb135fd55cbbbd0ab", "84c50c244b7a4a9886b70b0ee266d053", "7802d3cce31944e983f814a34e5e3319", "ede6611a3b244f9a957dd5d5010e1b26", "bd03f4735d5e40e8817a2f019902c104", "1c739bbc97ae4a67bb3722863200e1b1", "7b787aca59a14b0b8ae33f66c0f99d78", "1e2cdb9ebda24fd38995a2f5b8d9605c", "69a259273b4340d48ad5a7b382918596", "f3b21ba904fa4bc387b632051accc067", "880b1be1091440a780a49f632382f6a9", "2893974093f64d49811ab2590888bed3", "e8f8658399e64e14b8b644fa25156e42", "680cd806532b40b2894682ec9915bbaf", "5ccccb8c8491468ead3da3618a8b43d2", "cf44e869239a4e4e86eff7eaa04e23b2", "87060f8fdf7e4cd1a72e7866d87d3824", "f91f43e7cd33413b8effe56fd285bf50", "24642e27df804bc6a1ef3ff9e0480b8d", "6b6fa065b6454c538a1e8a9178ad1ba3", "814babcb10244851a7e1ab157ff76768", "e3dfe0725f77407bb4e63bd289b5ad0e", "6a2722be3b504eb7906e9f487a107441", "bf8cce2d8c07425abc4ba50e27ef79c6", "0750e23cddd64a699d56af4c6cdd59e7", "741d6b125d5b4d179c7deeefdeb1cdbb", "a54383977c4147d4a6ea713d282f5625", "f9f67b577746465b8125dda609933c0c", "978fa96cbc724e2a95b1cdb6c986a75d" ] }, "id": "yCNRpB1WVNQm", "outputId": "c982edd7-088f-400e-889c-5f74b0447a76" }, "execution_count": 26, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "tokenizer_config.json: 0%| | 0.00/48.0 [00:00