{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "ed08792b62e14889b92ce01d10520ed4": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_7beaed2d230d42e79106b3181d7774b1", "IPY_MODEL_43dc5b885de04d70a6fb2ba162d1343b", "IPY_MODEL_776ffb2a9e5644af8dfdea7d16f4ba2b", "IPY_MODEL_f6a65b6db69246e389284d920ae95b53" ], "layout": "IPY_MODEL_141398f982974bbb85db2a555d4d007e" } }, "c0e08f3c449948e4971c9dc4934840c2": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8067924b93a049c3a33c2f196751d572", "placeholder": "​", "style": "IPY_MODEL_b04ed66f9a4f41f3a2a07de004e8f4d9", "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "b7ea807d74d841368a512deadbaeccb3": { "model_module": "@jupyter-widgets/controls", "model_name": "PasswordModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_55e9bba010344ca4beb985df6e19fa0f", "placeholder": "​", "style": "IPY_MODEL_d6b83cedf72b4b6f8064b99341f67a24", "value": "" } }, "632ed488a1a04fc2afe287fa5275c87a": { "model_module": "@jupyter-widgets/controls", "model_name": "CheckboxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_504ebe991a2744129fe505e11eda37b4", "style": "IPY_MODEL_b558200eabf1452da063f6fd765407fb", "value": true } }, "fe8e2d9c438d4d45bf5039db91b3bd33": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_0ab6065d9f2b45879b71bfdd49a7b839", "style": "IPY_MODEL_048a8ad112794f628dfacaa6afc3392b", "tooltip": "" } }, "d148fb7b4d4b4571804e8e290fad547c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9db360e78485441aaa8e1ded2e68dedd", "placeholder": "​", "style": "IPY_MODEL_11ffd14bba034f50867e369fbf5daef1", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. " } }, "141398f982974bbb85db2a555d4d007e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "8067924b93a049c3a33c2f196751d572": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b04ed66f9a4f41f3a2a07de004e8f4d9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "55e9bba010344ca4beb985df6e19fa0f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d6b83cedf72b4b6f8064b99341f67a24": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "504ebe991a2744129fe505e11eda37b4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b558200eabf1452da063f6fd765407fb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0ab6065d9f2b45879b71bfdd49a7b839": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "048a8ad112794f628dfacaa6afc3392b": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "9db360e78485441aaa8e1ded2e68dedd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "11ffd14bba034f50867e369fbf5daef1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ff52a5a13235408a829a3d1f8774e3a6": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a40b0ea231da481099657870d5eee2c1", "placeholder": "​", "style": "IPY_MODEL_3d0824795c76430285086b909b3f5338", "value": "Connecting..." } }, "a40b0ea231da481099657870d5eee2c1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3d0824795c76430285086b909b3f5338": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7beaed2d230d42e79106b3181d7774b1": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8d540e71071a48c09aa9649926409f7a", "placeholder": "​", "style": "IPY_MODEL_7a70671da3b94fa2a5184a4f871ffca5", "value": "Token is valid (permission: write)." } }, "43dc5b885de04d70a6fb2ba162d1343b": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7007fae84ff045bf907324783e43c76a", "placeholder": "​", "style": "IPY_MODEL_d433c5972dd14c618cdb3c0f34389475", "value": "Your token has been saved in your configured git credential helpers (store)." } }, "776ffb2a9e5644af8dfdea7d16f4ba2b": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f543f1f979ee4ae49ce089e338c75279", "placeholder": "​", "style": "IPY_MODEL_738a1bcb6b3b41b8bc1f71733f4b791d", "value": "Your token has been saved to /root/.cache/huggingface/token" } }, "f6a65b6db69246e389284d920ae95b53": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_eed4bf33ecb5450eb1b29b1b629b39db", "placeholder": "​", "style": "IPY_MODEL_a85822ccb1c949e683e900b966025ad3", "value": "Login successful" } }, "8d540e71071a48c09aa9649926409f7a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7a70671da3b94fa2a5184a4f871ffca5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7007fae84ff045bf907324783e43c76a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d433c5972dd14c618cdb3c0f34389475": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f543f1f979ee4ae49ce089e338c75279": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "738a1bcb6b3b41b8bc1f71733f4b791d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "eed4bf33ecb5450eb1b29b1b629b39db": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a85822ccb1c949e683e900b966025ad3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "# Connect to Google Drive" ], "metadata": { "id": "NESbD1fETnSh" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rj5bwXP1ThC_", "outputId": "baf32de2-09eb-487b-d2cb-e28b26f783a6" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "# Connect to google drive\n", "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "markdown", "source": [ "# Import Libraries" ], "metadata": { "id": "FfT_Yae-X1DB" } }, { "cell_type": "code", "source": [ "import pandas as pd\n", "import torch\n", "from transformers import DistilBertTokenizer, DistilBertForSequenceClassification\n", "from sklearn.model_selection import train_test_split\n", "from torch.utils.data import DataLoader, TensorDataset\n", "from torch.optim import AdamW\n", "from tqdm import tqdm" ], "metadata": { "id": "wiAkqufRX0fH" }, "execution_count": 15, "outputs": [] }, { "cell_type": "code", "source": [ "# Read csv\n", "elon_tweets = pd.read_csv('/content/drive/MyDrive/elon_musk_tweets.csv')\n", "non_elon_tweets = pd.read_csv('/content/drive/MyDrive/Tweets.csv')\n", "\n", "elon_tweets" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 843 }, "id": "VE8dG16AYAbp", "outputId": "ad34bdea-13fd-4717-a9e2-1487aeb8bcc6" }, "execution_count": 5, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id user_name user_location user_description \\\n", "0 1544379368478212100 Elon Musk NaN Mars & Cars, Chips & Dips \n", "1 1544377493263720450 Elon Musk NaN Mars & Cars, Chips & Dips \n", "2 1544377130590552064 Elon Musk NaN Mars & Cars, Chips & Dips \n", "3 1544375575724400645 Elon Musk NaN Mars & Cars, Chips & Dips \n", "4 1544375148605853699 Elon Musk NaN Mars & Cars, Chips & Dips \n", "... ... ... ... ... \n", "5899 1665143503108677634 Elon Musk NaN NaN \n", "5900 1665139144425631747 Elon Musk NaN NaN \n", "5901 1665137204782419968 Elon Musk NaN NaN \n", "5902 1665131126900285445 Elon Musk NaN NaN \n", "5903 1665121551652474880 Elon Musk NaN NaN \n", "\n", " user_created user_followers user_friends \\\n", "0 2009-06-02 20:12:29+00:00 101240855 115 \n", "1 2009-06-02 20:12:29+00:00 101240806 115 \n", "2 2009-06-02 20:12:29+00:00 101240806 115 \n", "3 2009-06-02 20:12:29+00:00 101240806 115 \n", "4 2009-06-02 20:12:29+00:00 101240806 115 \n", "... ... ... ... \n", "5899 2009-06-02 20:12:29+00:00 143325985 330 \n", "5900 2009-06-02 20:12:29+00:00 143325985 330 \n", "5901 2009-06-02 20:12:29+00:00 143325985 330 \n", "5902 2009-06-02 20:12:29+00:00 143325985 330 \n", "5903 2009-06-02 20:12:29+00:00 143325985 330 \n", "\n", " user_favourites user_verified date \\\n", "0 13503 True 2022-07-05 17:55:09+00:00 \n", "1 13503 True 2022-07-05 17:47:42+00:00 \n", "2 13503 True 2022-07-05 17:46:15+00:00 \n", "3 13503 True 2022-07-05 17:40:05+00:00 \n", "4 13503 True 2022-07-05 17:38:23+00:00 \n", "... ... ... ... \n", "5899 25655 False 2023-06-03 23:48:42+00:00 \n", "5900 25655 False 2023-06-03 23:31:23+00:00 \n", "5901 25655 False 2023-06-03 23:23:41+00:00 \n", "5902 25655 False 2023-06-03 22:59:31+00:00 \n", "5903 25655 False 2023-06-03 22:21:29+00:00 \n", "\n", " text hashtags \\\n", "0 @BillyM2k I find the gold toe sock – inevitabl... NaN \n", "1 Sock Con, the conference for socks NaN \n", "2 Always something new for the magazine cover an... NaN \n", "3 @ExplainThisBob This guy gets it NaN \n", "4 Sock tech is so advanced that you can get pret... NaN \n", "... ... ... \n", "5899 @JonErlichman He’s not wrong … NaN \n", "5900 @alifarhat79 Guys, I think I maybe took too mu... NaN \n", "5901 @sriramk Cool NaN \n", "5902 @cb_doge Time to complete the circle NaN \n", "5903 @Jason Late stage civilization complacency NaN \n", "\n", " source retweets favorites is_retweet \n", "0 Twitter for iPhone 335 6542 False \n", "1 Twitter for iPhone 1451 30753 False \n", "2 Twitter for iPhone 1284 28610 False \n", "3 Twitter for iPhone 131 3640 False \n", "4 Twitter for iPhone 1191 23790 False \n", "... ... ... ... ... \n", "5899 Twitter for iPhone 361 4791 False \n", "5900 Twitter for iPhone 1609 61964 False \n", "5901 Twitter for iPhone 46 879 False \n", "5902 Twitter for iPhone 898 12467 False \n", "5903 Twitter for iPhone 1997 38113 False \n", "\n", "[5904 rows x 16 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
iduser_nameuser_locationuser_descriptionuser_createduser_followersuser_friendsuser_favouritesuser_verifieddatetexthashtagssourceretweetsfavoritesis_retweet
01544379368478212100Elon MuskNaNMars & Cars, Chips & Dips2009-06-02 20:12:29+00:0010124085511513503True2022-07-05 17:55:09+00:00@BillyM2k I find the gold toe sock – inevitabl...NaNTwitter for iPhone3356542False
11544377493263720450Elon MuskNaNMars & Cars, Chips & Dips2009-06-02 20:12:29+00:0010124080611513503True2022-07-05 17:47:42+00:00Sock Con, the conference for socksNaNTwitter for iPhone145130753False
21544377130590552064Elon MuskNaNMars & Cars, Chips & Dips2009-06-02 20:12:29+00:0010124080611513503True2022-07-05 17:46:15+00:00Always something new for the magazine cover an...NaNTwitter for iPhone128428610False
31544375575724400645Elon MuskNaNMars & Cars, Chips & Dips2009-06-02 20:12:29+00:0010124080611513503True2022-07-05 17:40:05+00:00@ExplainThisBob This guy gets itNaNTwitter for iPhone1313640False
41544375148605853699Elon MuskNaNMars & Cars, Chips & Dips2009-06-02 20:12:29+00:0010124080611513503True2022-07-05 17:38:23+00:00Sock tech is so advanced that you can get pret...NaNTwitter for iPhone119123790False
...................................................
58991665143503108677634Elon MuskNaNNaN2009-06-02 20:12:29+00:0014332598533025655False2023-06-03 23:48:42+00:00@JonErlichman He’s not wrong …NaNTwitter for iPhone3614791False
59001665139144425631747Elon MuskNaNNaN2009-06-02 20:12:29+00:0014332598533025655False2023-06-03 23:31:23+00:00@alifarhat79 Guys, I think I maybe took too mu...NaNTwitter for iPhone160961964False
59011665137204782419968Elon MuskNaNNaN2009-06-02 20:12:29+00:0014332598533025655False2023-06-03 23:23:41+00:00@sriramk CoolNaNTwitter for iPhone46879False
59021665131126900285445Elon MuskNaNNaN2009-06-02 20:12:29+00:0014332598533025655False2023-06-03 22:59:31+00:00@cb_doge Time to complete the circleNaNTwitter for iPhone89812467False
59031665121551652474880Elon MuskNaNNaN2009-06-02 20:12:29+00:0014332598533025655False2023-06-03 22:21:29+00:00@Jason Late stage civilization complacencyNaNTwitter for iPhone199738113False
\n", "

5904 rows × 16 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "elon_tweets", "summary": "{\n \"name\": \"elon_tweets\",\n \"rows\": 5904,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 32598337931898456,\n \"min\": 1544316752657629189,\n \"max\": 1668435272235720705,\n \"num_unique_values\": 5904,\n \"samples\": [\n 1661525947022180352,\n 1649039669190098947,\n 1607850458554449920\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_name\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Mr. Tweet\",\n \"Elon Musk\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Boring\",\n \"Twitter HQ\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_description\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"Mars & Cars, Chips & Dips\",\n \"Perfume Salesman\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_created\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"2009-06-02 20:12:29+00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_followers\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11862039,\n \"min\": 101240806,\n \"max\": 143325990,\n \"num_unique_values\": 655,\n \"samples\": [\n 126687007\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_friends\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 67,\n \"min\": 115,\n \"max\": 330,\n \"num_unique_values\": 101,\n \"samples\": [\n 289\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_favourites\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3673,\n \"min\": 13503,\n \"max\": 25655,\n \"num_unique_values\": 319,\n \"samples\": [\n 14331\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"user_verified\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n false\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5904,\n \"samples\": [\n \"2023-05-25 00:13:50+00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5831,\n \"samples\": [\n \"The BBC interview last week was exceptional in illustrating why you cannot rely on the media for truth\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hashtags\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"['deletefacebook']\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"source\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Twitter Web App\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"retweets\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14457,\n \"min\": 0,\n \"max\": 359672,\n \"num_unique_values\": 3471,\n \"samples\": [\n 2053\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"favorites\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 132679,\n \"min\": 52,\n \"max\": 2500167,\n \"num_unique_values\": 5600,\n \"samples\": [\n 3002\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"is_retweet\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 1,\n \"samples\": [\n false\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "source": [ "# Drop off all columns except text from elon musk tweets. Delete row if retweet\n", "\n", "elon_tweets = elon_tweets[elon_tweets['is_retweet'] == False]\n", "elon_tweets = elon_tweets[['text']]\n", "\n", "elon_tweets" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "kBe01eCvYT8D", "outputId": "589cbcd2-cfac-4eb4-cfdb-a3588d0d83d7" }, "execution_count": 6, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " text\n", "0 @BillyM2k I find the gold toe sock – inevitabl...\n", "1 Sock Con, the conference for socks\n", "2 Always something new for the magazine cover an...\n", "3 @ExplainThisBob This guy gets it\n", "4 Sock tech is so advanced that you can get pret...\n", "... ...\n", "5899 @JonErlichman He’s not wrong …\n", "5900 @alifarhat79 Guys, I think I maybe took too mu...\n", "5901 @sriramk Cool\n", "5902 @cb_doge Time to complete the circle\n", "5903 @Jason Late stage civilization complacency\n", "\n", "[5904 rows x 1 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
text
0@BillyM2k I find the gold toe sock – inevitabl...
1Sock Con, the conference for socks
2Always something new for the magazine cover an...
3@ExplainThisBob This guy gets it
4Sock tech is so advanced that you can get pret...
......
5899@JonErlichman He’s not wrong …
5900@alifarhat79 Guys, I think I maybe took too mu...
5901@sriramk Cool
5902@cb_doge Time to complete the circle
5903@Jason Late stage civilization complacency
\n", "

5904 rows × 1 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "elon_tweets", "summary": "{\n \"name\": \"elon_tweets\",\n \"rows\": 5904,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5831,\n \"samples\": [\n \"The BBC interview last week was exceptional in illustrating why you cannot rely on the media for truth\",\n \"@Teslaconomics Welcome back @jbstraubel!\",\n \"@CorySteuben @Erdayastronaut @live_munro Interesting\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "source": [ "non_elon_tweets = non_elon_tweets[['text']]\n", "non_elon_tweets" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "aqnd6NMQYqbd", "outputId": "dd9292f9-6eb4-4176-b7b9-c3dbef72aad4" }, "execution_count": 8, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " text\n", "0 I`d have responded, if I were going\n", "1 Sooo SAD I will miss you here in San Diego!!!\n", "2 my boss is bullying me...\n", "3 what interview! leave me alone\n", "4 Sons of ****, why couldn`t they put them on t...\n", "... ...\n", "27476 wish we could come see u on Denver husband l...\n", "27477 I`ve wondered about rake to. The client has ...\n", "27478 Yay good for both of you. Enjoy the break - y...\n", "27479 But it was worth it ****.\n", "27480 All this flirting going on - The ATG smiles...\n", "\n", "[27481 rows x 1 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
text
0I`d have responded, if I were going
1Sooo SAD I will miss you here in San Diego!!!
2my boss is bullying me...
3what interview! leave me alone
4Sons of ****, why couldn`t they put them on t...
......
27476wish we could come see u on Denver husband l...
27477I`ve wondered about rake to. The client has ...
27478Yay good for both of you. Enjoy the break - y...
27479But it was worth it ****.
27480All this flirting going on - The ATG smiles...
\n", "

27481 rows × 1 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "non_elon_tweets", "summary": "{\n \"name\": \"non_elon_tweets\",\n \"rows\": 27481,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 27480,\n \"samples\": [\n \" Enjoy! Family trumps everything\",\n \" --of them kinda turns me off of it all. And then I buy more of them and dig a deeper hole, etc. ;;\",\n \"Clive it`s my birthday pat me http://apps.facebook.com/dogbook/profile/view/6386106\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "source": [ "def load_and_preprocess_data(elon_file, non_elon_file):\n", " elon_tweets = pd.read_csv(elon_file)\n", " non_elon_tweets = pd.read_csv(non_elon_file)\n", " non_elon_tweets = non_elon_tweets[['text']]\n", " elon_tweets = elon_tweets[elon_tweets['is_retweet'] == False]\n", " elon_tweets = elon_tweets[['text']]\n", " # 'text' 列が存在することを確認し、存在しない場合は適切な列名に変更\n", " text_column = 'text' if 'text' in elon_tweets.columns else elon_tweets.columns[0]\n", "\n", " elon_tweets['label'] = 1\n", " non_elon_tweets['label'] = 0\n", "\n", " all_tweets = pd.concat([elon_tweets, non_elon_tweets], ignore_index=True)\n", "\n", " # None値や空の文字列を除去\n", " all_tweets = all_tweets.dropna(subset=[text_column])\n", " all_tweets = all_tweets[all_tweets[text_column].astype(bool)]\n", "\n", " # テキストを文字列に変換\n", " texts = all_tweets[text_column].astype(str).tolist()\n", " labels = all_tweets['label'].tolist()\n", "\n", " return train_test_split(texts, labels, test_size=0.2, random_state=42)\n", "\n", "# データの読み込みと分割\n", "train_texts, test_texts, train_labels, test_labels = load_and_preprocess_data('/content/drive/MyDrive/elon_musk_tweets.csv', '/content/drive/MyDrive/Tweets.csv')" ], "metadata": { "id": "py1xhu7GYx8Y" }, "execution_count": 19, "outputs": [] }, { "cell_type": "code", "source": [ "tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n", "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)\n", "\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", "model.to(device)\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "n8B3YGtcZKno", "outputId": "ca1d834c-0364-4a9e-93da-e9d93667875e" }, "execution_count": 20, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "DistilBertForSequenceClassification(\n", " (distilbert): DistilBertModel(\n", " (embeddings): Embeddings(\n", " (word_embeddings): Embedding(30522, 768, padding_idx=0)\n", " (position_embeddings): Embedding(512, 768)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (transformer): Transformer(\n", " (layer): ModuleList(\n", " (0-5): 6 x TransformerBlock(\n", " (attention): MultiHeadSelfAttention(\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (q_lin): Linear(in_features=768, out_features=768, bias=True)\n", " (k_lin): Linear(in_features=768, out_features=768, bias=True)\n", " (v_lin): Linear(in_features=768, out_features=768, bias=True)\n", " (out_lin): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (ffn): FFN(\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (lin1): Linear(in_features=768, out_features=3072, bias=True)\n", " (lin2): Linear(in_features=3072, out_features=768, bias=True)\n", " (activation): GELUActivation()\n", " )\n", " (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " )\n", " )\n", " )\n", " )\n", " (pre_classifier): Linear(in_features=768, out_features=768, bias=True)\n", " (classifier): Linear(in_features=768, out_features=2, bias=True)\n", " (dropout): Dropout(p=0.2, inplace=False)\n", ")" ] }, "metadata": {}, "execution_count": 20 } ] }, { "cell_type": "code", "source": [ "def preprocess_data(texts, labels):\n", " encodings = tokenizer(texts, truncation=True, padding=True, max_length=128, return_tensors='pt')\n", " dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'], torch.tensor(labels))\n", " return dataset\n", "\n", "train_dataset = preprocess_data(train_texts, train_labels)\n", "test_dataset = preprocess_data(test_texts, test_labels)\n", "\n", "train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)\n", "test_loader = DataLoader(test_dataset, batch_size=16)" ], "metadata": { "id": "HMnetkPMZZyN" }, "execution_count": 21, "outputs": [] }, { "cell_type": "code", "source": [ "optimizer = AdamW(model.parameters(), lr=5e-5)\n", "num_epochs = 3\n", "\n", "for epoch in range(num_epochs):\n", " model.train()\n", " total_loss = 0\n", " for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):\n", " input_ids, attention_mask, labels = [b.to(device) for b in batch]\n", " outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n", " loss = outputs.loss\n", " total_loss += loss.item()\n", "\n", " loss.backward()\n", " optimizer.step()\n", " optimizer.zero_grad()\n", "\n", " avg_loss = total_loss / len(train_loader)\n", " print(f'Epoch {epoch+1}/{num_epochs} completed. Average loss: {avg_loss:.4f}')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YIJem2_uac2p", "outputId": "3e15ed02-e42e-4a56-9e55-20438ae8645d" }, "execution_count": 22, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Epoch 1/3: 100%|██████████| 1670/1670 [04:16<00:00, 6.50it/s]\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Epoch 1/3 completed. Average loss: 0.0444\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "Epoch 2/3: 100%|██████████| 1670/1670 [04:15<00:00, 6.55it/s]\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Epoch 2/3 completed. Average loss: 0.0157\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "Epoch 3/3: 100%|██████████| 1670/1670 [04:15<00:00, 6.54it/s]" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Epoch 3/3 completed. Average loss: 0.0087\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\n" ] } ] }, { "cell_type": "code", "source": [ "model.eval()\n", "correct = 0\n", "total = 0\n", "\n", "with torch.no_grad():\n", " for batch in tqdm(test_loader, desc='Evaluating'):\n", " input_ids, attention_mask, labels = [b.to(device) for b in batch]\n", " outputs = model(input_ids, attention_mask=attention_mask)\n", " _, predicted = torch.max(outputs.logits, 1)\n", " total += labels.size(0)\n", " correct += (predicted == labels).sum().item()\n", "\n", "accuracy = correct / total\n", "print(f'Test Accuracy: {accuracy:.2f}')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YO88Wy9Uaicq", "outputId": "6ca5812d-fa21-47ac-f7f3-713f481c7be9" }, "execution_count": 23, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Evaluating: 100%|██████████| 418/418 [00:17<00:00, 23.91it/s]" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Test Accuracy: 0.99\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\n" ] } ] }, { "cell_type": "code", "source": [ "def classify_tweet(text):\n", " inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=128).to(device)\n", " with torch.no_grad():\n", " outputs = model(**inputs)\n", " probabilities = torch.softmax(outputs.logits, dim=1)\n", " prediction = torch.argmax(probabilities, dim=1).item()\n", " return \"Elon Musk\" if prediction == 1 else \"Not Elon Musk\"\n", "\n", "# 使用例\n", "new_tweet = \"I'm Elon\"\n", "result = classify_tweet(new_tweet)\n", "print(f\"The tweet '{new_tweet}' is classified as: {result}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UPWW-shsal2V", "outputId": "82ec7abb-d896-4601-8461-884a8fdb3fb9" }, "execution_count": 29, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "The tweet 'I'm Elon' is classified as: Not Elon Musk\n" ] } ] }, { "cell_type": "code", "source": [ "model.save_pretrained('/content/drive/MyDrive/EMD')" ], "metadata": { "id": "_ZjJOIj8caI2" }, "execution_count": 35, "outputs": [] }, { "cell_type": "code", "source": [ "tokenizer.save_pretrained('/content/drive/MyDrive/EMD')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UUxoItUce2VW", "outputId": "ce7bea54-090e-4888-dbc3-df47451ab21e" }, "execution_count": 36, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "('/content/drive/MyDrive/EMD/tokenizer_config.json',\n", " '/content/drive/MyDrive/EMD/special_tokens_map.json',\n", " '/content/drive/MyDrive/EMD/vocab.txt',\n", " '/content/drive/MyDrive/EMD/added_tokens.json')" ] }, "metadata": {}, "execution_count": 36 } ] }, { "cell_type": "code", "source": [ "!git clone https://huggingface.co/kix-intl/elon-musk-detector.git" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6jaCMh3mfLF4", "outputId": "ad1c6fa9-35dd-4ad4-e60e-d62d4833651d" }, "execution_count": 37, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into 'elon-musk-detector'...\n", "fatal: could not read Username for 'https://huggingface.co': No such device or address\n" ] } ] }, { "cell_type": "code", "source": [ "from huggingface_hub import notebook_login\n", "\n", "notebook_login()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 145, "referenced_widgets": [ "ed08792b62e14889b92ce01d10520ed4", "c0e08f3c449948e4971c9dc4934840c2", "b7ea807d74d841368a512deadbaeccb3", "632ed488a1a04fc2afe287fa5275c87a", "fe8e2d9c438d4d45bf5039db91b3bd33", "d148fb7b4d4b4571804e8e290fad547c", "141398f982974bbb85db2a555d4d007e", "8067924b93a049c3a33c2f196751d572", "b04ed66f9a4f41f3a2a07de004e8f4d9", "55e9bba010344ca4beb985df6e19fa0f", "d6b83cedf72b4b6f8064b99341f67a24", "504ebe991a2744129fe505e11eda37b4", "b558200eabf1452da063f6fd765407fb", "0ab6065d9f2b45879b71bfdd49a7b839", "048a8ad112794f628dfacaa6afc3392b", "9db360e78485441aaa8e1ded2e68dedd", "11ffd14bba034f50867e369fbf5daef1", "ff52a5a13235408a829a3d1f8774e3a6", "a40b0ea231da481099657870d5eee2c1", "3d0824795c76430285086b909b3f5338", "7beaed2d230d42e79106b3181d7774b1", "43dc5b885de04d70a6fb2ba162d1343b", "776ffb2a9e5644af8dfdea7d16f4ba2b", "f6a65b6db69246e389284d920ae95b53", "8d540e71071a48c09aa9649926409f7a", "7a70671da3b94fa2a5184a4f871ffca5", "7007fae84ff045bf907324783e43c76a", "d433c5972dd14c618cdb3c0f34389475", "f543f1f979ee4ae49ce089e338c75279", "738a1bcb6b3b41b8bc1f71733f4b791d", "eed4bf33ecb5450eb1b29b1b629b39db", "a85822ccb1c949e683e900b966025ad3" ] }, "id": "8msqvr3RfqAN", "outputId": "ac700ed0-354f-4f33-dd32-1254a372ba2f" }, "execution_count": 41, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "VBox(children=(HTML(value='
main\n" ] } ] } ] }